[cig-commits] r19129 - in seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src: cuda decompose_mesh_SCOTCH generate_databases shared specfem3D

danielpeter at geodynamics.org danielpeter at geodynamics.org
Sat Oct 29 19:25:30 PDT 2011


Author: danielpeter
Date: 2011-10-29 19:25:28 -0700 (Sat, 29 Oct 2011)
New Revision: 19129

Modified:
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/decompose_mesh_SCOTCH.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/part_decompose_mesh_SCOTCH.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/program_decompose_mesh_SCOTCH.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/calc_jacobian.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_global.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/memory_eval.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_external_values.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_tomography.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_arrays_source.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/convolve_source_timefunction.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_name_database.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_serial_name_database.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/define_derivation_matrices.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/exit_mpi.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_attenuation_model.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_cmt.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_jacobian_boundaries.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape2D.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape3D.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/gll_library.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/hex_nodes.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/lagrange_poly.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_topo_bathy_file.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_value_parameters.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/recompute_jacobian.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_header_file.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/smooth_vol_data.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/utm_geo.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/comp_source_time_function.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_receivers.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_source.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/save_adjoint_kernels.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_GLL_points.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
Log:
updates preparation routines; kernels tested w/ absorbing boundaries; version used for HP node benchmark

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -45,17 +45,17 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(check_max_norm_displ_gpu,
               CHECK_MAX_NORM_DISPL_GPU)(int* size, float* displ,long* Mesh_pointer_f,int* announceID) {
 
 TRACE("check_max_norm_displ_gpu");
-  
-  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container  
 
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
   cudaMemcpy(displ, mp->d_displ,*size*sizeof(float),cudaMemcpyDeviceToHost);
   float maxnorm=0;
-  
+
   for(int i=0;i<*size;i++) {
     maxnorm = MAX(maxnorm,fabsf(displ[i]));
   }
@@ -64,7 +64,7 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(check_max_norm_vector,
               CHECK_MAX_NORM_VECTOR)(int* size, float* vector1, int* announceID) {
 
@@ -85,14 +85,14 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(check_max_norm_displ,
               CHECK_MAX_NORM_DISPL)(int* size, float* displ, int* announceID) {
 
 TRACE("check_max_norm_displ");
 
   float maxnorm=0;
-  
+
   for(int i=0;i<*size;i++) {
     maxnorm = MAX(maxnorm,fabsf(displ[i]));
   }
@@ -101,22 +101,22 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(check_max_norm_b_displ_gpu,
               CHECK_MAX_NORM_B_DISPL_GPU)(int* size, float* b_displ,long* Mesh_pointer_f,int* announceID) {
 
 TRACE("check_max_norm_b_displ_gpu");
-  
-  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container  
 
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
   float* b_accel = (float*)malloc(*size*sizeof(float));
-  
+
   cudaMemcpy(b_displ, mp->d_b_displ,*size*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(float),cudaMemcpyDeviceToHost);
 
   float maxnorm=0;
   float maxnorm_accel=0;
-  
+
   for(int i=0;i<*size;i++) {
     maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
     maxnorm_accel = MAX(maxnorm,fabsf(b_accel[i]));
@@ -128,18 +128,18 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(check_max_norm_b_accel_gpu,
               CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, float* b_accel,long* Mesh_pointer_f,int* announceID) {
 
 TRACE("check_max_norm_b_accel_gpu");
-  
-  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container  
 
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
   cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(float),cudaMemcpyDeviceToHost);
 
   float maxnorm=0;
-  
+
   for(int i=0;i<*size;i++) {
     maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
   }
@@ -148,18 +148,18 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(check_max_norm_b_veloc_gpu,
               CHECK_MAX_NORM_B_VELOC_GPU)(int* size, float* b_veloc,long* Mesh_pointer_f,int* announceID) {
 
 TRACE("check_max_norm_b_veloc_gpu");
-  
-  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container  
 
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
   cudaMemcpy(b_veloc, mp->d_b_veloc,*size*sizeof(float),cudaMemcpyDeviceToHost);
 
   float maxnorm=0;
-  
+
   for(int i=0;i<*size;i++) {
     maxnorm = MAX(maxnorm,fabsf(b_veloc[i]));
   }
@@ -168,14 +168,14 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(check_max_norm_b_displ,
               CHECK_MAX_NORM_B_DISPL)(int* size, float* b_displ,int* announceID) {
 
 TRACE("check_max_norm_b_displ");
-    
+
   float maxnorm=0;
-  
+
   for(int i=0;i<*size;i++) {
     maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
   }
@@ -184,14 +184,14 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(check_max_norm_b_accel,
               CHECK_MAX_NORM_B_ACCEL)(int* size, float* b_accel,int* announceID) {
 
 TRACE("check_max_norm_b_accel");
-    
+
   float maxnorm=0;
-  
+
   for(int i=0;i<*size;i++) {
     maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
   }
@@ -200,7 +200,7 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(check_error_vectors,
               CHECK_ERROR_VECTORS)(int* sizef, float* vector1,float* vector2) {
 
@@ -213,9 +213,9 @@
   double temp;
   double maxerr=0;
   int maxerrorloc;
-  
+
   for(int i=0;i<size;++i) {
-    temp = vector1[i]-vector2[i];    
+    temp = vector1[i]-vector2[i];
     diff2 += temp*temp;
     sum += vector1[i]*vector1[i];
     if(maxerr < fabsf(temp)) {
@@ -224,17 +224,18 @@
     }
   }
 
-  printf("rel error = %f, maxerr = %e @ %d\n",diff2/sum,maxerr,maxerrorloc); 
+  printf("rel error = %f, maxerr = %e @ %d\n",diff2/sum,maxerr,maxerrorloc);
   int myrank;
   MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
-  if(myrank==0) {
+  if(myrank == 0) {
     for(int i=maxerrorloc;i>maxerrorloc-5;i--) {
       printf("[%d]: %e vs. %e\n",i,vector1[i],vector2[i]);
     }
   }
-  
+
 }
 
+
 /* ----------------------------------------------------------------------------------------------- */
 
 // Auxiliary functions
@@ -244,9 +245,9 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(get_max_accel,
-              GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {  
+              GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {
 
 TRACE("get_max_accel");
 
@@ -272,11 +273,11 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 __global__ void get_maximum_kernel(float* array, int size, float* d_max){
-  
-  /* simplest version: uses only 1 thread  
+
+  /* simplest version: uses only 1 thread
    float max;
    max = 0;
-   // finds maximum value in array  
+   // finds maximum value in array
    if( size > 0 ){
    max = abs(array[0]);
    for( int i=1; i < size; i++){
@@ -285,62 +286,62 @@
    }
    *d_max = max;
    */
-  
+
   // reduction example:
   __shared__ float sdata[256] ;
-  
+
   // load shared mem
   unsigned int tid = threadIdx.x;
   unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
-  
+
   // loads absolute values into shared memory
   sdata[tid] = (i < size) ? fabs(array[i]) : 0.0 ;
-  
+
   __syncthreads();
-  
+
   // do reduction in shared mem
-  for(unsigned int s=blockDim.x/2; s>0; s>>=1) 
+  for(unsigned int s=blockDim.x/2; s>0; s>>=1)
   {
     if (tid < s){
-      // summation: 
+      // summation:
       //sdata[tid] += sdata[tid + s];
-      // maximum: 
+      // maximum:
       if( sdata[tid] < sdata[tid + s] ) sdata[tid] = sdata[tid + s];
     }
     __syncthreads();
   }
-  
+
   // write result for this block to global mem
-  if (tid == 0) d_max[blockIdx.x] = sdata[0];  
-  
+  if (tid == 0) d_max[blockIdx.x] = sdata[0];
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(get_norm_acoustic_from_device_cuda,
-              GET_NORM_ACOUSTIC_FROM_DEVICE_CUDA)(float* norm, 
+void FC_FUNC_(get_norm_acoustic_from_device,
+              GET_NORM_ACOUSTIC_FROM_DEVICE)(float* norm,
                                                   long* Mesh_pointer_f,
                                                   int* SIMULATION_TYPE) {
-  
-TRACE("get_norm_acoustic_from_device_cuda");
+
+TRACE("get_norm_acoustic_from_device");
   //double start_time = get_time();
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
   float max;
   float *d_max;
-  
-  
-  
+
+
+
   max = 0;
-  
+
   /* way 1 : timing Elapsed time: 8.464813e-03
    float* h_array;
    h_array = (float*)calloc(mp->NGLOB_AB,sizeof(float));
-   
+
    print_CUDA_error_if_any(cudaMemcpy(h_array,mp->d_potential_dot_dot_acoustic,
    sizeof(float)*(mp->NGLOB_AB),cudaMemcpyDeviceToHost),131);
-   
+
    // finds maximum value in array
    max = h_array[0];
    for( int i=1; i < mp->NGLOB_AB; i++){
@@ -348,72 +349,72 @@
    }
    free(h_array);
    */
-  
+
   /* way 2: timing Elapsed time: 8.818102e-02
    // launch simple kernel
    cudaMalloc((void**)&d_max,sizeof(float));
-   
+
    dim3 grid(1,1);
    dim3 threads(1,1,1);
-   
+
    get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
    mp->NGLOB_AB,
-   d_max);    
+   d_max);
    print_CUDA_error_if_any(cudaMemcpy(&max,d_max, sizeof(float), cudaMemcpyDeviceToHost),222);
-   
+
    cudaFree(d_max);
    */
-  
+
   // way 2 b: timing Elapsed time: 1.236916e-03
   // launch simple reduction kernel
   float* h_max;
   int blocksize = 256;
-  
+
   int num_blocks_x = ceil(mp->NGLOB_AB/blocksize);
   //printf("num_blocks_x %i \n",num_blocks_x);
-  
-  h_max = (float*) calloc(num_blocks_x,sizeof(float));    
+
+  h_max = (float*) calloc(num_blocks_x,sizeof(float));
   cudaMalloc((void**)&d_max,num_blocks_x*sizeof(float));
-  
+
   dim3 grid(num_blocks_x,1);
   dim3 threads(blocksize,1,1);
-  
-  if(*SIMULATION_TYPE == 1 ){       
+
+  if(*SIMULATION_TYPE == 1 ){
     get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
                                          mp->NGLOB_AB,
-                                         d_max);    
+                                         d_max);
   }
-  
+
   if(*SIMULATION_TYPE == 3 ){
     get_maximum_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
                                          mp->NGLOB_AB,
-                                         d_max);    
-  }    
-  
+                                         d_max);
+  }
+
   print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(float),cudaMemcpyDeviceToHost),222);
-  
+
   // determines max for all blocks
   max = h_max[0];
   for(int i=1;i<num_blocks_x;i++) {
     if( max < h_max[i]) max = h_max[i];
   }
-  
+
   cudaFree(d_max);
   free(h_max);
-  
+
   /* way 3: doesn't work properly...
    cublasStatus status;
-   
-   // Initialize CUBLAS 
-   status = cublasInit();    
+
+   // Initialize CUBLAS
+   status = cublasInit();
    if (status != CUBLAS_STATUS_SUCCESS) {
    fprintf (stderr, "!!!! CUBLAS initialization error\n");
    exit(1);
    }
-   
+
    // cublas function: cublasIsamax
-   //       finds the smallest index of the maximum magnitude element of single  
-   //      precision vector x 
+   //       finds the smallest index of the maximum magnitude element of single 
+   //      precision vector x
    int incr = 1;
    int imax = 0;
    imax = cublasIsamax(mp->NGLOB_AB,(float*)mp->d_potential_dot_dot_acoustic, incr);
@@ -421,29 +422,29 @@
    if (status != CUBLAS_STATUS_SUCCESS) {
    fprintf (stderr, "!!!! CUBLAS error in cublasIsamax\n");
    exit(1);
-   }    
-   
+   }
+
    print_CUDA_error_if_any(cudaMemcpy(&max,&(mp->d_potential_dot_dot_acoustic[imax]), sizeof(float), cudaMemcpyDeviceToHost),222);
-   
+
    printf("maximum %i %i %f \n",mp->NGLOB_AB,imax,max);
-   
-   // Shutdown 
+
+   // Shutdown
    status = cublasShutdown();
    if (status != CUBLAS_STATUS_SUCCESS) {
    fprintf (stderr, "!!!! shutdown error (A)\n");
    exit(1);
    }
-   
+
    */
-  
+
   // return result
-  *norm = max;    
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING       
+  *norm = max;
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
-  exit_on_cuda_error("after get_norm_acoustic_from_device_cuda");  
-#endif      
+  exit_on_cuda_error("after get_norm_acoustic_from_device");
+#endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -453,99 +454,99 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 __global__ void get_maximum_vector_kernel(float* array, int size, float* d_max){
-  
+
   // reduction example:
   __shared__ float sdata[256] ;
-  
+
   // load shared mem
   unsigned int tid = threadIdx.x;
   unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
-  
-  // loads values into shared memory: assume array is a vector array 
+
+  // loads values into shared memory: assume array is a vector array
   sdata[tid] = (i < size) ? sqrt(array[i*3]*array[i*3]
                                  + array[i*3+1]*array[i*3+1]
                                  + array[i*3+2]*array[i*3+2]) : 0.0 ;
-  
+
   __syncthreads();
-  
+
   // do reduction in shared mem
-  for(unsigned int s=blockDim.x/2; s>0; s>>=1) 
+  for(unsigned int s=blockDim.x/2; s>0; s>>=1)
   {
     if (tid < s){
-      // summation: 
+      // summation:
       //sdata[tid] += sdata[tid + s];
-      // maximum: 
+      // maximum:
       if( sdata[tid] < sdata[tid + s] ) sdata[tid] = sdata[tid + s];
     }
     __syncthreads();
   }
-  
+
   // write result for this block to global mem
-  if (tid == 0) d_max[blockIdx.x] = sdata[0];  
-  
+  if (tid == 0) d_max[blockIdx.x] = sdata[0];
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(get_norm_elastic_from_device_cuda,
-              GET_NORM_ELASTIC_FROM_DEVICE_CUDA)(float* norm, 
+void FC_FUNC_(get_norm_elastic_from_device,
+              GET_NORM_ELASTIC_FROM_DEVICE)(float* norm,
                                                  long* Mesh_pointer_f,
                                                  int* SIMULATION_TYPE) {
-  
-  TRACE("get_norm_elastic_from_device_cuda");
+
+  TRACE("get_norm_elastic_from_device");
   //double start_time = get_time();
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
   float max;
   float *d_max;
-  
+
   max = 0;
-  
+
   // launch simple reduction kernel
   float* h_max;
   int blocksize = 256;
-  
+
   int num_blocks_x = ceil(mp->NGLOB_AB/blocksize);
   //printf("num_blocks_x %i \n",num_blocks_x);
-  
-  h_max = (float*) calloc(num_blocks_x,sizeof(float));    
+
+  h_max = (float*) calloc(num_blocks_x,sizeof(float));
   cudaMalloc((void**)&d_max,num_blocks_x*sizeof(float));
-  
+
   dim3 grid(num_blocks_x,1);
   dim3 threads(blocksize,1,1);
-  
-  if(*SIMULATION_TYPE == 1 ){       
+
+  if(*SIMULATION_TYPE == 1 ){
     get_maximum_vector_kernel<<<grid,threads>>>(mp->d_displ,
                                                 mp->NGLOB_AB,
-                                                d_max);    
+                                                d_max);
   }
-  
+
   if(*SIMULATION_TYPE == 3 ){
     get_maximum_vector_kernel<<<grid,threads>>>(mp->d_b_displ,
                                                 mp->NGLOB_AB,
-                                                d_max);    
-  }    
-  
+                                                d_max);
+  }
+
   print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(float),cudaMemcpyDeviceToHost),222);
-  
+
   // determines max for all blocks
   max = h_max[0];
   for(int i=1;i<num_blocks_x;i++) {
     if( max < h_max[i]) max = h_max[i];
   }
-  
+
   cudaFree(d_max);
   free(h_max);
-  
+
   // return result
-  *norm = max;    
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING       
+  *norm = max;
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
-  exit_on_cuda_error("after get_norm_elastic_from_device_cuda");  
-#endif      
+  exit_on_cuda_error("after get_norm_elastic_from_device");
+#endif
 }
 
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -48,133 +48,166 @@
 
 
 // crashes if the CMTSOLUTION does not match the mesh properly
-__global__ void compute_add_sources_kernel(float* accel, int* ibool, int* ispec_is_inner, int phase_is_inner, float* sourcearrays, double* stf_pre_compute,int myrank, int* islice_selected_source, int* ispec_selected_source, int* ispec_is_elastic, int NSOURCES,float* d_debug) {
+__global__ void compute_add_sources_kernel(float* accel,
+                                           int* ibool,
+                                           int* ispec_is_inner,
+                                           int phase_is_inner,
+                                           float* sourcearrays,
+                                           double* stf_pre_compute,
+                                           int myrank,
+                                           int* islice_selected_source,
+                                           int* ispec_selected_source,
+                                           int* ispec_is_elastic,
+                                           int NSOURCES,
+                                           float* d_debug) {
   int i = threadIdx.x;
   int j = threadIdx.y;
   int k = threadIdx.z;
-  
+
   int isource  = blockIdx.x + gridDim.x*blockIdx.y; // bx
   int ispec;
   int iglob;
-  double stf;
+  float stf;
 
   if(isource < NSOURCES) { // when NSOURCES > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
-    
+
     if(myrank == islice_selected_source[isource]) {
 
       ispec = ispec_selected_source[isource]-1;
 
-      if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] == 1) {
-		    
-        stf = stf_pre_compute[isource];
+      if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] ) {
+
+        stf = (float) stf_pre_compute[isource];
+
+        //if(i==0 && j==0 && k==0) printf("add sources kernel: stf = %e\n",stf);
+
         iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
         atomicAdd(&accel[iglob*3],
                   sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf);
         atomicAdd(&accel[iglob*3+1],
                   sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 1, i,j,k)]*stf);
-	// if((iglob*3+2 == 304598)) {
-	//   atomicAdd(&d_debug[0],1.0f);
-	//   d_debug[1] = accel[iglob*3+2];
-	//   d_debug[2] = sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)];
-	//   d_debug[3] = stf;
-	// }
-	// d_debug[4] = 42.0f;
+  // if((iglob*3+2 == 304598)) {
+  //   atomicAdd(&d_debug[0],1.0f);
+  //   d_debug[1] = accel[iglob*3+2];
+  //   d_debug[2] = sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)];
+  //   d_debug[3] = stf;
+  // }
+  // d_debug[4] = 42.0f;
         atomicAdd(&accel[iglob*3+2],
-                  sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)]*stf);	
+                  sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)]*stf);
       }
     }
   }
-  
+
 }
 
+
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(add_sourcearrays_adjoint_cuda,
-              ADD_SOURCEARRAYS_ADJOINT_CUDA)(long* Mesh_pointer,
-                                              int* USE_FORCE_POINT_SOURCE,
-                                              double* h_stf_pre_compute,int* NSOURCES,
-                                              int* phase_is_inner,int* myrank) {
-TRACE("add_sourcearrays_adjoint_cuda");
-// EPIK_TRACER("add_sourcearrays_adjoint_cuda");
-  
-  Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-  if(*USE_FORCE_POINT_SOURCE) {
-    printf("USE FORCE POINT SOURCE not implemented for GPU_MODE");
-    MPI_Abort(MPI_COMM_WORLD, 1);
-  }
+extern "C"
+void FC_FUNC_(compute_add_sources_el_cuda,
+              COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
+                                           //int* NSPEC_ABf, int* NGLOB_ABf,
+                                            int* phase_is_innerf,
+                                            int* NSOURCESf,
+                                            //int* itf, float* dtf, float* t0f,
+                                            //int* SIMULATION_TYPEf,int* NSTEPf,
+                                            //int* NOISE_TOMOGRAPHYf,
+                                            //int* USE_FORCE_POINT_SOURCEf,
+                                            double* h_stf_pre_compute,
+                                            int* myrankf) {
 
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,(*NSOURCES)*sizeof(double),cudaMemcpyHostToDevice),18);
+TRACE("compute_add_sources_el_cuda");
 
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING  
-  exit_on_cuda_error("noise_read_add_surface_movie_cuda_kernel");
-#endif
-  
-  int num_blocks_x = *NSOURCES;
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // check if anything to do
+  if( mp->nsources_local == 0 ) return;
+
+  //int NSPEC_AB = *NSPEC_ABf;
+  //int NGLOB_AB = *NGLOB_ABf;
+  int phase_is_inner = *phase_is_innerf;
+  //int it = *itf;
+  //float dt = *dtf;
+  //float t0 = *t0f;
+  //int SIMULATION_TYPE = *SIMULATION_TYPEf;
+  //int NSTEP = *NSTEPf;
+  //int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
+  int NSOURCES = *NSOURCESf;
+  //int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
+  int myrank = *myrankf;
+
+  float* d_debug;
+
+  int num_blocks_x = NSOURCES;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
 
+  //double* d_stf_pre_compute;
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+                                     NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(5,5,5);
+  // (float* accel, int* ibool, int* ispec_is_inner, int phase_is_inner,
+  // float* sourcearrays, double* stf_pre_compute,int myrank,
+  // int* islice_selected_source, int* ispec_selected_source,
+  // int* ispec_is_elastic, int NSOURCES)
 
-  float* d_debug;
-  // float* h_debug = (float*)calloc(128,sizeof(float));
-  // cudaMalloc((void**)&d_debug,128*sizeof(float));
-  // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-  
-  compute_add_sources_kernel<<<grid,threads>>>(mp->d_b_accel,mp->d_ibool, 
-                                               mp->d_ispec_is_inner, *phase_is_inner, 
-                                               mp->d_sourcearrays, 
+  //daniel
+  //printf("add sources : nsources_local = %d\n",mp->nsources_local);
+  //printf("add sources : stf = %e\n",h_stf_pre_compute[0]);
+
+
+  compute_add_sources_kernel<<<grid,threads>>>(mp->d_accel,
+                                               mp->d_ibool,
+                                               mp->d_ispec_is_inner,
+                                               phase_is_inner,
+                                               mp->d_sourcearrays,
                                                mp->d_stf_pre_compute,
-                                               *myrank, 
-                                               mp->d_islice_selected_source,mp->d_ispec_selected_source,
-                                               mp->d_ispec_is_elastic, 
-                                               *NSOURCES,
+                                               myrank,
+                                               mp->d_islice_selected_source,
+                                               mp->d_ispec_selected_source,
+                                               mp->d_ispec_is_elastic,
+                                               NSOURCES,
                                                d_debug);
 
-  // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
-  // for(int i=0;i<10;i++) {
-  //   printf("debug[%d] = %e \n",i,h_debug[i]);
-  // }
-  
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("add_sourcearrays_adjoint_cuda");
+  exit_on_cuda_error("compute_add_sources_kernel");
 #endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(compute_add_sources_elastic_cuda,
-              COMPUTE_ADD_SOURCES_ELASTIC_CUDA)(long* Mesh_pointer_f, 
-                                                int* NSPEC_ABf, int* NGLOB_ABf, 
-                                                int* phase_is_innerf,int* NSOURCESf, 
-                                                int* itf, float* dtf, float* t0f,
-                                                int* SIMULATION_TYPEf,int* NSTEPf,
-                                                int* NOISE_TOMOGRAPHYf, 
-                                                int* USE_FORCE_POINT_SOURCEf, 
-                                                double* h_stf_pre_compute, int* myrankf) {
+extern "C"
+void FC_FUNC_(compute_add_sources_el_s3_cuda,
+              COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
+                                              int* USE_FORCE_POINT_SOURCE,
+                                              double* h_stf_pre_compute,
+                                              int* NSOURCESf,
+                                              int* phase_is_inner,int* myrank) {
+  TRACE("compute_add_sources_el_s3_cuda");
+  // EPIK_TRACER("compute_add_sources_el_s3_cuda");
 
-TRACE("compute_add_sources_elastic_cuda");
+  Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
 
-  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  //int NSPEC_AB = *NSPEC_ABf;
-  //int NGLOB_AB = *NGLOB_ABf;
-  int phase_is_inner = *phase_is_innerf;
-  //int it = *itf;
-  //float dt = *dtf;
-  //float t0 = *t0f;
-  //int SIMULATION_TYPE = *SIMULATION_TYPEf;
-  //int NSTEP = *NSTEPf;
-  //int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
   int NSOURCES = *NSOURCESf;
-  //int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
-  int myrank = *myrankf;
 
-  float* d_debug;
+  if(*USE_FORCE_POINT_SOURCE) {
+    printf("USE FORCE POINT SOURCE not implemented for GPU_MODE");
+    MPI_Abort(MPI_COMM_WORLD, 1);
+  }
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+                                     NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("compute_add_sources_el_s3_cuda");
+#endif
+
   int num_blocks_x = NSOURCES;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
@@ -182,27 +215,44 @@
     num_blocks_y = num_blocks_y*2;
   }
 
-  //double* d_stf_pre_compute;
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
-  
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(5,5,5);
-  // (float* accel, int* ibool, int* ispec_is_inner, int phase_is_inner, float* sourcearrays, double* stf_pre_compute,int myrank, int* islice_selected_source, int* ispec_selected_source, int* ispec_is_elastic, int NSOURCES)
-  
-  
-  
-  compute_add_sources_kernel<<<grid,threads>>>(mp->d_accel,mp->d_ibool, mp->d_ispec_is_inner, phase_is_inner, mp->d_sourcearrays, mp->d_stf_pre_compute,myrank, mp->d_islice_selected_source,mp->d_ispec_selected_source,mp->d_ispec_is_elastic, NSOURCES,d_debug);
 
+  float* d_debug;
+  // float* h_debug = (float*)calloc(128,sizeof(float));
+  // cudaMalloc((void**)&d_debug,128*sizeof(float));
+  // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
+
+  compute_add_sources_kernel<<<grid,threads>>>(mp->d_b_accel,mp->d_ibool,
+                                               mp->d_ispec_is_inner, *phase_is_inner,
+                                               mp->d_sourcearrays,
+                                               mp->d_stf_pre_compute,
+                                               *myrank,
+                                               mp->d_islice_selected_source,mp->d_ispec_selected_source,
+                                               mp->d_ispec_is_elastic,
+                                               NSOURCES,
+                                               d_debug);
+
+  // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+  // for(int i=0;i<10;i++) {
+  //   printf("debug[%d] = %e \n",i,h_debug[i]);
+  // }
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("compute_add_sources_kernel");
+  exit_on_cuda_error("compute_add_sources_el_s3_cuda");
 #endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void add_source_master_rec_noise_cuda_kernel(int* ibool, int* ispec_selected_rec, 
-                                                        int irec_master_noise, 
-                                                        real* accel, real* noise_sourcearray, 
+// NOISE sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void add_source_master_rec_noise_cuda_kernel(int* ibool, int* ispec_selected_rec,
+                                                        int irec_master_noise,
+                                                        realw* accel,
+                                                        realw* noise_sourcearray,
                                                         int it) {
   int tx = threadIdx.x;
   int iglob = ibool[tx + 125*(ispec_selected_rec[irec_master_noise-1]-1)]-1;
@@ -211,24 +261,24 @@
   // accel[3*iglob] += noise_sourcearray[3*tx + 3*125*it];
   // accel[1+3*iglob] += noise_sourcearray[1+3*tx + 3*125*it];
   // accel[2+3*iglob] += noise_sourcearray[2+3*tx + 3*125*it];
-  
+
   atomicAdd(&accel[iglob*3],noise_sourcearray[3*tx + 3*125*it]);
   atomicAdd(&accel[iglob*3+1],noise_sourcearray[1+3*tx + 3*125*it]);
   atomicAdd(&accel[iglob*3+2],noise_sourcearray[2+3*tx + 3*125*it]);
-  
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(add_source_master_rec_noise_cuda,
-              ADD_SOURCE_MASTER_REC_NOISE_CUDA)(long* Mesh_pointer_f, 
-                                                int* myrank_f,  
-                                                int* it_f, 
-                                                int* irec_master_noise_f, 
+extern "C"
+void FC_FUNC_(add_source_master_rec_noise_cu,
+              ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
+                                                int* myrank_f,
+                                                int* it_f,
+                                                int* irec_master_noise_f,
                                                 int* islice_selected_rec) {
 
-TRACE("add_source_master_rec_noise_cuda");
+TRACE("add_source_master_rec_noise_cu");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
@@ -239,8 +289,8 @@
   dim3 threads(125,1,1);
   if(myrank == islice_selected_rec[irec_master_noise-1]) {
     add_source_master_rec_noise_cuda_kernel<<<grid,threads>>>(mp->d_ibool, mp->d_ispec_selected_rec,
-							      irec_master_noise, mp->d_accel,
-							      mp->d_noise_sourcearray, it);    
+                    irec_master_noise, mp->d_accel,
+                    mp->d_noise_sourcearray, it);
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("add_source_master_rec_noise_cuda_kernel");
@@ -250,57 +300,61 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void add_sources_SIM_TYPE_2_OR_3_kernel(float* accel, int nrec,		  
-						   float* adj_sourcearrays,
-						   int* ibool,
-						   int* ispec_is_inner,
-						   int* ispec_selected_rec,
-						   int phase_is_inner,
-						   int* islice_selected_rec,
-						   int* pre_computed_irec,
-						   int nadj_rec_local,
-						   int NTSTEP_BETWEEN_ADJSRC,
-						   int myrank,
-						   int* debugi,
-						   float* debugf) {
+// ADJOINT sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void add_sources_SIM_TYPE_2_OR_3_kernel(float* accel, int nrec,
+                                                   float* adj_sourcearrays,
+                                                   int* ibool,
+                                                   int* ispec_is_inner,
+                                                   int* ispec_selected_rec,
+                                                   int phase_is_inner,
+                                                   int* islice_selected_rec,
+                                                   int* pre_computed_irec,
+                                                   int nadj_rec_local,
+                                                   int NTSTEP_BETWEEN_ADJSRC,
+                                                   int myrank,
+                                                   int* debugi,
+                                                   float* debugf) {
   int irec_local = blockIdx.x + gridDim.x*blockIdx.y;
   if(irec_local<nadj_rec_local) { // when nrec > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
 
     int irec = pre_computed_irec[irec_local];
-    
+
     int ispec_selected = ispec_selected_rec[irec]-1;
     if(ispec_is_inner[ispec_selected] == phase_is_inner) {
       int i = threadIdx.x;
       int j = threadIdx.y;
       int k = threadIdx.z;
       int iglob = ibool[i+5*(j+5*(k+5*ispec_selected))]-1;
-      
+
       // atomic operations are absolutely necessary for correctness!
       atomicAdd(&(accel[0+3*iglob]),adj_sourcearrays[INDEX5(5,5,5,3,
-							    i,j,k,
-							    0,
-							    irec_local)]);
-		
+                  i,j,k,
+                  0,
+                  irec_local)]);
+
       atomicAdd(&accel[1+3*iglob], adj_sourcearrays[INDEX5(5,5,5,3,
-							   i,j,k,
-							   1,
-							   irec_local)]);
-      
+                 i,j,k,
+                 1,
+                 irec_local)]);
+
       atomicAdd(&accel[2+3*iglob],adj_sourcearrays[INDEX5(5,5,5,3,
-							  i,j,k,
-							  2,
-							  irec_local)]);
+                i,j,k,
+                2,
+                irec_local)]);
     }
-     
+
   }
-  
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(add_sources_sim_type_2_or_3,
-              ADD_SOURCES_SIM_TYPE_2_OR_3)(long* Mesh_pointer, 
+              ADD_SOURCES_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
                                            float* h_adj_sourcearrays,
                                            int* size_adj_sourcearrays, int* ispec_is_inner,
                                            int* phase_is_inner, int* ispec_selected_rec,
@@ -312,11 +366,11 @@
 TRACE("add_sources_sim_type_2_or_3");
 
   if(*nadj_rec_local > 0) {
-  
+
     Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
     int rank;
     MPI_Comm_rank(MPI_COMM_WORLD,&rank);
-  
+
     // make sure grid dimension is less than 65535 in x dimension
     int num_blocks_x = *nadj_rec_local;
     int num_blocks_y = 1;
@@ -326,112 +380,110 @@
     }
     dim3 grid(num_blocks_x,num_blocks_y,1);
     dim3 threads(5,5,5);
-  
+
     float* d_adj_sourcearrays;
     print_CUDA_error_if_any(cudaMalloc((void**)&d_adj_sourcearrays,
-				       (*nadj_rec_local)*3*125*sizeof(float)),1);
+                                       (*nadj_rec_local)*3*125*sizeof(float)),1);
     float* h_adj_sourcearrays_slice = (float*)malloc((*nadj_rec_local)*3*125*sizeof(float));
 
     int* h_pre_computed_irec = new int[*nadj_rec_local];
     int* d_pre_computed_irec;
     cudaMalloc((void**)&d_pre_computed_irec,(*nadj_rec_local)*sizeof(int));
-  
+
     // build slice of adj_sourcearrays because full array is *very* large.
     int irec_local = 0;
     for(int irec = 0;irec<*nrec;irec++) {
       if(*myrank == h_islice_selected_rec[irec]) {
-	irec_local++;
-	h_pre_computed_irec[irec_local-1] = irec;
-	if(ispec_is_inner[ispec_selected_rec[irec]-1] == *phase_is_inner) {
-	  for(int k=0;k<5;k++) {
-	    for(int j=0;j<5;j++) {
-	      for(int i=0;i<5;i++) {
+        irec_local++;
+        h_pre_computed_irec[irec_local-1] = irec;
+        if(ispec_is_inner[ispec_selected_rec[irec]-1] == *phase_is_inner) {
+          for(int k=0;k<5;k++) {
+            for(int j=0;j<5;j++) {
+              for(int i=0;i<5;i++) {
 
-		h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
-						i,j,k,0,
-						irec_local-1)]
-		  = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
-					      *NTSTEP_BETWEEN_READ_ADJSRC,
-					      3,5,5,
-					      irec_local-1,
-					      *time_index-1,
-					      0,i,j,k)];
-	      
-		h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
-						i,j,k,1,
-						irec_local-1)]
-		  = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
-					      *NTSTEP_BETWEEN_READ_ADJSRC,
-					      3,5,5,
-					      irec_local-1,
-					      *time_index-1,
-					      1,i,j,k)];
-	      
-		h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
-						i,j,k,2,
-						irec_local-1)]
-		  = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
-					      *NTSTEP_BETWEEN_READ_ADJSRC,
-					      3,5,5,
-					      irec_local-1,
-					      *time_index-1,
-					      2,i,j,k)];
-	      
-								   
-	      }
-	    }
-	  }
-	}
+                h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+                                                i,j,k,0,
+                                                irec_local-1)]
+                        = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+                                                    *NTSTEP_BETWEEN_READ_ADJSRC,
+                                                    3,5,5,
+                                                    irec_local-1,
+                                                    *time_index-1,
+                                                    0,i,j,k)];
+
+                h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+                                                i,j,k,1,
+                                                irec_local-1)]
+                        = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+                                                    *NTSTEP_BETWEEN_READ_ADJSRC,
+                                                    3,5,5,
+                                                    irec_local-1,
+                                                    *time_index-1,
+                                                    1,i,j,k)];
+
+                h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+                                                i,j,k,2,
+                                                irec_local-1)]
+                        = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+                                                    *NTSTEP_BETWEEN_READ_ADJSRC,
+                                                    3,5,5,
+                                                    irec_local-1,
+                                                    *time_index-1,
+                                                    2,i,j,k)];
+              }
+            }
+          }
+        }
       }
     }
     // printf("irec_local vs. *nadj_rec_local -> %d vs. %d\n",irec_local,*nadj_rec_local);
     // for(int ispec=0;ispec<(*nadj_rec_local);ispec++) {
     //   for(int i=0;i<5;i++)
     //     for(int j=0;j<5;j++)
-    // 	for(int k=0;k<5;k++) {
-    // 	  h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,0,ispec)] =
-    // 	    h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
-    // 				      ispec,
-    // 				      *time_index-1,
-    // 				      0,
-    // 				      i,j,k)];
-    // 	  h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,1,ispec)] =
-    // 	    h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
-    // 				      ispec,
-    // 				      *time_index-1,
-    // 				      1,
-    // 				      i,j,k)];
-    // 	  h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,2,ispec)] =
-    // 	    h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_ADJSRC,3,5,5,
-    // 				      ispec,
-    // 				      *time_index-1,
-    // 				      2,
-    // 				      i,j,k)];	  
-    // 	}
-    
+    //  for(int k=0;k<5;k++) {
+    //    h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,0,ispec)] =
+    //      h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
+    //              ispec,
+    //              *time_index-1,
+    //              0,
+    //              i,j,k)];
+    //    h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,1,ispec)] =
+    //      h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
+    //              ispec,
+    //              *time_index-1,
+    //              1,
+    //              i,j,k)];
+    //    h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,2,ispec)] =
+    //      h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_ADJSRC,3,5,5,
+    //              ispec,
+    //              *time_index-1,
+    //              2,
+    //              i,j,k)];
+    //  }
+
     // }
-  
+
     cudaMemcpy(d_adj_sourcearrays, h_adj_sourcearrays_slice,(*nadj_rec_local)*3*125*sizeof(float),
-	       cudaMemcpyHostToDevice);
-  
+         cudaMemcpyHostToDevice);
 
+
     // the irec_local variable needs to be precomputed (as
     // h_pre_comp..), because normally it is in the loop updating accel,
     // and due to how it's incremented, it cannot be parallelized
-  
+
     // int irec_local=0;
     // for(int irec=0;irec<*nrec;irec++) {
     //   if(*myrank == h_islice_selected_rec[irec]) {
     //     h_pre_computed_irec_local_index[irec] = irec_local;
     //     irec_local++;
     //     if(irec_local==1) {
-    // 	// printf("%d:first useful irec==%d\n",rank,irec);
+    //  // printf("%d:first useful irec==%d\n",rank,irec);
     //     }
     //   }
     //   else h_pre_computed_irec_local_index[irec] = 0;
     // }
     cudaMemcpy(d_pre_computed_irec,h_pre_computed_irec,
-	       (*nadj_rec_local)*sizeof(int),cudaMemcpyHostToDevice);
+         (*nadj_rec_local)*sizeof(int),cudaMemcpyHostToDevice);
     // pause_for_debugger(1);
     int* d_debugi, *h_debugi;
     float* d_debugf, *h_debugf;
@@ -441,33 +493,33 @@
     h_debugf = (float*)calloc(num_blocks_x,sizeof(float));
     cudaMalloc((void**)&d_debugf,num_blocks_x*sizeof(float));
     cudaMemcpy(d_debugf,h_debugf,num_blocks_x*sizeof(float),cudaMemcpyHostToDevice);
-      
+
     add_sources_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_accel, *nrec,
-							 d_adj_sourcearrays, mp->d_ibool,
-							 mp->d_ispec_is_inner,
-							 mp->d_ispec_selected_rec,
-							 *phase_is_inner,
-							 mp->d_islice_selected_rec,
-							 d_pre_computed_irec,
-							 *nadj_rec_local,
-							 *NTSTEP_BETWEEN_READ_ADJSRC,
-							 *myrank,
-							 d_debugi,d_debugf);
+               d_adj_sourcearrays, mp->d_ibool,
+               mp->d_ispec_is_inner,
+               mp->d_ispec_selected_rec,
+               *phase_is_inner,
+               mp->d_islice_selected_rec,
+               d_pre_computed_irec,
+               *nadj_rec_local,
+               *NTSTEP_BETWEEN_READ_ADJSRC,
+               *myrank,
+               d_debugi,d_debugf);
 
     cudaMemcpy(h_debugi,d_debugi,num_blocks_x*sizeof(int),cudaMemcpyDeviceToHost);
     cudaMemcpy(h_debugf,d_debugf,num_blocks_x*sizeof(float),cudaMemcpyDeviceToHost);
-  
+
     // printf("%d: pre_com0:%d\n",rank,h_pre_computed_irec_local_index[0]);
     // printf("%d: pre_com1:%d\n",rank,h_pre_computed_irec_local_index[1]);
     // printf("%d: pre_com2:%d\n",rank,h_pre_computed_irec_local_index[2]);
     // for(int i=156;i<(156+30);i++) {
     //   if(rank==0) printf("%d:debug[%d] = i/f = %d / %e\n",rank,i,h_debugi[i],h_debugf[i]);
     // }
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
     // MPI_Barrier(MPI_COMM_WORLD);
-    exit_on_cuda_error("add_sources_SIM_TYPE_2_OR_3_kernel");    
-  
+    exit_on_cuda_error("add_sources_SIM_TYPE_2_OR_3_kernel");
+
     // printf("Proc %d exiting with successful kernel\n",rank);
     // exit(1);
 #endif
@@ -483,82 +535,86 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void compute_add_sources_acoustic_kernel(float* potential_dot_dot_acoustic, 
-                                                    int* ibool, 
-                                                    int* ispec_is_inner, 
-                                                    int phase_is_inner, 
-                                                    float* sourcearrays, 
+__global__ void compute_add_sources_acoustic_kernel(float* potential_dot_dot_acoustic,
+                                                    int* ibool,
+                                                    int* ispec_is_inner,
+                                                    int phase_is_inner,
+                                                    float* sourcearrays,
                                                     double* stf_pre_compute,
-                                                    int myrank, 
-                                                    int* islice_selected_source, 
-                                                    int* ispec_selected_source, 
-                                                    int* ispec_is_acoustic, 
+                                                    int myrank,
+                                                    int* islice_selected_source,
+                                                    int* ispec_selected_source,
+                                                    int* ispec_is_acoustic,
                                                     float* kappastore,
                                                     int NSOURCES) {
   int i = threadIdx.x;
   int j = threadIdx.y;
   int k = threadIdx.z;
-  
+
   int isource  = blockIdx.x + gridDim.x*blockIdx.y; // bx
-  
+
   int ispec;
   int iglob;
-  double stf;
+  float stf;
   float kappal;
-  
+
   if( isource < NSOURCES ){
-    
+
     //if(myrank == 0 && i== 0 && j == 0 && k == 0) printf("source isource = %i \n",isource);
-    
+
     if(myrank == islice_selected_source[isource]) {
-      
+
       ispec = ispec_selected_source[isource]-1;
-      
-      if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] == 1) {
-        
-        stf = stf_pre_compute[isource];
+
+      if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] ) {
+
+        stf = (float) stf_pre_compute[isource];
         iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
         kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
-        
+
         //printf("source ispec = %i %i %e %e \n",ispec,iglob,stf,kappal);
         //printf("source arr = %e %i %i %i %i %i\n", -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal,i,j,k,iglob,ispec);
-        
+
         atomicAdd(&potential_dot_dot_acoustic[iglob],
                   -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal);
-        
+
         //      potential_dot_dot_acoustic[iglob] +=
-        //                -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal; 
-        
+        //                -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal;
+
         //printf("potential = %e %i %i %i %i %i\n", potential_dot_dot_acoustic[iglob],i,j,k,iglob,ispec);
-        
-        
+
+
       }
     }
-  }  
+  }
 }
 
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(compute_add_sources_acoustic_cuda,
-              COMPUTE_ADD_SOURCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f, 
+extern "C"
+void FC_FUNC_(compute_add_sources_ac_cuda,
+              COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
                                                  int* phase_is_innerf,
-                                                 int* NSOURCESf, 
+                                                 int* NSOURCESf,
                                                  int* SIMULATION_TYPEf,
-                                                 int* USE_FORCE_POINT_SOURCEf, 
-                                                 double* h_stf_pre_compute, 
+                                                 int* USE_FORCE_POINT_SOURCEf,
+                                                 double* h_stf_pre_compute,
                                                  int* myrankf) {
-  
-TRACE("compute_add_sources_acoustic_cuda");
-  
+
+TRACE("compute_add_sources_ac_cuda");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // check if anything to do
+  if( mp->nsources_local == 0 ) return;
+
   int phase_is_inner = *phase_is_innerf;
   //int SIMULATION_TYPE = *SIMULATION_TYPEf;
   int NSOURCES = *NSOURCESf;
   //int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
-  int myrank = *myrankf;  
-  
+  int myrank = *myrankf;
+
   int num_blocks_x = NSOURCES;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
@@ -566,51 +622,56 @@
     num_blocks_y = num_blocks_y*2;
   }
 
-  // copies pre-computed source time factors onto GPU  
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
-  
+  // copies pre-computed source time factors onto GPU
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+                                     NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(5,5,5);
-  
+
   compute_add_sources_acoustic_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
-                                                        mp->d_ibool, 
-                                                        mp->d_ispec_is_inner, 
-                                                        phase_is_inner, 
-                                                        mp->d_sourcearrays, 
+                                                        mp->d_ibool,
+                                                        mp->d_ispec_is_inner,
+                                                        phase_is_inner,
+                                                        mp->d_sourcearrays,
                                                         mp->d_stf_pre_compute,
-                                                        myrank, 
+                                                        myrank,
                                                         mp->d_islice_selected_source,
                                                         mp->d_ispec_selected_source,
-                                                        mp->d_ispec_is_acoustic, 
+                                                        mp->d_ispec_is_acoustic,
                                                         mp->d_kappastore,
                                                         NSOURCES);
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("compute_add_sources_acoustic_cuda");  
-#endif    
+  exit_on_cuda_error("compute_add_sources_ac_cuda");
+#endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(compute_add_sources_acoustic_sim3_cuda,
-              COMPUTE_ADD_SOURCES_ACOUSTIC_SIM3_CUDA)(long* Mesh_pointer_f, 
+extern "C"
+void FC_FUNC_(compute_add_sources_ac_s3_cuda,
+              COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
                                                       int* phase_is_innerf,
-                                                      int* NSOURCESf, 
+                                                      int* NSOURCESf,
                                                       int* SIMULATION_TYPEf,
-                                                      int* USE_FORCE_POINT_SOURCEf, 
-                                                      double* h_stf_pre_compute, 
+                                                      int* USE_FORCE_POINT_SOURCEf,
+                                                      double* h_stf_pre_compute,
                                                       int* myrankf) {
-  
-TRACE("compute_add_sources_acoustic_sim3_cuda");
-  
+
+TRACE("compute_add_sources_ac_s3_cuda");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // check if anything to do
+  if( mp->nsources_local == 0 ) return;
+
   int phase_is_inner = *phase_is_innerf;
   //int SIMULATION_TYPE = *SIMULATION_TYPEf;
   int NSOURCES = *NSOURCESf;
   //int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
-  int myrank = *myrankf;  
-  
+  int myrank = *myrankf;
+
   int num_blocks_x = NSOURCES;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
@@ -618,28 +679,29 @@
     num_blocks_y = num_blocks_y*2;
   }
 
-  // copies source time factors onto GPU  
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
-  
+  // copies source time factors onto GPU
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+                                     NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(5,5,5);
-  
+
   compute_add_sources_acoustic_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
-                                                        mp->d_ibool, 
-                                                        mp->d_ispec_is_inner, 
-                                                        phase_is_inner, 
-                                                        mp->d_sourcearrays, 
+                                                        mp->d_ibool,
+                                                        mp->d_ispec_is_inner,
+                                                        phase_is_inner,
+                                                        mp->d_sourcearrays,
                                                         mp->d_stf_pre_compute,
-                                                        myrank, 
+                                                        myrank,
                                                         mp->d_islice_selected_source,
                                                         mp->d_ispec_selected_source,
-                                                        mp->d_ispec_is_acoustic, 
+                                                        mp->d_ispec_is_acoustic,
                                                         mp->d_kappastore,
                                                         NSOURCES);
-    
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("compute_add_sources_acoustic_sim3_cuda");  
-#endif    
+  exit_on_cuda_error("compute_add_sources_ac_s3_cuda");
+#endif
 }
 
 
@@ -649,7 +711,7 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void add_sources_acoustic_SIM_TYPE_2_OR_3_kernel(float* potential_dot_dot_acoustic, 
+__global__ void add_sources_acoustic_SIM_TYPE_2_OR_3_kernel(float* potential_dot_dot_acoustic,
                                                             int nrec,
                                                             int pre_computed_index,
                                                             float* adj_sourcearrays,
@@ -664,51 +726,51 @@
                                                             int nadj_rec_local,
                                                             int NTSTEP_BETWEEN_ADJSRC,
                                                             int myrank) {
-  
+
   int irec = blockIdx.x + gridDim.x*blockIdx.y;
-  
+
   //float kappal;
   int i,j,k,iglob,ispec;
-  
+
   // because of grid shape, irec can be too big
-  if(irec<nrec) { 
-    
+  if(irec<nrec) {
+
     // adds source only if this proc carries the sources
     if( myrank == islice_selected_rec[irec] ){
-      
+
       // adds acoustic source
-      ispec = ispec_selected_rec[irec]-1;      
-      if( ispec_is_acoustic[ispec] == 1 ){
-        
+      ispec = ispec_selected_rec[irec]-1;
+      if( ispec_is_acoustic[ispec] ){
+
         // checks if element is in phase_is_inner run
         if(ispec_is_inner[ispec] == phase_is_inner) {
           i = threadIdx.x;
           j = threadIdx.y;
           k = threadIdx.z;
           iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-          
+
           //kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
-          
+
           //potential_dot_dot_acoustic[iglob] += adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
           //                                            pre_computed_irec_local_index[irec],
           //                                            pre_computed_index,
           //                                            0,
-          //                                            i,j,k)]/kappal;            
+          //                                            i,j,k)]/kappal;
 
-          // beware, for acoustic medium, a pressure source would be taking the negative 
+          // beware, for acoustic medium, a pressure source would be taking the negative
           // and divide by Kappa of the fluid;
           // this would have to be done when constructing the adjoint source.
           //
           // note: we take the first component of the adj_sourcearrays
           //          the idea is to have e.g. a pressure source, where all 3 components would be the same
-          
+
           atomicAdd(&potential_dot_dot_acoustic[iglob],
                     +adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
                                              pre_computed_irec_local_index[irec],pre_computed_index-1,
-                                             0,i,j,k)] // / kappal 
+                                             0,i,j,k)] // / kappal
                                              );
-        }    
-      }  
+        }
+      }
     }
   }
 }
@@ -716,23 +778,23 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 
-extern "C" 
-void FC_FUNC_(add_sources_acoustic_sim_type_2_or_3_cuda,
-              ADD_SOURCES_ACOUSTIC_SIM_TYPE_2_OR_3_CUDA)(long* Mesh_pointer, 
+extern "C"
+void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda,
+              ADD_SOURCES_AC_SIM_2_OR_3_CUDA)(long* Mesh_pointer,
                                                          float* h_adj_sourcearrays,
-                                                         int* size_adj_sourcearrays, 
+                                                         int* size_adj_sourcearrays,
                                                          int* phase_is_inner,
-                                                         int* myrank, 
-                                                         int* nrec, 
+                                                         int* myrank,
+                                                         int* nrec,
                                                          int* pre_computed_index,
                                                          int* h_islice_selected_rec,
                                                          int* nadj_rec_local,
                                                          int* NTSTEP_BETWEEN_ADJSRC) {
-  
-TRACE("add_sources_acoustic_sim_type_2_or_3_cuda");
-  
+
+TRACE("add_sources_ac_sim_2_or_3_cuda");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-  
+
   // make sure grid dimension is less than 65535 in x dimension
   int num_blocks_x = *nrec;
   int num_blocks_y = 1;
@@ -742,23 +804,23 @@
   }
   dim3 grid(num_blocks_x,num_blocks_y,1);
   dim3 threads(5,5,5);
-  
+
   // copies source arrays
-  // daniel: todo workaround -- adj_sourcearrays can be very big, but here only at 
+  // daniel: todo workaround -- adj_sourcearrays can be very big, but here only at
   //             specific time it (pre_computed_irec_local_index) is actually needed...
   float* d_adj_sourcearrays;
   print_CUDA_error_if_any(cudaMalloc((void**)&d_adj_sourcearrays,
                                      (*size_adj_sourcearrays)*sizeof(float)),731);
   print_CUDA_error_if_any(cudaMemcpy(d_adj_sourcearrays, h_adj_sourcearrays,
                                      (*size_adj_sourcearrays)*sizeof(float),cudaMemcpyHostToDevice),732);
-  
+
   //int* h_pre_computed_irec_local_index = new int[*nadj_rec_local];
   int* h_pre_computed_irec_local_index = (int*) calloc(*nrec,sizeof(int));
-  
+
   int* d_pre_computed_irec_local_index;
   print_CUDA_error_if_any(cudaMalloc((void**)&d_pre_computed_irec_local_index,
                                      (*nrec)*sizeof(int)),741);
-  
+
   // the irec_local variable needs to be precomputed (as
   // h_pre_comp..), because normally it is in the loop updating accel,
   // and due to how it's incremented, it cannot be parallized
@@ -766,39 +828,39 @@
   for(int irec=0;irec<*nrec;irec++) {
     if(*myrank == h_islice_selected_rec[irec]) {
       h_pre_computed_irec_local_index[irec] = irec_local;
-      irec_local++;      
+      irec_local++;
     }
     else h_pre_computed_irec_local_index[irec] = 0;
   }
-  
+
   //daniel
   //printf("irec local: rank=%d irec_local=%d nadj_rec_local=%d nrec=%d \n",*myrank,irec_local,*nadj_rec_local,*nrec);
-  
+
   print_CUDA_error_if_any(cudaMemcpy(d_pre_computed_irec_local_index,h_pre_computed_irec_local_index,
                                      (*nrec)*sizeof(int),cudaMemcpyHostToDevice),742);
-  
-  add_sources_acoustic_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic, 
-                                                                *nrec, 
+
+  add_sources_acoustic_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+                                                                *nrec,
                                                                 *pre_computed_index,
-                                                                d_adj_sourcearrays, 
+                                                                d_adj_sourcearrays,
                                                                 mp->d_ibool,
                                                                 mp->d_ispec_is_inner,
                                                                 mp->d_ispec_is_acoustic,
                                                                 mp->d_kappastore,
                                                                 mp->d_ispec_selected_rec,
-                                                                *phase_is_inner,                                                       
+                                                                *phase_is_inner,
                                                                 mp->d_islice_selected_rec,
                                                                 d_pre_computed_irec_local_index,
                                                                 *nadj_rec_local,
                                                                 *NTSTEP_BETWEEN_ADJSRC,
                                                                 *myrank);
-  
+
   //delete h_pre_computed_irec_local_index;
   free(h_pre_computed_irec_local_index);
   cudaFree(d_adj_sourcearrays);
-  cudaFree(d_pre_computed_irec_local_index);  
-  
+  cudaFree(d_pre_computed_irec_local_index);
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("add_sources_acoustic_SIM_TYPE_2_OR_3_kernel");  
-#endif    
+  exit_on_cuda_error("add_sources_acoustic_SIM_TYPE_2_OR_3_kernel");
+#endif
 }

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -44,46 +44,46 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void compute_coupling_acoustic_el_kernel(float* displ, 
-                                                    float* potential_dot_dot_acoustic, 
+__global__ void compute_coupling_acoustic_el_kernel(float* displ,
+                                                    float* potential_dot_dot_acoustic,
                                                     int num_coupling_ac_el_faces,
                                                     int* coupling_ac_el_ispec,
-                                                    int* coupling_ac_el_ijk, 
+                                                    int* coupling_ac_el_ijk,
                                                     float* coupling_ac_el_normal,
                                                     float* coupling_ac_el_jacobian2Dw,
                                                     int* ibool,
-                                                    int* ispec_is_inner, 
+                                                    int* ispec_is_inner,
                                                     int phase_is_inner) {
-  
-  int igll = threadIdx.x; 
-  int iface = blockIdx.x + gridDim.x*blockIdx.y; 
-  
+
+  int igll = threadIdx.x;
+  int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
   int i,j,k,iglob,ispec;
   realw displ_x,displ_y,displ_z,displ_n;
   realw nx,ny,nz;
   realw jacobianw;
-  
+
   if( iface < num_coupling_ac_el_faces){
-  
-    // don't compute points outside NGLLSQUARE==NGLL2==25  
+
+    // don't compute points outside NGLLSQUARE==NGLL2==25
     // way 2: no further check needed since blocksize = 25
-    //  if(igll<NGLL2) {    
-    
+    //  if(igll<NGLL2) {
+
     // "-1" from index values to convert from Fortran-> C indexing
     ispec = coupling_ac_el_ispec[iface]-1;
-    
+
     if(ispec_is_inner[ispec] == phase_is_inner ) {
-      
+
       i = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1;
       j = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
       k = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
       iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-  1;
-      
+
       // elastic displacement on global point
       displ_x = displ[iglob*3] ; // (1,iglob)
       displ_y = displ[iglob*3+1] ; // (2,iglob)
       displ_z = displ[iglob*3+2] ; // (3,iglob)
-      
+
       // gets associated normal on GLL point
       nx = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; // (1,igll,iface)
       ny = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; // (2,igll,iface)
@@ -92,17 +92,17 @@
       // calculates displacement component along normal
       // (normal points outwards of acoustic element)
       displ_n = displ_x*nx + displ_y*ny + displ_z*nz;
-      
-      
-      // gets associated, weighted jacobian      
-      jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];            
-      
+
+
+      // gets associated, weighted jacobian
+      jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
       //daniel
       //if( igll == 0 ) printf("gpu: %i %i %i %i %i %e \n",i,j,k,ispec,iglob,jacobianw);
 
 
       // continuity of pressure and normal displacement on global point
-      
+
       // note: newark time scheme together with definition of scalar potential:
       //          pressure = - chi_dot_dot
       //          requires that this coupling term uses the updated displacement at time step [t+delta_t],
@@ -111,73 +111,73 @@
       //          it also means you have to calculate and update this here first before
       //          calculating the coupling on the elastic side for the acceleration...
       atomicAdd(&potential_dot_dot_acoustic[iglob],+ jacobianw*displ_n);
-            
+
     }
-  //  }  
+  //  }
   }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(compute_coupling_acoustic_el_cuda,
-              COMPUTE_COUPLING_ACOUSTIC_EL_CUDA)(
-                                            long* Mesh_pointer_f, 
-                                            int* phase_is_innerf, 
-                                            int* num_coupling_ac_el_facesf, 
+extern "C"
+void FC_FUNC_(compute_coupling_ac_el_cuda,
+              COMPUTE_COUPLING_AC_EL_CUDA)(
+                                            long* Mesh_pointer_f,
+                                            int* phase_is_innerf,
+                                            int* num_coupling_ac_el_facesf,
                                             int* SIMULATION_TYPEf) {
-  TRACE("compute_coupling_acoustic_el_cuda");
+  TRACE("compute_coupling_ac_el_cuda");
   //double start_time = get_time();
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
   int phase_is_inner            = *phase_is_innerf;
   int num_coupling_ac_el_faces  = *num_coupling_ac_el_facesf;
   int SIMULATION_TYPE           = *SIMULATION_TYPEf;
-  
+
   // way 1: exact blocksize to match NGLLSQUARE
-  int blocksize = 25; 
-  
+  int blocksize = 25;
+
   int num_blocks_x = num_coupling_ac_el_faces;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
 
 //daniel
 // printf("gpu: %i %i %i \n",num_coupling_ac_el_faces,SIMULATION_TYPE,phase_is_inner);
 
-    
+
   compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_displ,
                                                        mp->d_potential_dot_dot_acoustic,
                                                        num_coupling_ac_el_faces,
-                                                       mp->d_coupling_ac_el_ispec, 
-                                                       mp->d_coupling_ac_el_ijk, 
+                                                       mp->d_coupling_ac_el_ispec,
+                                                       mp->d_coupling_ac_el_ijk,
                                                        mp->d_coupling_ac_el_normal,
-                                                       mp->d_coupling_ac_el_jacobian2Dw, 
-                                                       mp->d_ibool, 
-                                                       mp->d_ispec_is_inner, 
+                                                       mp->d_coupling_ac_el_jacobian2Dw,
+                                                       mp->d_ibool,
+                                                       mp->d_ispec_is_inner,
                                                        phase_is_inner);
-  
+
   //  adjoint simulations
-  if (SIMULATION_TYPE == 3 ){  
+  if (SIMULATION_TYPE == 3 ){
     compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_b_displ,
                                                           mp->d_b_potential_dot_dot_acoustic,
                                                           num_coupling_ac_el_faces,
-                                                          mp->d_coupling_ac_el_ispec, 
-                                                          mp->d_coupling_ac_el_ijk, 
+                                                          mp->d_coupling_ac_el_ispec,
+                                                          mp->d_coupling_ac_el_ijk,
                                                           mp->d_coupling_ac_el_normal,
-                                                          mp->d_coupling_ac_el_jacobian2Dw, 
-                                                          mp->d_ibool, 
-                                                          mp->d_ispec_is_inner, 
+                                                          mp->d_coupling_ac_el_jacobian2Dw,
+                                                          mp->d_ibool,
+                                                          mp->d_ispec_is_inner,
                                                           phase_is_inner);
-    
+
   }
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING  
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
   exit_on_cuda_error("compute_coupling_acoustic_el_kernel");
@@ -191,56 +191,56 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void compute_coupling_elastic_ac_kernel(float* potential_dot_dot_acoustic, 
-                                                    float* accel, 
+__global__ void compute_coupling_elastic_ac_kernel(float* potential_dot_dot_acoustic,
+                                                    float* accel,
                                                     int num_coupling_ac_el_faces,
                                                     int* coupling_ac_el_ispec,
-                                                    int* coupling_ac_el_ijk, 
+                                                    int* coupling_ac_el_ijk,
                                                     float* coupling_ac_el_normal,
                                                     float* coupling_ac_el_jacobian2Dw,
                                                     int* ibool,
-                                                    int* ispec_is_inner, 
+                                                    int* ispec_is_inner,
                                                     int phase_is_inner) {
-  
-  int igll = threadIdx.x; 
-  int iface = blockIdx.x + gridDim.x*blockIdx.y; 
-  
+
+  int igll = threadIdx.x;
+  int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
   int i,j,k,iglob,ispec;
   realw pressure;
   realw nx,ny,nz;
   realw jacobianw;
-  
+
   if( iface < num_coupling_ac_el_faces){
-    
-    // don't compute points outside NGLLSQUARE==NGLL2==25  
+
+    // don't compute points outside NGLLSQUARE==NGLL2==25
     // way 2: no further check needed since blocksize = 25
-    //  if(igll<NGLL2) {    
-    
+    //  if(igll<NGLL2) {
+
     // "-1" from index values to convert from Fortran-> C indexing
     ispec = coupling_ac_el_ispec[iface]-1;
-    
+
     if(ispec_is_inner[ispec] == phase_is_inner ) {
-      
+
       i = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1;
       j = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
       k = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
       iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-  1;
-      
+
       // acoustic pressure on global point
       pressure = - potential_dot_dot_acoustic[iglob];
-            
+
       // gets associated normal on GLL point
       nx = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; // (1,igll,iface)
       ny = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; // (2,igll,iface)
       nz = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,2,igll,iface)]; // (3,igll,iface)
-      
-      // gets associated, weighted jacobian      
-      jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];            
-      
+
+      // gets associated, weighted jacobian
+      jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
       //daniel
       //if( igll == 0 ) printf("gpu: %i %i %i %i %i %e \n",i,j,k,ispec,iglob,jacobianw);
-      
-      
+
+
       // continuity of displacement and pressure on global point
       //
       // note: newark time scheme together with definition of scalar potential:
@@ -254,73 +254,73 @@
       atomicAdd(&accel[iglob*3+1],+ jacobianw*ny*pressure);
       atomicAdd(&accel[iglob*3+2],+ jacobianw*nz*pressure);
     }
-    //  }  
+    //  }
   }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(compute_coupling_elastic_ac_cuda,
-              COMPUTE_COUPLING_ELASTIC_AC_CUDA)(
-                                                 long* Mesh_pointer_f, 
-                                                 int* phase_is_innerf, 
-                                                 int* num_coupling_ac_el_facesf, 
+extern "C"
+void FC_FUNC_(compute_coupling_el_ac_cuda,
+              COMPUTE_COUPLING_EL_AC_CUDA)(
+                                                 long* Mesh_pointer_f,
+                                                 int* phase_is_innerf,
+                                                 int* num_coupling_ac_el_facesf,
                                                  int* SIMULATION_TYPEf) {
-  TRACE("compute_coupling_elastic_ac_cuda");
+  TRACE("compute_coupling_el_ac_cuda");
   //double start_time = get_time();
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
   int phase_is_inner            = *phase_is_innerf;
   int num_coupling_ac_el_faces  = *num_coupling_ac_el_facesf;
   int SIMULATION_TYPE           = *SIMULATION_TYPEf;
-  
+
   // way 1: exact blocksize to match NGLLSQUARE
-  int blocksize = 25; 
-  
+  int blocksize = 25;
+
   int num_blocks_x = num_coupling_ac_el_faces;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-  
+
   //daniel
   // printf("gpu: %i %i %i \n",num_coupling_ac_el_faces,SIMULATION_TYPE,phase_is_inner);
-  
-  
+
+
   compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
                                                        mp->d_accel,
                                                        num_coupling_ac_el_faces,
-                                                       mp->d_coupling_ac_el_ispec, 
-                                                       mp->d_coupling_ac_el_ijk, 
+                                                       mp->d_coupling_ac_el_ispec,
+                                                       mp->d_coupling_ac_el_ijk,
                                                        mp->d_coupling_ac_el_normal,
-                                                       mp->d_coupling_ac_el_jacobian2Dw, 
-                                                       mp->d_ibool, 
-                                                       mp->d_ispec_is_inner, 
+                                                       mp->d_coupling_ac_el_jacobian2Dw,
+                                                       mp->d_ibool,
+                                                       mp->d_ispec_is_inner,
                                                        phase_is_inner);
-  
+
   //  adjoint simulations
-  if (SIMULATION_TYPE == 3 ){  
+  if (SIMULATION_TYPE == 3 ){
     compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
                                                          mp->d_b_accel,
                                                          num_coupling_ac_el_faces,
-                                                         mp->d_coupling_ac_el_ispec, 
-                                                         mp->d_coupling_ac_el_ijk, 
+                                                         mp->d_coupling_ac_el_ispec,
+                                                         mp->d_coupling_ac_el_ijk,
                                                          mp->d_coupling_ac_el_normal,
-                                                         mp->d_coupling_ac_el_jacobian2Dw, 
-                                                         mp->d_ibool, 
-                                                         mp->d_ispec_is_inner, 
+                                                         mp->d_coupling_ac_el_jacobian2Dw,
+                                                         mp->d_ibool,
+                                                         mp->d_ispec_is_inner,
                                                          phase_is_inner);
-    
+
   }
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING  
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
-  exit_on_cuda_error("compute_coupling_elastic_ac_cuda");
+  exit_on_cuda_error("compute_coupling_el_ac_cuda");
 #endif
 }

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -42,22 +42,22 @@
 
 // prepares a device array with with all inter-element edge-nodes -- this
 // is followed by a memcpy and MPI operations
-__global__ void prepare_boundary_potential_on_device(float* d_potential_dot_dot_acoustic, 
+__global__ void prepare_boundary_potential_on_device(float* d_potential_dot_dot_acoustic,
                                                      float* d_send_potential_dot_dot_buffer,
-                                                     int num_interfaces_ext_mesh, 
+                                                     int num_interfaces_ext_mesh,
                                                      int max_nibool_interfaces_ext_mesh,
                                                      int* d_nibool_interfaces_ext_mesh,
                                                      int* d_ibool_interfaces_ext_mesh) {
 
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-  int iinterface=0;  
-  
+  int iinterface=0;
+
   for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
     if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
       d_send_potential_dot_dot_buffer[(id + max_nibool_interfaces_ext_mesh*iinterface)] =
         d_potential_dot_dot_acoustic[(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
     }
-  }  
+  }
 
 }
 
@@ -66,22 +66,24 @@
 
 // prepares and transfers the inter-element edge-nodes to the host to be MPI'd
 extern "C"
-void FC_FUNC_(transfer_boundary_potential_from_device,
-              TRANSFER_BOUNDARY_POTENTIAL_FROM_DEVICE)(
-                                              int* size, 
-                                              long* Mesh_pointer_f, 
-                                              float* potential_dot_dot_acoustic, 
+void FC_FUNC_(transfer_boun_pot_from_device,
+              TRANSFER_BOUN_POT_FROM_DEVICE)(
+                                              int* size,
+                                              long* Mesh_pointer_f,
+                                              float* potential_dot_dot_acoustic,
                                               float* send_potential_dot_dot_buffer,
-                                              int* num_interfaces_ext_mesh, 
+                                              int* num_interfaces_ext_mesh,
                                               int* max_nibool_interfaces_ext_mesh,
-                                              int* nibool_interfaces_ext_mesh, 
+                                              int* nibool_interfaces_ext_mesh,
                                               int* ibool_interfaces_ext_mesh,
                                               int* FORWARD_OR_ADJOINT){
 
-TRACE("transfer_boundary_potential_from_device");
+TRACE("transfer_boun_pot_from_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
+  if( *num_interfaces_ext_mesh == 0 ) return;
+
   int blocksize = 256;
   int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
   int num_blocks_x = size_padded/blocksize;
@@ -90,11 +92,11 @@
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-    
-  if(*FORWARD_OR_ADJOINT == 1) {  
+
+  if(*FORWARD_OR_ADJOINT == 1) {
     prepare_boundary_potential_on_device<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
                                                          mp->d_send_potential_dot_dot_buffer,
                                                          *num_interfaces_ext_mesh,
@@ -108,17 +110,12 @@
                                                            *num_interfaces_ext_mesh,
                                                            *max_nibool_interfaces_ext_mesh,
                                                            mp->d_nibool_interfaces_ext_mesh,
-                                                           mp->d_ibool_interfaces_ext_mesh);    
+                                                           mp->d_ibool_interfaces_ext_mesh);
   }
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("prepare_boundary_kernel");
-#endif
 
-  
   cudaMemcpy(send_potential_dot_dot_buffer,mp->d_send_potential_dot_dot_buffer,
-	      *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real),cudaMemcpyDeviceToHost);
-  
+        *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(realw),cudaMemcpyDeviceToHost);
+
   // finish timing of kernel+memcpy
   // cudaEventRecord( stop, 0 );
   // cudaEventSynchronize( stop );
@@ -126,22 +123,24 @@
   // cudaEventDestroy( start );
   // cudaEventDestroy( stop );
   // printf("boundary xfer d->h Time: %f ms\n",time);
-  
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("prepare_boundary_kernel");
+#endif
 }
 
 
 /* ----------------------------------------------------------------------------------------------- */
 
 
-__global__ void assemble_boundary_potential_on_device(float* d_potential_dot_dot_acoustic, 
+__global__ void assemble_boundary_potential_on_device(float* d_potential_dot_dot_acoustic,
                                                       float* d_send_potential_dot_dot_buffer,
-                                                      int num_interfaces_ext_mesh, 
+                                                      int num_interfaces_ext_mesh,
                                                       int max_nibool_interfaces_ext_mesh,
                                                       int* d_nibool_interfaces_ext_mesh,
                                                       int* d_ibool_interfaces_ext_mesh) {
 
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-  int iinterface=0;  
+  int iinterface=0;
 
   for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
     if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
@@ -149,7 +148,7 @@
       // for testing atomic operations against not atomic operations (0.1ms vs. 0.04 ms)
       // d_potential_dot_dot_acoustic[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)] +=
       // d_send_potential_dot_dot_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)];
-            
+
       atomicAdd(&d_potential_dot_dot_acoustic[(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)],
                 d_send_potential_dot_dot_buffer[(id + max_nibool_interfaces_ext_mesh*iinterface)]);
     }
@@ -166,34 +165,34 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C"  
-void FC_FUNC_(transfer_and_assemble_potential_to_device,
-              TRANSFER_AND_ASSEMBLE_POTENTIAL_TO_DEVICE)(
-                                                long* Mesh_pointer, 
-                                                real* potential_dot_dot_acoustic, 
-                                                real* buffer_recv_scalar_ext_mesh,
-                                                int* num_interfaces_ext_mesh, 
+extern "C"
+void FC_FUNC_(transfer_asmbl_pot_to_device,
+              TRANSFER_ASMBL_POT_TO_DEVICE)(
+                                                long* Mesh_pointer,
+                                                realw* potential_dot_dot_acoustic,
+                                                realw* buffer_recv_scalar_ext_mesh,
+                                                int* num_interfaces_ext_mesh,
                                                 int* max_nibool_interfaces_ext_mesh,
-                                                int* nibool_interfaces_ext_mesh, 
+                                                int* nibool_interfaces_ext_mesh,
                                                 int* ibool_interfaces_ext_mesh,
                                                 int* FORWARD_OR_ADJOINT) {
 
-TRACE("transfer_and_assemble_potential_to_device");
+TRACE("transfer_asmbl_pot_to_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
   //double start_time = get_time();
-  // cudaEvent_t start, stop; 
-  // float time; 
-  // cudaEventCreate(&start); 
-  // cudaEventCreate(&stop); 
+  // cudaEvent_t start, stop;
+  // float time;
+  // cudaEventCreate(&start);
+  // cudaEventCreate(&stop);
   // cudaEventRecord( start, 0 );
 
-  // copies buffer onto GPU  
-  cudaMemcpy(mp->d_send_potential_dot_dot_buffer, buffer_recv_scalar_ext_mesh, 
-             *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real), cudaMemcpyHostToDevice);
+  // copies buffer onto GPU
+  cudaMemcpy(mp->d_send_potential_dot_dot_buffer, buffer_recv_scalar_ext_mesh,
+             *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(realw), cudaMemcpyHostToDevice);
 
   // assembles on GPU
-  int blocksize = 256;  
+  int blocksize = 256;
   int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
   int num_blocks_x = size_padded/blocksize;
   int num_blocks_y = 1;
@@ -205,25 +204,25 @@
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
 
-  if(*FORWARD_OR_ADJOINT == 1) { 
+  if(*FORWARD_OR_ADJOINT == 1) {
     //assemble forward field
-    assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic, 
+    assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
                                                           mp->d_send_potential_dot_dot_buffer,
                                                           *num_interfaces_ext_mesh,
                                                           *max_nibool_interfaces_ext_mesh,
                                                           mp->d_nibool_interfaces_ext_mesh,
                                                           mp->d_ibool_interfaces_ext_mesh);
   }
-  else if(*FORWARD_OR_ADJOINT == 3) { 
+  else if(*FORWARD_OR_ADJOINT == 3) {
     //assemble reconstructed/backward field
-    assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic, 
+    assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
                                                             mp->d_send_potential_dot_dot_buffer,
                                                             *num_interfaces_ext_mesh,
                                                             *max_nibool_interfaces_ext_mesh,
                                                             mp->d_nibool_interfaces_ext_mesh,
-                                                            mp->d_ibool_interfaces_ext_mesh);    
+                                                            mp->d_ibool_interfaces_ext_mesh);
   }
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   // cudaEventRecord( stop, 0 );
   // cudaEventSynchronize( stop );
@@ -233,8 +232,8 @@
   // printf("Boundary Assemble Kernel Execution Time: %f ms\n",time);
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
-  exit_on_cuda_error("transfer_and_assemble_potential_to_device");
-#endif  
+  exit_on_cuda_error("transfer_asmbl_pot_to_device");
+#endif
 }
 
 
@@ -242,14 +241,14 @@
 
 void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, int SIMULATION_TYPE);
 
-__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_acoustic, 
+__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_acoustic,
                                        int num_phase_ispec_acoustic, int d_iphase,
-                                       float* d_potential_acoustic, float* d_potential_dot_dot_acoustic, 
-                                       float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz, 
-                                       float* d_gammax, float* d_gammay, float* d_gammaz, 
-                                       float* hprime_xx, float* hprime_yy, float* hprime_zz, 
-                                       float* hprimewgll_xx, float* hprimewgll_yy, float* hprimewgll_zz, 
-                                       float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,                                       
+                                       float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
+                                       float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
+                                       float* d_gammax, float* d_gammay, float* d_gammaz,
+                                       float* hprime_xx, float* hprime_yy, float* hprime_zz,
+                                       float* hprimewgll_xx, float* hprimewgll_yy, float* hprimewgll_zz,
+                                       float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
                                        float* d_rhostore);
 
 
@@ -269,29 +268,31 @@
 
 TRACE("compute_forces_acoustic_cuda");
   //double start_time = get_time();
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
 
   int num_elements;
-  
+
   if( *iphase == 1 )
     num_elements = *nspec_outer_acoustic;
   else
-    num_elements = *nspec_inner_acoustic;  
+    num_elements = *nspec_inner_acoustic;
 
-  //int myrank;  
+  if( num_elements == 0 ) return;
+
+  //int myrank;
   /* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
   /* if(myrank==0) { */
-  
-  Kernel_2_acoustic(num_elements, mp, *iphase, *SIMULATION_TYPE);  
-  
-  cudaThreadSynchronize();
 
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  Kernel_2_acoustic(num_elements, mp, *iphase, *SIMULATION_TYPE);
+
+  //cudaThreadSynchronize();
+
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   /* MPI_Barrier(MPI_COMM_WORLD); */
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
-#endif
+//#endif
 }
 
 
@@ -303,14 +304,14 @@
 
 void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, int SIMULATION_TYPE)
 {
-    
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("before acoustic kernel Kernel 2");
 #endif
-    
+
     /* if the grid can handle the number of blocks, we let it be 1D */
     /* grid_2_x = nb_elem_color; */
-    /* nb_elem_color is just how many blocks we are computing now */    
+    /* nb_elem_color is just how many blocks we are computing now */
 
     int num_blocks_x = nb_blocks_to_compute;
     int num_blocks_y = 1;
@@ -318,49 +319,53 @@
       num_blocks_x = ceil(num_blocks_x/2.0);
       num_blocks_y = num_blocks_y*2;
     }
-    
+
     int threads_2 = 128;//BLOCK_SIZE_K2;
-    dim3 grid_2(num_blocks_x,num_blocks_y);    
+    dim3 grid_2(num_blocks_x,num_blocks_y);
 
-    
+
     // Cuda timing
-    // cudaEvent_t start, stop; 
-    // float time; 
-    // cudaEventCreate(&start); 
-    // cudaEventCreate(&stop); 
+    // cudaEvent_t start, stop;
+    // float time;
+    // cudaEventCreate(&start);
+    // cudaEventCreate(&stop);
     // cudaEventRecord( start, 0 );
-    
-    Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
-    						 mp->d_phase_ispec_inner_acoustic, mp->num_phase_ispec_acoustic, d_iphase,
-    						 mp->d_potential_acoustic, mp->d_potential_dot_dot_acoustic,
-    						 mp->d_xix, mp->d_xiy, mp->d_xiz,
-    						 mp->d_etax, mp->d_etay, mp->d_etaz,
-    						 mp->d_gammax, mp->d_gammay, mp->d_gammaz,
-                 mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz, 
-                 mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz, 
-                 mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,                 
-    						 mp->d_rhostore);
 
+    Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
+                                                           mp->NGLOB_AB, mp->d_ibool,
+                                                           mp->d_phase_ispec_inner_acoustic,
+                                                           mp->num_phase_ispec_acoustic, d_iphase,
+                                                           mp->d_potential_acoustic, mp->d_potential_dot_dot_acoustic,
+                                                           mp->d_xix, mp->d_xiy, mp->d_xiz,
+                                                           mp->d_etax, mp->d_etay, mp->d_etaz,
+                                                           mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+                                                           mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
+                                                           mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
+                                                           mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+                                                           mp->d_rhostore);
+
     if(SIMULATION_TYPE == 3) {
-      Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
-                  mp->d_phase_ispec_inner_acoustic,mp->num_phase_ispec_acoustic, d_iphase,
-                  mp->d_b_potential_acoustic, mp->d_b_potential_dot_dot_acoustic,
-                  mp->d_xix, mp->d_xiy, mp->d_xiz,
-                  mp->d_etax, mp->d_etay, mp->d_etaz,
-                  mp->d_gammax, mp->d_gammay, mp->d_gammaz,
-                  mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz, 
-                  mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
-                  mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,                 
-                  mp->d_rhostore);
+      Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
+                                                            mp->NGLOB_AB, mp->d_ibool,
+                                                            mp->d_phase_ispec_inner_acoustic,
+                                                            mp->num_phase_ispec_acoustic, d_iphase,
+                                                            mp->d_b_potential_acoustic, mp->d_b_potential_dot_dot_acoustic,
+                                                            mp->d_xix, mp->d_xiy, mp->d_xiz,
+                                                            mp->d_etax, mp->d_etay, mp->d_etaz,
+                                                            mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+                                                            mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
+                                                            mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
+                                                            mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+                                                            mp->d_rhostore);
     }
-    
+
     // cudaEventRecord( stop, 0 );
     // cudaEventSynchronize( stop );
     // cudaEventElapsedTime( &time, start, stop );
     // cudaEventDestroy( start );
     // cudaEventDestroy( stop );
     // printf("Kernel2 Execution Time: %f ms\n",time);
-        
+
     /* cudaThreadSynchronize(); */
     /* TRACE("Kernel 2 finished"); */
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -377,26 +382,26 @@
 
 
 __global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
-                                      int* d_phase_ispec_inner_acoustic, 
+                                      int* d_phase_ispec_inner_acoustic,
                                       int num_phase_ispec_acoustic, int d_iphase,
-                                      float* d_potential_acoustic, float* d_potential_dot_dot_acoustic, 
-                                      float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz, 
-                                      float* d_gammax, float* d_gammay, float* d_gammaz, 
-                                      float* hprime_xx, float* hprime_yy, float* hprime_zz, 
+                                      float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
+                                      float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
+                                      float* d_gammax, float* d_gammay, float* d_gammaz,
+                                      float* hprime_xx, float* hprime_yy, float* hprime_zz,
                                       float* hprimewgll_xx, float* hprimewgll_yy, float* hprimewgll_zz,
                                       float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
                                       float* d_rhostore){
-    
+
   int bx = blockIdx.y*gridDim.x+blockIdx.x;
   int tx = threadIdx.x;
 
   const int NGLL3 = 125;
   const int NGLL3_ALIGN = 128;
-    
+
   int K = (tx/NGLL2);
   int J = ((tx-K*NGLL2)/NGLLX);
   int I = (tx-K*NGLL2-J*NGLLX);
-    
+
   int active,offset,offset1,offset2,offset3;
   int iglob = 0;
   int working_element;
@@ -409,33 +414,33 @@
     int l;
     float hp1,hp2,hp3;
 #endif
-    
+
     __shared__ reald s_dummy_loc[NGLL3];
-    
+
     __shared__ reald s_temp1[NGLL3];
     __shared__ reald s_temp2[NGLL3];
     __shared__ reald s_temp3[NGLL3];
-    
+
 // use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
 // because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
     active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
-    
+
 // copy from global memory to shared memory
 // each thread writes one of the NGLL^3 = 125 data points
     if (active) {
       // iphase-1 and working_element-1 for Fortran->C array conventions
       working_element = d_phase_ispec_inner_acoustic[bx + num_phase_ispec_acoustic*(d_iphase-1)]-1;
-      // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;                 
+      // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
       iglob = d_ibool[working_element*125 + tx]-1;
-      
+
 #ifdef USE_TEXTURES
         s_dummy_loc[tx] = tex1Dfetch(tex_potential_acoustic, iglob);
 #else
-	// changing iglob indexing to match fortran row changes fast style
+  // changing iglob indexing to match fortran row changes fast style
         s_dummy_loc[tx] = d_potential_acoustic[iglob];
 #endif
-    }  
-  
+    }
+
 // synchronize all the threads (one thread for each of the NGLL grid points of the
 // current spectral element) because we need the whole element to be ready in order
 // to be able to compute the matrix products along cut planes of the 3D element below
@@ -446,10 +451,10 @@
     if (active) {
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-//      if(iglob == 0 )printf("kernel 2: iglob %i  hprime_xx %f %f %f \n",iglob,hprime_xx[0],hprime_xx[1],hprime_xx[2]);  
+//      if(iglob == 0 )printf("kernel 2: iglob %i  hprime_xx %f %f %f \n",iglob,hprime_xx[0],hprime_xx[1],hprime_xx[2]);
 #endif
-      
 
+
 #ifndef MANUALLY_UNROLLED_LOOPS
 
         temp1l = 0.f;
@@ -461,7 +466,7 @@
             hp1 = hprime_xx[l*NGLLX+I];
             offset1 = K*NGLL2+J*NGLLX+l;
             temp1l += s_dummy_loc[offset1]*hp1;
-	    
+
             // daniel: not more assumes that hprime_xx = hprime_yy = hprime_zz
             hp2 = hprime_yy[l*NGLLX+J];
             offset2 = K*NGLL2+l*NGLLX+I;
@@ -477,8 +482,8 @@
                 + s_dummy_loc[K*NGLL2+J*NGLLX+1]*hprime_xx[NGLLX+I]
                 + s_dummy_loc[K*NGLL2+J*NGLLX+2]*hprime_xx[2*NGLLX+I]
                 + s_dummy_loc[K*NGLL2+J*NGLLX+3]*hprime_xx[3*NGLLX+I]
-                + s_dummy_loc[K*NGLL2+J*NGLLX+4]*hprime_xx[4*NGLLX+I];	    
-  
+                + s_dummy_loc[K*NGLL2+J*NGLLX+4]*hprime_xx[4*NGLLX+I];
+
         temp2l = s_dummy_loc[K*NGLL2+I]*hprime_yy[J]
                 + s_dummy_loc[K*NGLL2+NGLLX+I]*hprime_yy[NGLLX+J]
                 + s_dummy_loc[K*NGLL2+2*NGLLX+I]*hprime_yy[2*NGLLX+J]
@@ -511,15 +516,15 @@
                           +xizl*(etaxl*gammayl-etayl*gammaxl));
 
         // derivatives of potential
-        dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l;	
+        dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l;
         dpotentialdyl = xiyl*temp1l + etayl*temp2l + gammayl*temp3l;
         dpotentialdzl = xizl*temp1l + etazl*temp2l + gammazl*temp3l;
-	
+
         // density (reciproc)
         rho_invl = 1.f / d_rhostore[offset];
 
         // form the dot product with the test vector
-        s_temp1[tx] = jacobianl * rho_invl * (dpotentialdxl*xixl + dpotentialdyl*xiyl + dpotentialdzl*xizl);	
+        s_temp1[tx] = jacobianl * rho_invl * (dpotentialdxl*xixl + dpotentialdyl*xiyl + dpotentialdzl*xizl);
         s_temp2[tx] = jacobianl * rho_invl * (dpotentialdxl*etaxl + dpotentialdyl*etayl + dpotentialdzl*etazl);
         s_temp3[tx] = jacobianl * rho_invl * (dpotentialdxl*gammaxl + dpotentialdyl*gammayl + dpotentialdzl*gammazl);
     }
@@ -541,7 +546,7 @@
             fac1 = hprimewgll_xx[I*NGLLX+l];
             offset1 = K*NGLL2+J*NGLLX+l;
             temp1l += s_temp1[offset1]*fac1;
-          
+
             //daniel: not more assumes hprimewgll_xx = hprimewgll_yy = hprimewgll_zz
             fac2 = hprimewgll_yy[J*NGLLX+l];
             offset2 = K*NGLL2+l*NGLLX+I;
@@ -558,8 +563,8 @@
                 + s_temp1[K*NGLL2+J*NGLLX+2]*hprimewgll_xx[I*NGLLX+2]
                 + s_temp1[K*NGLL2+J*NGLLX+3]*hprimewgll_xx[I*NGLLX+3]
                 + s_temp1[K*NGLL2+J*NGLLX+4]*hprimewgll_xx[I*NGLLX+4];
-  
 
+
         temp2l = s_temp2[K*NGLL2+I]*hprimewgll_yy[J*NGLLX]
                 + s_temp2[K*NGLL2+NGLLX+I]*hprimewgll_yy[J*NGLLX+1]
                 + s_temp2[K*NGLL2+2*NGLLX+I]*hprimewgll_yy[J*NGLLX+2]
@@ -581,16 +586,16 @@
         fac3 = wgllwgll_xy[J*NGLLX+I];
 
 #ifdef USE_TEXTURES
-        d_potential_dot_dot_acoustic[iglob] = tex1Dfetch(tex_potential_dot_dot_acoustic, iglob) 
+        d_potential_dot_dot_acoustic[iglob] = tex1Dfetch(tex_potential_dot_dot_acoustic, iglob)
                                               - (fac1*temp1l + fac2*temp2l + fac3*temp3l);
 #else
-	/* OLD version that uses coloring to get around race condition. About 1.6x faster */
+  /* OLD version that uses coloring to get around race condition. About 1.6x faster */
       // d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
       // d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
-      // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);		
-	
-        atomicAdd(&d_potential_dot_dot_acoustic[iglob],-(fac1*temp1l + fac2*temp2l + fac3*temp3l));		
-	
+      // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+
+        atomicAdd(&d_potential_dot_dot_acoustic[iglob],-(fac1*temp1l + fac2*temp2l + fac3*temp3l));
+
 #endif
     }
 
@@ -607,31 +612,31 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 
-__global__ void kernel_3_a_acoustic_cuda_device(float* potential_dot_dot_acoustic, 
+__global__ void kernel_3_a_acoustic_cuda_device(float* potential_dot_dot_acoustic,
                                                 int size,
                                                 float* rmass_acoustic) {
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-  
+
   /* because of block and grid sizing problems, there is a small */
   /* amount of buffer at the end of the calculation */
-  if(id < size) {     
+  if(id < size) {
     // multiplies pressure with the inverse of the mass matrix
-    potential_dot_dot_acoustic[id] = potential_dot_dot_acoustic[id]*rmass_acoustic[id];     
+    potential_dot_dot_acoustic[id] = potential_dot_dot_acoustic[id]*rmass_acoustic[id];
   }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 __global__ void kernel_3_b_acoustic_cuda_device(float* potential_dot_acoustic,
-                                                float* potential_dot_dot_acoustic, 
+                                                float* potential_dot_dot_acoustic,
                                                 int size,
-                                                real deltatover2, 
+                                                realw deltatover2,
                                                 float* rmass_acoustic) {
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-  
+
   /* because of block and grid sizing problems, there is a small */
   /* amount of buffer at the end of the calculation */
-  if(id < size) {     
+  if(id < size) {
     // Newmark time scheme: corrector term
     potential_dot_acoustic[id] = potential_dot_acoustic[id] + deltatover2*potential_dot_dot_acoustic[id];
   }
@@ -642,7 +647,7 @@
 extern "C"
 void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
                              long* Mesh_pointer,
-                             int* size_F, 
+                             int* size_F,
                              int* SIMULATION_TYPE) {
 
 TRACE("kernel_3_a_acoustic_cuda");
@@ -660,17 +665,17 @@
    }
    dim3 grid(num_blocks_x,num_blocks_y);
    dim3 threads(blocksize,1,1);
-   
-   kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_dot_acoustic, 
-                                                       size,  
+
+   kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_dot_acoustic,
+                                                       size,
                                                        mp->d_rmass_acoustic);
 
    if(*SIMULATION_TYPE == 3) {
-     kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_dot_acoustic, 
+     kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_dot_acoustic,
                                                          size,
                                                          mp->d_rmass_acoustic);
    }
-   
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
   exit_on_cuda_error("after kernel 3 a");
@@ -682,18 +687,18 @@
 extern "C"
 void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
                                                              long* Mesh_pointer,
-                                                             int* size_F, 
-                                                             float* deltatover2_F, 
-                                                             int* SIMULATION_TYPE, 
+                                                             int* size_F,
+                                                             float* deltatover2_F,
+                                                             int* SIMULATION_TYPE,
                                                              float* b_deltatover2_F) {
 
 TRACE("kernel_3_b_acoustic_cuda");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
   int size = *size_F;
-  real deltatover2 = *deltatover2_F;
-  real b_deltatover2 = *b_deltatover2_F;
-  
+  realw deltatover2 = *deltatover2_F;
+  realw b_deltatover2 = *b_deltatover2_F;
+
   int blocksize=128;
   int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
   int num_blocks_x = size_padded/blocksize;
@@ -704,19 +709,19 @@
   }
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-  
-  kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_acoustic, 
-                                                    mp->d_potential_dot_dot_acoustic, 
-                                                    size, deltatover2, 
+
+  kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_acoustic,
+                                                    mp->d_potential_dot_dot_acoustic,
+                                                    size, deltatover2,
                                                     mp->d_rmass_acoustic);
-  
+
   if(*SIMULATION_TYPE == 3) {
-    kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_acoustic, 
-                                                      mp->d_b_potential_dot_dot_acoustic, 
+    kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_acoustic,
+                                                      mp->d_b_potential_dot_dot_acoustic,
                                                       size, b_deltatover2,
                                                       mp->d_rmass_acoustic);
   }
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
   exit_on_cuda_error("after kernel 3 b");
@@ -733,68 +738,71 @@
 
 
 __global__ void enforce_free_surface_cuda_kernel(
-                                       float* potential_acoustic, 
+                                       float* potential_acoustic,
                                        float* potential_dot_acoustic,
-                                       float* potential_dot_dot_acoustic, 
+                                       float* potential_dot_dot_acoustic,
                                        int num_free_surface_faces,
                                        int* free_surface_ispec,
                                        int* free_surface_ijk,
                                        int* ibool,
                                        int* ispec_is_acoustic) {
   // gets spectral element face id
-  int iface = blockIdx.x + gridDim.x*blockIdx.y;   
-  
+  int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
   // for all faces on free surface
   if( iface < num_free_surface_faces ){
-  
+
     int ispec = free_surface_ispec[iface]-1;
 
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING    
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
 //  if( iface > 648-1 ){printf("device iface: %i \n",iface);}
 //#endif
-  
+
     // checks if element is in acoustic domain
-    if( ispec_is_acoustic[ispec] == 1 ){
-  
+    if( ispec_is_acoustic[ispec] ){
+
       // gets global point index
-      int igll = threadIdx.x + threadIdx.y*blockDim.x;  
+      int igll = threadIdx.x + threadIdx.y*blockDim.x;
 
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING    
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
 //      if( igll > 25-1 ){printf("device igll: %i \n",igll);}
 //#endif
-    
+
       int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface)
       int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
       int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-      
+
       int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
-      
+
       // sets potentials to zero at free surface
       potential_acoustic[iglob] = 0;
       potential_dot_acoustic[iglob] = 0;
-      potential_dot_dot_acoustic[iglob] = 0; 
+      potential_dot_dot_acoustic[iglob] = 0;
 
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING    
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
 //    if( ispec == 160 && igll < 25 ){printf("device: %i %i %i %i %i \n",igll,i,j,k,iglob);}
 //#endif
     }
-  }  
+  }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(acoustic_enforce_free_surface_cuda,
-              ACOUSTIC_ENFORCE_FREE_SURFACE_CUDA)(long* Mesh_pointer_f, 
+void FC_FUNC_(acoustic_enforce_free_surf_cuda,
+              ACOUSTIC_ENFORCE_FREE_SURF_CUDA)(long* Mesh_pointer_f,
                                                   int* SIMULATION_TYPE,
                                                   int* ABSORB_FREE_SURFACE) {
-  
-TRACE("acoustic_enforce_free_surface_cuda");
 
+TRACE("acoustic_enforce_free_surf_cuda");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   // checks if anything to do
   if( *ABSORB_FREE_SURFACE == 0 ){
+
+    // does not absorb free surface, thus we enforce the potential to be zero at surface
+
     // block sizes
     int num_blocks_x = mp->num_free_surface_faces;
     int num_blocks_y = 1;
@@ -808,45 +816,45 @@
     //#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
     // debugging
     //int* d_debug;
-    //printf("acoustic_enforce_free_surface_cuda ...\n");
+    //printf("acoustic_enforce_free_surf_cuda ...\n");
     //print_CUDA_error_if_any(cudaMalloc((void**)&d_debug,128*sizeof(int)),999);
 
     //int* h_debug;
     //h_debug = (int*) calloc(128,sizeof(int));
     //for(int i=0;i<128;i++){h_debug[i] = 0;}
     //cudaMemcpy(d_debug,h_debug,128*sizeof(int),cudaMemcpyHostToDevice);
-    
-    //printf("acoustic_enforce_free_surface_cuda start...\n");
+
+    //printf("acoustic_enforce_free_surf_cuda start...\n");
     //doesnt' work...: printf("free_surface_ispec: %i %i %i \n",mp->d_free_surface_ispec[0],mp->d_free_surface_ispec[1],mp->d_free_surface_ispec[2]);
     //printf("free_surface_ispec: %i \n",mp->num_free_surface_faces);
 
     //cudaThreadSynchronize();
     //#endif
-    
+
     // sets potentials to zero at free surface
-    enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_potential_acoustic, 
-                                                       mp->d_potential_dot_acoustic, 
+    enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_potential_acoustic,
+                                                       mp->d_potential_dot_acoustic,
                                                        mp->d_potential_dot_dot_acoustic,
                                                        mp->num_free_surface_faces,
-                                                       mp->d_free_surface_ispec, 
+                                                       mp->d_free_surface_ispec,
                                                        mp->d_free_surface_ijk,
                                                        mp->d_ibool,
                                                        mp->d_ispec_is_acoustic);
     // for backward/reconstructed potentials
     if(*SIMULATION_TYPE == 3) {
-      enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic, 
-                                                         mp->d_b_potential_dot_acoustic, 
+      enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic,
+                                                         mp->d_b_potential_dot_acoustic,
                                                          mp->d_b_potential_dot_dot_acoustic,
                                                          mp->num_free_surface_faces,
-                                                         mp->d_free_surface_ispec, 
+                                                         mp->d_free_surface_ispec,
                                                          mp->d_free_surface_ijk,
                                                          mp->d_ibool,
-                                                         mp->d_ispec_is_acoustic);      
-                                                         
+                                                         mp->d_ispec_is_acoustic);
+
     }
     //#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
     //cudaThreadSynchronize();
-    //cudaMemcpy(h_debug,d_debug,128*sizeof(int),cudaMemcpyDeviceToHost);    
+    //cudaMemcpy(h_debug,d_debug,128*sizeof(int),cudaMemcpyDeviceToHost);
     //for(int i=0;i<25;i++) {printf("ispec d_debug = %d \n",h_debug[i]);}
     //cudaFree(d_debug);
     //free(h_debug);
@@ -854,9 +862,9 @@
     //#endif
 
   }
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("enforce_free_surface_cuda");
-#endif  
+#endif
 }
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -52,17 +52,17 @@
 
 
 void Kernel_2(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
-	      int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE,int ATTENUATION);
+        int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE,int ATTENUATION);
 
-//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic, 
+//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic,
 //                            int num_phase_ispec_elastic, int d_iphase, int* d_ibool);
 
 __global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
                               int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,
-                              float* d_displ, float* d_accel, 
-                              float* d_xix, float* d_xiy, float* d_xiz, 
-                              float* d_etax, float* d_etay, float* d_etaz, 
-                              float* d_gammax, float* d_gammay, float* d_gammaz, 
+                              float* d_displ, float* d_accel,
+                              float* d_xix, float* d_xiy, float* d_xiz,
+                              float* d_etax, float* d_etay, float* d_etaz,
+                              float* d_gammax, float* d_gammay, float* d_gammaz,
                               float* d_kappav, float* d_muv,
                               float* d_debug,
                               int COMPUTE_AND_STORE_STRAIN,
@@ -70,7 +70,7 @@
                               float* epsilondev_xz,float* epsilondev_yz,float* epsilon_trace_over_3,
                               int SIMULATION_TYPE,
                               int ATTENUATION,int NSPEC,
-                              float* one_minus_sum_beta,float* factor_common,                              
+                              float* one_minus_sum_beta,float* factor_common,
                               float* R_xx,float* R_yy,float* R_xy,float* R_xz,float* R_yz,
                               float* alphaval,float* betaval,float* gammaval);
 
@@ -80,46 +80,48 @@
 // prepares a device array with with all inter-element edge-nodes -- this
 // is followed by a memcpy and MPI operations
 __global__ void prepare_boundary_accel_on_device(float* d_accel, float* d_send_accel_buffer,
-						 int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
-						 int* d_nibool_interfaces_ext_mesh,
-						 int* d_ibool_interfaces_ext_mesh) {
+             int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
+             int* d_nibool_interfaces_ext_mesh,
+             int* d_ibool_interfaces_ext_mesh) {
 
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
   //int bx = blockIdx.y*gridDim.x+blockIdx.x;
   //int tx = threadIdx.x;
-  int iinterface=0;  
-  
+  int iinterface=0;
+
   for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
     if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
       d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)] =
-	d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
+        d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
       d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1] =
-	d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1];
+        d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1];
       d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2] =
-	d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2];
+        d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2];
     }
-  }  
+  }
 
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 // prepares and transfers the inter-element edge-nodes to the host to be MPI'd
-extern "C" 
-void FC_FUNC_(transfer_boundary_accel_from_device,
-              TRANSFER_BOUNDARY_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, float* accel,
+// (elements on boundary)
+extern "C"
+void FC_FUNC_(transfer_boun_accel_from_device,
+              TRANSFER_BOUN_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, float* accel,
                                                     float* send_accel_buffer,
                                                     int* num_interfaces_ext_mesh,
                                                     int* max_nibool_interfaces_ext_mesh,
                                                     int* nibool_interfaces_ext_mesh,
                                                     int* ibool_interfaces_ext_mesh,
                                                     int* FORWARD_OR_ADJOINT){
-TRACE("transfer_boundary_accel_from_device");
+TRACE("transfer_boun_accel_from_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-    
+
+  if( *num_interfaces_ext_mesh == 0 ) return;
+
   int blocksize = 256;
-
   int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
   int num_blocks_x = size_padded/blocksize;
   int num_blocks_y = 1;
@@ -127,39 +129,35 @@
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-  
+
   //timing for memory xfer
-  // cudaEvent_t start, stop; 
-  // float time; 
-  // cudaEventCreate(&start); 
-  // cudaEventCreate(&stop); 
+  // cudaEvent_t start, stop;
+  // float time;
+  // cudaEventCreate(&start);
+  // cudaEventCreate(&stop);
   // cudaEventRecord( start, 0 );
   if(*FORWARD_OR_ADJOINT == 1) {
-  prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel,mp->d_send_accel_buffer,
-						     *num_interfaces_ext_mesh,
-						     *max_nibool_interfaces_ext_mesh,
-						     mp->d_nibool_interfaces_ext_mesh,
-						     mp->d_ibool_interfaces_ext_mesh);
+    prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel,mp->d_send_accel_buffer,
+                 *num_interfaces_ext_mesh,
+                 *max_nibool_interfaces_ext_mesh,
+                 mp->d_nibool_interfaces_ext_mesh,
+                 mp->d_ibool_interfaces_ext_mesh);
   }
   else if(*FORWARD_OR_ADJOINT == 3) {
     prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel,mp->d_send_accel_buffer,
-						     *num_interfaces_ext_mesh,
-						     *max_nibool_interfaces_ext_mesh,
-						     mp->d_nibool_interfaces_ext_mesh,
-						     mp->d_ibool_interfaces_ext_mesh);
+                 *num_interfaces_ext_mesh,
+                 *max_nibool_interfaces_ext_mesh,
+                 mp->d_nibool_interfaces_ext_mesh,
+                 mp->d_ibool_interfaces_ext_mesh);
   }
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("transfer_boundary_accel_from_device");  
-#endif
 
-  
+
   cudaMemcpy(send_accel_buffer,mp->d_send_accel_buffer,
-	     3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real),cudaMemcpyDeviceToHost);
-  
+             3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(realw),cudaMemcpyDeviceToHost);
+
   // finish timing of kernel+memcpy
   // cudaEventRecord( stop, 0 );
   // cudaEventSynchronize( stop );
@@ -167,20 +165,22 @@
   // cudaEventDestroy( start );
   // cudaEventDestroy( stop );
   // printf("boundary xfer d->h Time: %f ms\n",time);
-  
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("transfer_boun_accel_from_device");
+#endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 __global__ void assemble_boundary_accel_on_device(float* d_accel, float* d_send_accel_buffer,
-						 int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
-						 int* d_nibool_interfaces_ext_mesh,
-						 int* d_ibool_interfaces_ext_mesh) {
+             int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
+             int* d_nibool_interfaces_ext_mesh,
+             int* d_ibool_interfaces_ext_mesh) {
 
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
   //int bx = blockIdx.y*gridDim.x+blockIdx.x;
   //int tx = threadIdx.x;
-  int iinterface=0;  
+  int iinterface=0;
 
   for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
     if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
@@ -192,14 +192,14 @@
       // d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1];
       // d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2] +=
       // d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2];
-      
-      
+
+
       atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)],
-		d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)]);
+                d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)]);
       atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1],
-		d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1]);
+                d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1]);
       atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2],
-		d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2]);
+                d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2]);
     }
   }
   // ! This step is done via previous function transfer_and_assemble...
@@ -214,20 +214,22 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 // FORWARD_OR_ADJOINT == 1 for accel, and == 3 for b_accel
-extern "C" 
-void FC_FUNC_(transfer_and_assemble_accel_to_device,
-              TRANSFER_AND_ASSEMBLE_ACCEL_TO_DEVICE)(long* Mesh_pointer, real* accel,
-                                                    real* buffer_recv_vector_ext_mesh,
+extern "C"
+void FC_FUNC_(transfer_asmbl_accel_to_device,
+              TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer, realw* accel,
+                                                    realw* buffer_recv_vector_ext_mesh,
                                                     int* num_interfaces_ext_mesh,
                                                     int* max_nibool_interfaces_ext_mesh,
                                                     int* nibool_interfaces_ext_mesh,
-                                                    int* ibool_interfaces_ext_mesh,int* FORWARD_OR_ADJOINT) {
-TRACE("transfer_and_assemble_accel_to_device");
+                                                    int* ibool_interfaces_ext_mesh,
+                                                    int* FORWARD_OR_ADJOINT) {
+TRACE("transfer_asmbl_accel_to_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-  
-  cudaMemcpy(mp->d_send_accel_buffer, buffer_recv_vector_ext_mesh, 3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real), cudaMemcpyHostToDevice);
 
+  cudaMemcpy(mp->d_send_accel_buffer, buffer_recv_vector_ext_mesh,
+             3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(realw), cudaMemcpyHostToDevice);
+
   int blocksize = 256;
   int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
   int num_blocks_x = size_padded/blocksize;
@@ -240,24 +242,24 @@
   //double start_time = get_time();
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-  // cudaEvent_t start, stop; 
-  // float time; 
-  // cudaEventCreate(&start); 
-  // cudaEventCreate(&stop); 
+  // cudaEvent_t start, stop;
+  // float time;
+  // cudaEventCreate(&start);
+  // cudaEventCreate(&stop);
   // cudaEventRecord( start, 0 );
   if(*FORWARD_OR_ADJOINT == 1) { //assemble forward accel
     assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel, mp->d_send_accel_buffer,
-							*num_interfaces_ext_mesh,
-							*max_nibool_interfaces_ext_mesh,
-							mp->d_nibool_interfaces_ext_mesh,
-							mp->d_ibool_interfaces_ext_mesh);
+              *num_interfaces_ext_mesh,
+              *max_nibool_interfaces_ext_mesh,
+              mp->d_nibool_interfaces_ext_mesh,
+              mp->d_ibool_interfaces_ext_mesh);
   }
   else if(*FORWARD_OR_ADJOINT == 3) { //assemble adjoint accel
     assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel, mp->d_send_accel_buffer,
-							*num_interfaces_ext_mesh,
-							*max_nibool_interfaces_ext_mesh,
-							mp->d_nibool_interfaces_ext_mesh,
-							mp->d_ibool_interfaces_ext_mesh);
+              *num_interfaces_ext_mesh,
+              *max_nibool_interfaces_ext_mesh,
+              mp->d_nibool_interfaces_ext_mesh,
+              mp->d_ibool_interfaces_ext_mesh);
   }
 
   // cudaEventRecord( stop, 0 );
@@ -269,7 +271,7 @@
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
-  exit_on_cuda_error("transfer_and_assemble_accel_to_device_");
+  exit_on_cuda_error("transfer_asmbl_accel_to_device");
 #endif
 }
 
@@ -277,43 +279,45 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 
-extern "C" 
+extern "C"
 void FC_FUNC_(compute_forces_elastic_cuda,
               COMPUTE_FORCES_ELASTIC_CUDA)(long* Mesh_pointer_f,
                                            int* iphase,
                                            int* nspec_outer_elastic,
                                            int* nspec_inner_elastic,
+                                           int* SIMULATION_TYPE,
                                            int* COMPUTE_AND_STORE_STRAIN,
-                                           int* SIMULATION_TYPE,
                                            int* ATTENUATION) {
 
-TRACE("compute_forces_elastic_cuda");  
+TRACE("compute_forces_elastic_cuda");
 // EPIK_TRACER("compute_forces_elastic_cuda");
 //printf("Running compute_forces\n");
   //double start_time = get_time();
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
 
   int num_elements;
-  
+
   if( *iphase == 1 )
     num_elements = *nspec_outer_elastic;
   else
-    num_elements = *nspec_inner_elastic;  
+    num_elements = *nspec_inner_elastic;
 
-  //int myrank;  
+  if( num_elements == 0 ) return;
+
+  //int myrank;
   /* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
   /* if(myrank==0) { */
 
-  Kernel_2(num_elements,mp,*iphase,*COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE,*ATTENUATION);  
-  
-  cudaThreadSynchronize();
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  Kernel_2(num_elements,mp,*iphase,*COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE,*ATTENUATION);
+
+  //cudaThreadSynchronize();
+
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
 /* MPI_Barrier(MPI_COMM_WORLD); */
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
-#endif
+//#endif
 }
 
 
@@ -322,28 +326,28 @@
 // updates stress
 
 __device__ void compute_element_att_stress(int tx,int working_element,int NSPEC,
-                                           float* R_xx, 
-                                           float* R_yy, 
-                                           float* R_xy, 
-                                           float* R_xz, 
-                                           float* R_yz, 
-                                           reald* sigma_xx, 
-                                           reald* sigma_yy, 
+                                           float* R_xx,
+                                           float* R_yy,
+                                           float* R_xy,
+                                           float* R_xz,
+                                           float* R_yz,
+                                           reald* sigma_xx,
+                                           reald* sigma_yy,
                                            reald* sigma_zz,
-                                           reald* sigma_xy, 
+                                           reald* sigma_xy,
                                            reald* sigma_xz,
                                            reald* sigma_yz) {
 
   int i_sls,offset_sls;
   reald R_xx_val,R_yy_val;
-  
+
   for(i_sls = 0; i_sls < N_SLS; i_sls++){
     // index
     offset_sls = tx + 125*(working_element + NSPEC*i_sls);
-    
+
     R_xx_val = R_xx[offset_sls]; //(i,j,k,ispec,i_sls)
     R_yy_val = R_yy[offset_sls];
-    
+
     *sigma_xx = *sigma_xx - R_xx_val;
     *sigma_yy = *sigma_yy - R_yy_val;
     *sigma_zz = *sigma_zz + R_xx_val + R_yy_val;
@@ -352,7 +356,7 @@
     *sigma_yz = *sigma_yz - R_yz[offset_sls];
   }
   return;
-}  
+}
 
 /* ----------------------------------------------------------------------------------------------- */
 
@@ -375,59 +379,59 @@
   reald mul;
   reald alphaval_loc,betaval_loc,gammaval_loc;
   reald factor_loc,Sn,Snp1;
-  
+
   // indices
   offset_align = tx + 128 * working_element;
   ijk_ispec = tx + 125 * working_element;
-  
+
   mul = d_muv[offset_align];
-  
+
   // use Runge-Kutta scheme to march in time
   for(i_sls = 0; i_sls < N_SLS; i_sls++){
 
     // indices
     offset_common = i_sls + N_SLS*(tx + 125*working_element); // (i_sls,i,j,k,ispec)
     offset_sls = tx + 125*(working_element + NSPEC*i_sls);   // (i,j,k,ispec,i_sls)
-    
+
     factor_loc = mul * factor_common[offset_common]; //mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
-    
+
     alphaval_loc = alphaval[i_sls]; // (i_sls)
     betaval_loc = betaval[i_sls];
     gammaval_loc = gammaval[i_sls];
-    
+
     // term in xx
     Sn   = factor_loc * epsilondev_xx[ijk_ispec]; //(i,j,k,ispec)
     Snp1   = factor_loc * epsilondev_xx_loc; //(i,j,k)
-    
-    //R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + 
+
+    //R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) +
     //                          betaval_loc * Sn + gammaval_loc * Snp1;
 
-    R_xx[offset_sls] = alphaval_loc * R_xx[offset_sls] + 
+    R_xx[offset_sls] = alphaval_loc * R_xx[offset_sls] +
                        betaval_loc * Sn + gammaval_loc * Snp1;
 
     // term in yy
     Sn   = factor_loc * epsilondev_yy[ijk_ispec];
     Snp1   = factor_loc * epsilondev_yy_loc;
-    R_yy[offset_sls] = alphaval_loc * R_yy[offset_sls] + 
+    R_yy[offset_sls] = alphaval_loc * R_yy[offset_sls] +
                         betaval_loc * Sn + gammaval_loc * Snp1;
     // term in zz not computed since zero trace
     // term in xy
     Sn   = factor_loc * epsilondev_xy[ijk_ispec];
     Snp1   = factor_loc * epsilondev_xy_loc;
-    R_xy[offset_sls] = alphaval_loc * R_xy[offset_sls] + 
+    R_xy[offset_sls] = alphaval_loc * R_xy[offset_sls] +
                         betaval_loc * Sn + gammaval_loc * Snp1;
     // term in xz
     Sn   = factor_loc * epsilondev_xz[ijk_ispec];
     Snp1   = factor_loc * epsilondev_xz_loc;
-    R_xz[offset_sls] = alphaval_loc * R_xz[offset_sls] + 
+    R_xz[offset_sls] = alphaval_loc * R_xz[offset_sls] +
                         betaval_loc * Sn + gammaval_loc * Snp1;
     // term in yz
     Sn   = factor_loc * epsilondev_yz[ijk_ispec];
     Snp1   = factor_loc * epsilondev_yz_loc;
-    R_yz[offset_sls] = alphaval_loc * R_yz[offset_sls] + 
+    R_yz[offset_sls] = alphaval_loc * R_yz[offset_sls] +
                         betaval_loc * Sn + gammaval_loc * Snp1;
   }
-  return;  
+  return;
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -436,129 +440,139 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-void Kernel_2(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
-              int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE,int ATTENUATION)
-  {
-    
+void Kernel_2(int nb_blocks_to_compute,
+              Mesh* mp,
+              int d_iphase,
+              int COMPUTE_AND_STORE_STRAIN,
+              int SIMULATION_TYPE,
+              int ATTENUATION){
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("before kernel Kernel 2");
 #endif
-    
-    /* if the grid can handle the number of blocks, we let it be 1D */
-    /* grid_2_x = nb_elem_color; */
-    /* nb_elem_color is just how many blocks we are computing now */    
 
-    int num_blocks_x = nb_blocks_to_compute;
-    int num_blocks_y = 1;
-    while(num_blocks_x > 65535) {
-      num_blocks_x = ceil(num_blocks_x/2.0);
-      num_blocks_y = num_blocks_y*2;
-    }
-    
-    int threads_2 = 128;//BLOCK_SIZE_K2;
-    dim3 grid_2(num_blocks_x,num_blocks_y);    
+  /* if the grid can handle the number of blocks, we let it be 1D */
+  /* grid_2_x = nb_elem_color; */
+  /* nb_elem_color is just how many blocks we are computing now */
 
-    // debugging
-    //printf("Starting with grid %dx%d for %d blocks\n",num_blocks_x,num_blocks_y,nb_blocks_to_compute);
-    float* d_debug;
+  int num_blocks_x = nb_blocks_to_compute;
+  int num_blocks_y = 1;
+  while(num_blocks_x > 65535) {
+    num_blocks_x = ceil(num_blocks_x/2.0);
+    num_blocks_y = num_blocks_y*2;
+  }
+
+  //int threads_2 = 128;//BLOCK_SIZE_K2;
+  //dim3 grid_2(num_blocks_x,num_blocks_y);
+
+  int blocksize = 128;
+  dim3 grid(num_blocks_x,num_blocks_y);
+  dim3 threads(blocksize,1,1);
+
+  // debugging
+  //printf("Starting with grid %dx%d for %d blocks\n",num_blocks_x,num_blocks_y,nb_blocks_to_compute);
+  float* d_debug;
 //    float* h_debug;
 //    h_debug = (float*)calloc(128,sizeof(float));
 //    cudaMalloc((void**)&d_debug,128*sizeof(float));
 //    cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-    
-    // Cuda timing
-    // cudaEvent_t start, stop; 
-    // float time; 
-    // cudaEventCreate(&start); 
-    // cudaEventCreate(&stop); 
-    // cudaEventRecord( start, 0 );
-    
-    Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
-						 mp->d_phase_ispec_inner_elastic,
-						 mp->d_num_phase_ispec_elastic, d_iphase,
-						 mp->d_displ, mp->d_accel,
-						 mp->d_xix, mp->d_xiy, mp->d_xiz,
-						 mp->d_etax, mp->d_etay, mp->d_etaz,
-						 mp->d_gammax, mp->d_gammay, mp->d_gammaz,
-						 mp->d_kappav, mp->d_muv,
-             d_debug,
-						 COMPUTE_AND_STORE_STRAIN,
-						 mp->d_epsilondev_xx,
-						 mp->d_epsilondev_yy,
-						 mp->d_epsilondev_xy,
-						 mp->d_epsilondev_xz,
-						 mp->d_epsilondev_yz,
-						 mp->d_epsilon_trace_over_3,
-						 SIMULATION_TYPE,
-             ATTENUATION,mp->NSPEC_AB,
-             mp->d_one_minus_sum_beta,mp->d_factor_common,
-             mp->d_R_xx,mp->d_R_yy,mp->d_R_xy,mp->d_R_xz,mp->d_R_yz,
-             mp->d_alphaval,mp->d_betaval,mp->d_gammaval
-             );
-    
 
-    // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
-    // int procid;
-    // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
-    // if(procid==0) {
-    //   for(int i=0;i<17;i++) {
-    // 	printf("cudadebug[%d] = %e\n",i,h_debug[i]);
-    //   }
-    // }
+  // Cuda timing
+  // cudaEvent_t start, stop;
+  // float time;
+  // cudaEventCreate(&start);
+  // cudaEventCreate(&stop);
+  // cudaEventRecord( start, 0 );
+
+  //Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+  Kernel_2_impl<<<grid,threads>>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+                                               mp->d_phase_ispec_inner_elastic,
+                                               mp->num_phase_ispec_elastic,
+                                               d_iphase,
+                                               mp->d_displ, mp->d_accel,
+                                               mp->d_xix, mp->d_xiy, mp->d_xiz,
+                                               mp->d_etax, mp->d_etay, mp->d_etaz,
+                                               mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+                                               mp->d_kappav, mp->d_muv,
+                                               d_debug,
+                                               COMPUTE_AND_STORE_STRAIN,
+                                               mp->d_epsilondev_xx,
+                                               mp->d_epsilondev_yy,
+                                               mp->d_epsilondev_xy,
+                                               mp->d_epsilondev_xz,
+                                               mp->d_epsilondev_yz,
+                                               mp->d_epsilon_trace_over_3,
+                                               SIMULATION_TYPE,
+                                               ATTENUATION,mp->NSPEC_AB,
+                                               mp->d_one_minus_sum_beta,mp->d_factor_common,
+                                               mp->d_R_xx,mp->d_R_yy,mp->d_R_xy,mp->d_R_xz,mp->d_R_yz,
+                                               mp->d_alphaval,mp->d_betaval,mp->d_gammaval
+                                               );
+
+
+  // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+  // int procid;
+  // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+  // if(procid==0) {
+  //   for(int i=0;i<17;i++) {
+  //  printf("cudadebug[%d] = %e\n",i,h_debug[i]);
+  //   }
+  // }
 //    free(h_debug);
 //    cudaFree(d_debug);
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-    exit_on_cuda_error("Kernel_2_impl");
- #endif
-    
-    if(SIMULATION_TYPE == 3) {
-      Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
-						   mp->d_phase_ispec_inner_elastic,
-						   mp->d_num_phase_ispec_elastic, d_iphase,
-						   mp->d_b_displ, mp->d_b_accel,
-						   mp->d_xix, mp->d_xiy, mp->d_xiz,
-						   mp->d_etax, mp->d_etay, mp->d_etaz,
-						   mp->d_gammax, mp->d_gammay, mp->d_gammaz,
-						   mp->d_kappav, mp->d_muv,
-               d_debug,
-						   COMPUTE_AND_STORE_STRAIN,
-						   mp->d_b_epsilondev_xx,
-						   mp->d_b_epsilondev_yy,
-						   mp->d_b_epsilondev_xy,
-						   mp->d_b_epsilondev_xz,
-						   mp->d_b_epsilondev_yz,
-						   mp->d_b_epsilon_trace_over_3,
-						   SIMULATION_TYPE,
-               ATTENUATION,mp->NSPEC_AB,
-               mp->d_one_minus_sum_beta,mp->d_factor_common,               
-               mp->d_b_R_xx,mp->d_b_R_yy,mp->d_b_R_xy,mp->d_b_R_xz,mp->d_b_R_yz,
-               mp->d_b_alphaval,mp->d_b_betaval,mp->d_b_gammaval
-               );
-    }
-    
-    // cudaEventRecord( stop, 0 );
-    // cudaEventSynchronize( stop );
-    // cudaEventElapsedTime( &time, start, stop );
-    // cudaEventDestroy( start );
-    // cudaEventDestroy( stop );
-    // printf("Kernel2 Execution Time: %f ms\n",time);
-    
-    // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
-    // for(int i=0;i<10;i++) {
-    // printf("debug[%d]=%e\n",i,h_debug[i]);
-    // }            
-    
-    /* cudaThreadSynchronize(); */
-    /* LOG("Kernel 2 finished"); */
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING    
-    exit_on_cuda_error("Kernel_2_impl SIM_TYPE==3");    
- #endif    
+// #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+//    exit_on_cuda_error("Kernel_2_impl");
+// #endif
 
+  if(SIMULATION_TYPE == 3) {
+    //Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+    Kernel_2_impl<<< grid,threads>>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+                                                 mp->d_phase_ispec_inner_elastic,
+                                                 mp->num_phase_ispec_elastic,
+                                                 d_iphase,
+                                                 mp->d_b_displ, mp->d_b_accel,
+                                                 mp->d_xix, mp->d_xiy, mp->d_xiz,
+                                                 mp->d_etax, mp->d_etay, mp->d_etaz,
+                                                 mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+                                                 mp->d_kappav, mp->d_muv,
+                                                 d_debug,
+                                                 COMPUTE_AND_STORE_STRAIN,
+                                                 mp->d_b_epsilondev_xx,
+                                                 mp->d_b_epsilondev_yy,
+                                                 mp->d_b_epsilondev_xy,
+                                                 mp->d_b_epsilondev_xz,
+                                                 mp->d_b_epsilondev_yz,
+                                                 mp->d_b_epsilon_trace_over_3,
+                                                 SIMULATION_TYPE,
+                                                 ATTENUATION,mp->NSPEC_AB,
+                                                 mp->d_one_minus_sum_beta,mp->d_factor_common,
+                                                 mp->d_b_R_xx,mp->d_b_R_yy,mp->d_b_R_xy,mp->d_b_R_xz,mp->d_b_R_yz,
+                                                 mp->d_b_alphaval,mp->d_b_betaval,mp->d_b_gammaval
+                                                 );
   }
 
+  // cudaEventRecord( stop, 0 );
+  // cudaEventSynchronize( stop );
+  // cudaEventElapsedTime( &time, start, stop );
+  // cudaEventDestroy( start );
+  // cudaEventDestroy( stop );
+  // printf("Kernel2 Execution Time: %f ms\n",time);
+
+  // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+  // for(int i=0;i<10;i++) {
+  // printf("debug[%d]=%e\n",i,h_debug[i]);
+  // }
+
+  /* cudaThreadSynchronize(); */
+  /* LOG("Kernel 2 finished"); */
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("Kernel_2_impl ");
+#endif
+}
+
 /* ----------------------------------------------------------------------------------------------- */
 
-//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic, 
+//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic,
 //                            int num_phase_ispec_elastic, int d_iphase, int* d_ibool) {
 //  int bx = blockIdx.x;
 //  int tx = threadIdx.x;
@@ -577,7 +591,7 @@
 //    /* d_debug_output[1] = d_ibool[working_element*NGLL3_ALIGN + tx]-1; */
 //  }
 //  /* d_debug_output[1+tx+128*bx] = 69.0; */
-//  
+//
 //}
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -592,10 +606,10 @@
 
 __global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
                               int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,
-                              float* d_displ, float* d_accel, 
-                              float* d_xix, float* d_xiy, float* d_xiz, 
-                              float* d_etax, float* d_etay, float* d_etaz, 
-                              float* d_gammax, float* d_gammay, float* d_gammaz, 
+                              float* d_displ, float* d_accel,
+                              float* d_xix, float* d_xiy, float* d_xiz,
+                              float* d_etax, float* d_etay, float* d_etaz,
+                              float* d_gammax, float* d_gammay, float* d_gammaz,
                               float* d_kappav, float* d_muv,
                               float* d_debug,
                               int COMPUTE_AND_STORE_STRAIN,
@@ -604,29 +618,29 @@
                               float* epsilon_trace_over_3,
                               int SIMULATION_TYPE,
                               int ATTENUATION, int NSPEC,
-                              float* one_minus_sum_beta,float* factor_common,                                   
+                              float* one_minus_sum_beta,float* factor_common,
                               float* R_xx, float* R_yy, float* R_xy, float* R_xz, float* R_yz,
                               float* alphaval,float* betaval,float* gammaval
                               ){
-    
+
   /* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
   int bx = blockIdx.y*gridDim.x+blockIdx.x;
   /* int bx = blockIdx.x; */
-  int tx = threadIdx.x;  
-  
+  int tx = threadIdx.x;
+
   //const int NGLLX = 5;
-  // const int NGLL2 = 25; 
+  // const int NGLL2 = 25;
   const int NGLL3 = 125;
   const int NGLL3_ALIGN = 128;
-    
+
   int K = (tx/NGLL2);
   int J = ((tx-K*NGLL2)/NGLLX);
   int I = (tx-K*NGLL2-J*NGLLX);
-    
+
   int active,offset;
   int iglob = 0;
   int working_element;
-  
+
   reald tempx1l,tempx2l,tempx3l,tempy1l,tempy2l,tempy3l,tempz1l,tempz2l,tempz3l;
   reald xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl;
   reald duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl;
@@ -640,11 +654,11 @@
     int l;
     float hp1,hp2,hp3;
 #endif
-    
+
     __shared__ reald s_dummyx_loc[NGLL3];
     __shared__ reald s_dummyy_loc[NGLL3];
     __shared__ reald s_dummyz_loc[NGLL3];
-    
+
     __shared__ reald s_tempx1[NGLL3];
     __shared__ reald s_tempx2[NGLL3];
     __shared__ reald s_tempx3[NGLL3];
@@ -654,19 +668,19 @@
     __shared__ reald s_tempz1[NGLL3];
     __shared__ reald s_tempz2[NGLL3];
     __shared__ reald s_tempz3[NGLL3];
-    
+
 // use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
 // because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
     active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
-    
+
 // copy from global memory to shared memory
 // each thread writes one of the NGLL^3 = 125 data points
     if (active) {
       // iphase-1 and working_element-1 for Fortran->C array conventions
       working_element = d_phase_ispec_inner_elastic[bx + num_phase_ispec_elastic*(d_iphase-1)]-1;
-      // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;                 
+      // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
       iglob = d_ibool[working_element*125 + tx]-1;
-      
+
 #ifdef USE_TEXTURES
       s_dummyx_loc[tx] = tex1Dfetch(tex_displ, iglob);
       s_dummyy_loc[tx] = tex1Dfetch(tex_displ, iglob + NGLOB);
@@ -708,7 +722,7 @@
           tempx1l += s_dummyx_loc[offset]*hp1;
           tempy1l += s_dummyy_loc[offset]*hp1;
           tempz1l += s_dummyz_loc[offset]*hp1;
-    
+
           hp2 = d_hprime_xx[l*NGLLX+J];
           offset = K*NGLL2+l*NGLLX+I;
           tempx2l += s_dummyx_loc[offset]*hp2;
@@ -724,10 +738,10 @@
     // if(working_element == 169 && tx == 0) {
     //   atomicAdd(&d_debug[0],1.0);
     //   d_debug[1+3*l] = tempz3l;
-    //   d_debug[2+3*l] = s_dummyz_loc[offset];	      
-    //   d_debug[3+3*l] = hp3;	      
+    //   d_debug[2+3*l] = s_dummyz_loc[offset];
+    //   d_debug[3+3*l] = hp3;
     // }
-    
+
       }
 #else
 
@@ -735,7 +749,7 @@
               + s_dummyx_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
               + s_dummyx_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
               + s_dummyx_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
-              + s_dummyx_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];	    
+              + s_dummyx_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
 
       tempy1l = s_dummyy_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
               + s_dummyy_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
@@ -800,7 +814,7 @@
       gammayl = d_gammay[offset];
       gammazl = d_gammaz[offset];
 
-      duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l;	
+      duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l;
       duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l;
       duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l;
 
@@ -831,12 +845,12 @@
         epsilondev_xz[offset] = 0.5f * duzdxl_plus_duxdzl;
         epsilondev_yz[offset] = 0.5f * duzdyl_plus_duydzl;
               */
-        // local storage: stresses at this current time step      
+        // local storage: stresses at this current time step
         epsilondev_xx_loc = duxdxl - templ;
         epsilondev_yy_loc = duydyl - templ;
         epsilondev_xy_loc = 0.5f * duxdyl_plus_duydxl;
         epsilondev_xz_loc = 0.5f * duzdxl_plus_duxdzl;
-        epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl;        
+        epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl;
 
         if(SIMULATION_TYPE == 3) {
           epsilon_trace_over_3[tx + working_element*125] = templ;
@@ -852,8 +866,8 @@
         // use unrelaxed parameters if attenuation
         mul  = mul * one_minus_sum_beta[tx+working_element*125]; // (i,j,k,ispec)
       }
-          
-      // isotropic case    
+
+      // isotropic case
       lambdalplus2mul = kappal + 1.33333333333333333333f * mul;  // 4./3. = 1.3333333
       lambdal = lambdalplus2mul - 2.0f * mul;
 
@@ -861,7 +875,7 @@
       sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl;
       sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl;
       sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl;
-	
+
       sigma_xy = mul*duxdyl_plus_duydxl;
       sigma_xz = mul*duzdxl_plus_duxdzl;
       sigma_yz = mul*duzdyl_plus_duydzl;
@@ -872,11 +886,11 @@
                                    R_xx,R_yy,R_xy,R_xz,R_yz,
                                    &sigma_xx,&sigma_yy,&sigma_zz,&sigma_xy,&sigma_xz,&sigma_yz);
       }
-          
+
       jacobianl = 1.0f / (xixl*(etayl*gammazl-etazl*gammayl)-xiyl*(etaxl*gammazl-etazl*gammaxl)+xizl*(etaxl*gammayl-etayl*gammaxl));
 
 // form the dot product with the test vector
-      s_tempx1[tx] = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl);	
+      s_tempx1[tx] = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl);
       s_tempy1[tx] = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_yz*xizl);
       s_tempz1[tx] = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl);
 
@@ -887,7 +901,7 @@
       s_tempx3[tx] = jacobianl * (sigma_xx*gammaxl + sigma_xy*gammayl + sigma_xz*gammazl);
       s_tempy3[tx] = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_yz*gammazl);
       s_tempz3[tx] = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl);
-	
+
     }
 
 // synchronize all the threads (one thread for each of the NGLL grid points of the
@@ -911,14 +925,14 @@
       tempy3l = 0.f;
       tempz3l = 0.f;
 
-      for (l=0;l<NGLLX;l++) {	  	  
-	  
+      for (l=0;l<NGLLX;l++) {
+
         fac1 = d_hprimewgll_xx[I*NGLLX+l];
         offset = K*NGLL2+J*NGLLX+l;
         tempx1l += s_tempx1[offset]*fac1;
         tempy1l += s_tempy1[offset]*fac1;
         tempz1l += s_tempz1[offset]*fac1;
-          
+
         fac2 = d_hprimewgll_yy[J*NGLLX+l];
         offset = K*NGLL2+l*NGLLX+I;
         tempx2l += s_tempx2[offset]*fac2;
@@ -1007,12 +1021,12 @@
       d_accel[iglob + NGLOB] = tex1Dfetch(tex_accel, iglob + NGLOB) - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
       d_accel[iglob + 2*NGLOB] = tex1Dfetch(tex_accel, iglob + 2*NGLOB) - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
 #else
-	/* OLD/To be implemented version that uses coloring to get around race condition. About 1.6x faster */
+  /* OLD/To be implemented version that uses coloring to get around race condition. About 1.6x faster */
       // d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
       // d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
-      // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);		
+      // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
 
-      if(iglob*3+2 == 41153) {
+      //if(iglob*3+2 == 41153) {
         // int ot = d_debug[5];
         // d_debug[0+1+ot] = d_accel[iglob*3+2];
         // // d_debug[1+1+ot] = fac1*tempz1l;
@@ -1024,12 +1038,12 @@
         // d_debug[4+1+ot] = d_accel[iglob*3+2]-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
         // atomicAdd(&d_debug[0],1.0);
         // d_debug[6+ot] = d_displ[iglob*3+2];
-      }
-	
-      atomicAdd(&d_accel[iglob*3],-(fac1*tempx1l + fac2*tempx2l + fac3*tempx3l));		
+      //}
+
+      atomicAdd(&d_accel[iglob*3],-(fac1*tempx1l + fac2*tempx2l + fac3*tempx3l));
       atomicAdd(&d_accel[iglob*3+1],-(fac1*tempy1l + fac2*tempy2l + fac3*tempy3l));
       atomicAdd(&d_accel[iglob*3+2],-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l));
-	
+
 #endif
 
       // update memory variables based upon the Runge-Kutta scheme
@@ -1045,9 +1059,9 @@
       // save deviatoric strain for Runge-Kutta scheme
       if( COMPUTE_AND_STORE_STRAIN ){
         int ijk_ispec = tx + working_element*125;
-        
+
         // fortran: epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
-        epsilondev_xx[ijk_ispec] = epsilondev_xx_loc;        
+        epsilondev_xx[ijk_ispec] = epsilondev_xx_loc;
         epsilondev_yy[ijk_ispec] = epsilondev_yy_loc;
         epsilondev_xy[ijk_ispec] = epsilondev_xy_loc;
         epsilondev_xz[ijk_ispec] = epsilondev_xz_loc;
@@ -1069,88 +1083,89 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void kernel_3_cuda_device(real* veloc,
-                                     real* accel, int size,
-                                     real deltatover2, real* rmass) {
+__global__ void kernel_3_cuda_device(realw* veloc,
+                                     realw* accel, int size,
+                                     realw deltatover2,
+                                     realw* rmass) {
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-  
+
   /* because of block and grid sizing problems, there is a small */
   /* amount of buffer at the end of the calculation */
   if(id < size) {
-    accel[3*id] = accel[3*id]*rmass[id]; 
-    accel[3*id+1] = accel[3*id+1]*rmass[id]; 
+    accel[3*id] = accel[3*id]*rmass[id];
+    accel[3*id+1] = accel[3*id+1]*rmass[id];
     accel[3*id+2] = accel[3*id+2]*rmass[id];
-    
+
     veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
     veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1];
-    veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];      
+    veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
   }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void kernel_3_accel_cuda_device(real* accel, 
+__global__ void kernel_3_accel_cuda_device(realw* accel,
                                            int size,
-                                           real* rmass) {
+                                           realw* rmass) {
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-  
+
   /* because of block and grid sizing problems, there is a small */
   /* amount of buffer at the end of the calculation */
   if(id < size) {
-    accel[3*id] = accel[3*id]*rmass[id]; 
-    accel[3*id+1] = accel[3*id+1]*rmass[id]; 
-    accel[3*id+2] = accel[3*id+2]*rmass[id];    
+    accel[3*id] = accel[3*id]*rmass[id];
+    accel[3*id+1] = accel[3*id+1]*rmass[id];
+    accel[3*id+2] = accel[3*id+2]*rmass[id];
   }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void kernel_3_veloc_cuda_device(real* veloc,
-                                           real* accel, 
+__global__ void kernel_3_veloc_cuda_device(realw* veloc,
+                                           realw* accel,
                                            int size,
-                                           real deltatover2) {
+                                           realw deltatover2) {
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-  
+
   /* because of block and grid sizing problems, there is a small */
   /* amount of buffer at the end of the calculation */
   if(id < size) {
     veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
     veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1];
-    veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];      
+    veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
   }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(kernel_3_a_cuda,
               KERNEL_3_A_CUDA)(long* Mesh_pointer,
-                               int* size_F, 
-                               float* deltatover2_F, 
-                               int* SIMULATION_TYPE_f, 
+                               int* size_F,
+                               float* deltatover2_F,
+                               int* SIMULATION_TYPE_f,
                                float* b_deltatover2_F,
                                int* OCEANS) {
-TRACE("kernel_3_a_cuda");  
+TRACE("kernel_3_a_cuda");
 
    Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
    int size = *size_F;
    int SIMULATION_TYPE = *SIMULATION_TYPE_f;
-   real deltatover2 = *deltatover2_F;
-   real b_deltatover2 = *b_deltatover2_F;
-  
+   realw deltatover2 = *deltatover2_F;
+   realw b_deltatover2 = *b_deltatover2_F;
+
    int blocksize=128;
    int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
-   
+
    int num_blocks_x = size_padded/blocksize;
    int num_blocks_y = 1;
    while(num_blocks_x > 65535) {
      num_blocks_x = ceil(num_blocks_x/2.0);
      num_blocks_y = num_blocks_y*2;
    }
-   
+
    dim3 grid(num_blocks_x,num_blocks_y);
    dim3 threads(blocksize,1,1);
-   
+
    // check whether we can update accel and veloc, or only accel at this point
    if( *OCEANS == 0 ){
      // updates both, accel and veloc
@@ -1160,14 +1175,14 @@
        kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc, mp->d_b_accel, size, b_deltatover2,mp->d_rmass);
      }
    }else{
-     // updates only accel 
+     // updates only accel
      kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel, size, mp->d_rmass);
-     
+
      if(SIMULATION_TYPE == 3) {
        kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_b_accel, size, mp->d_rmass);
-     }     
+     }
    }
-   
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
    //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
   exit_on_cuda_error("after kernel 3 a");
@@ -1176,21 +1191,21 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(kernel_3_b_cuda,
               KERNEL_3_B_CUDA)(long* Mesh_pointer,
-                             int* size_F, 
-                             float* deltatover2_F, 
-                             int* SIMULATION_TYPE_f, 
+                             int* size_F,
+                             float* deltatover2_F,
+                             int* SIMULATION_TYPE_f,
                              float* b_deltatover2_F) {
-  TRACE("kernel_3_b_cuda");  
-  
+  TRACE("kernel_3_b_cuda");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
   int size = *size_F;
   int SIMULATION_TYPE = *SIMULATION_TYPE_f;
-  real deltatover2 = *deltatover2_F;
-  real b_deltatover2 = *b_deltatover2_F;
-  
+  realw deltatover2 = *deltatover2_F;
+  realw b_deltatover2 = *b_deltatover2_F;
+
   int blocksize=128;
   int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
 
@@ -1203,14 +1218,14 @@
 
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-  
+
   // updates only veloc at this point
   kernel_3_veloc_cuda_device<<< grid, threads>>>(mp->d_veloc,mp->d_accel,size,deltatover2);
-    
+
   if(SIMULATION_TYPE == 3) {
     kernel_3_veloc_cuda_device<<< grid, threads>>>(mp->d_b_veloc,mp->d_b_accel,size,b_deltatover2);
-  }     
-  
+  }
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
   exit_on_cuda_error("after kernel 3 b");
@@ -1220,24 +1235,24 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-/* note: 
+/* note:
  constant arrays when used in compute_forces_acoustic_cuda.cu routines stay zero,
  constant declaration and cudaMemcpyToSymbol would have to be in the same file...
- 
+
  extern keyword doesn't work for __constant__ declarations.
- 
+
  also:
  cudaMemcpyToSymbol("deviceCaseParams", caseParams, sizeof(CaseParams));
  ..
  and compile with -arch=sm_20
- 
+
  see also: http://stackoverflow.com/questions/4008031/how-to-use-cuda-constant-memory-in-a-programmer-pleasant-way
  doesn't seem to work.
- 
+
  we could keep arrays separated for acoustic and elastic routines...
- 
+
  for now, we store pointers with cudaGetSymbolAddress() function calls.
- 
+
  */
 
 
@@ -1245,7 +1260,7 @@
 
 void setConst_hprime_xx(float* array,Mesh* mp)
 {
-  
+
   cudaError_t err = cudaMemcpyToSymbol(d_hprime_xx, array, NGLL2*sizeof(float));
   if (err != cudaSuccess)
   {
@@ -1253,17 +1268,17 @@
     fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
     exit(1);
   }
-  
+
   err = cudaGetSymbolAddress((void**)&(mp->d_hprime_xx),"d_hprime_xx");
   if(err != cudaSuccess) {
     fprintf(stderr, "Error with d_hprime_xx: %s\n", cudaGetErrorString(err));
     exit(1);
-  }      
+  }
 }
 
 void setConst_hprime_yy(float* array,Mesh* mp)
 {
-  
+
   cudaError_t err = cudaMemcpyToSymbol(d_hprime_yy, array, NGLL2*sizeof(float));
   if (err != cudaSuccess)
   {
@@ -1271,17 +1286,17 @@
     fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
     exit(1);
   }
-  
+
   err = cudaGetSymbolAddress((void**)&(mp->d_hprime_yy),"d_hprime_yy");
   if(err != cudaSuccess) {
     fprintf(stderr, "Error with d_hprime_yy: %s\n", cudaGetErrorString(err));
     exit(1);
-  }      
+  }
 }
 
 void setConst_hprime_zz(float* array,Mesh* mp)
 {
-  
+
   cudaError_t err = cudaMemcpyToSymbol(d_hprime_zz, array, NGLL2*sizeof(float));
   if (err != cudaSuccess)
   {
@@ -1289,12 +1304,12 @@
     fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
     exit(1);
   }
-  
+
   err = cudaGetSymbolAddress((void**)&(mp->d_hprime_zz),"d_hprime_zz");
   if(err != cudaSuccess) {
     fprintf(stderr, "Error with d_hprime_zz: %s\n", cudaGetErrorString(err));
     exit(1);
-  }      
+  }
 }
 
 
@@ -1306,12 +1321,12 @@
     fprintf(stderr, "Error in setConst_hprimewgll_xx: %s\n", cudaGetErrorString(err));
     exit(1);
   }
-  
+
   err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_xx),"d_hprimewgll_xx");
   if(err != cudaSuccess) {
     fprintf(stderr, "Error with d_hprimewgll_xx: %s\n", cudaGetErrorString(err));
     exit(1);
-  }        
+  }
 }
 
 void setConst_hprimewgll_yy(float* array,Mesh* mp)
@@ -1322,12 +1337,12 @@
     fprintf(stderr, "Error in setConst_hprimewgll_yy: %s\n", cudaGetErrorString(err));
     exit(1);
   }
-  
+
   err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_yy),"d_hprimewgll_yy");
   if(err != cudaSuccess) {
     fprintf(stderr, "Error with d_hprimewgll_yy: %s\n", cudaGetErrorString(err));
     exit(1);
-  }        
+  }
 }
 
 void setConst_hprimewgll_zz(float* array,Mesh* mp)
@@ -1338,12 +1353,12 @@
     fprintf(stderr, "Error in setConst_hprimewgll_zz: %s\n", cudaGetErrorString(err));
     exit(1);
   }
-  
+
   err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_zz),"d_hprimewgll_zz");
   if(err != cudaSuccess) {
     fprintf(stderr, "Error with d_hprimewgll_zz: %s\n", cudaGetErrorString(err));
     exit(1);
-  }        
+  }
 }
 
 void setConst_wgllwgll_xy(float* array,Mesh* mp)
@@ -1359,8 +1374,8 @@
   if(err != cudaSuccess) {
     fprintf(stderr, "Error with d_wgllwgll_xy: %s\n", cudaGetErrorString(err));
     exit(1);
-  }      
-  
+  }
+
 }
 
 void setConst_wgllwgll_xz(float* array,Mesh* mp)
@@ -1376,8 +1391,8 @@
   if(err != cudaSuccess) {
     fprintf(stderr, "Error with d_wgllwgll_xz: %s\n", cudaGetErrorString(err));
     exit(1);
-  }      
-  
+  }
+
 }
 
 void setConst_wgllwgll_yz(float* array,Mesh* mp)
@@ -1393,21 +1408,21 @@
   if(err != cudaSuccess) {
     fprintf(stderr, "Error with d_wgllwgll_yz: %s\n", cudaGetErrorString(err));
     exit(1);
-  }      
-  
+  }
+
 }
 
 
 /* ----------------------------------------------------------------------------------------------- */
 
-/* KERNEL for ocean load on free surface */
+/* OCEANS load on free surface */
 
 /* ----------------------------------------------------------------------------------------------- */
 
 
-__global__ void elastic_ocean_load_cuda_kernel(float* accel, 
+__global__ void elastic_ocean_load_cuda_kernel(float* accel,
                                                  float* rmass,
-                                                 float* rmass_ocean_load, 
+                                                 float* rmass_ocean_load,
                                                  int num_free_surface_faces,
                                                  int* free_surface_ispec,
                                                  int* free_surface_ijk,
@@ -1416,109 +1431,118 @@
                                                  int* updated_dof_ocean_load) {
   // gets spectral element face id
   int igll = threadIdx.x ;  //  threadIdx.y*blockDim.x will be always = 0 for thread block (25,1,1)
-  int iface = blockIdx.x + gridDim.x*blockIdx.y;   
+  int iface = blockIdx.x + gridDim.x*blockIdx.y;
   realw nx,ny,nz;
   realw force_normal_comp,additional_term;
-  
+
   // for all faces on free surface
   if( iface < num_free_surface_faces ){
-    
+
     int ispec = free_surface_ispec[iface]-1;
-        
-    // gets global point index            
+
+    // gets global point index
     int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface)
     int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
     int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-      
+
     int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
-      
+
+    //if(igll == 0 ) printf("igll %d %d %d %d\n",igll,i,j,k,iglob);
+
     // only update this global point once
     // daniel: todo - workaround to not use the temporary update array
     //            atomicExch returns the old value, i.e. 0 indicates that we still have to do this point
+
+
+    //if(igll == 0 ) printf("updated_dof %d %d\n",iglob,updated_dof_ocean_load[iglob]);
+
     if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){
-      
+
       // get normal
       nx = free_surface_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; //(1,igll,iface)
       ny = free_surface_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
-      nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];      
-      
+      nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
+
       // make updated component of right-hand side
       // we divide by rmass() which is 1 / M
       // we use the total force which includes the Coriolis term above
       force_normal_comp = ( accel[iglob*3]*nx + accel[iglob*3+1]*ny + accel[iglob*3+2]*nz ) / rmass[iglob];
-      
+
       additional_term = (rmass_ocean_load[iglob] - rmass[iglob]) * force_normal_comp;
-      
+
       // daniel: probably wouldn't need atomicAdd anymore, but just to be sure...
       atomicAdd(&accel[iglob*3], + additional_term * nx);
       atomicAdd(&accel[iglob*3+1], + additional_term * ny);
-      atomicAdd(&accel[iglob*3+2], + additional_term * nz);      
+      atomicAdd(&accel[iglob*3+2], + additional_term * nz);
     }
-  }  
+  }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
 void FC_FUNC_(elastic_ocean_load_cuda,
-              ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f, 
+              ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f,
                                        int* SIMULATION_TYPE) {
-  
+
 TRACE("elastic_ocean_load_cuda");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
   // checks if anything to do
   if( mp->num_free_surface_faces == 0 ) return;
-  
+
   // block sizes: exact blocksize to match NGLLSQUARE
   int blocksize = 25;
-  
+
   int num_blocks_x = mp->num_free_surface_faces;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
 
-  // temporary global array: used to synchronize updates on global accel array
-  int* d_updated_dof_ocean_load;
-  print_CUDA_error_if_any(cudaMalloc((void**)&(d_updated_dof_ocean_load),sizeof(int)*mp->NGLOB_AB),88501);
-  // initializes array
-  cudaMemset((void*)d_updated_dof_ocean_load,0,sizeof(int)*mp->NGLOB_AB);
-  
-  elastic_ocean_load_cuda_kernel<<<grid,threads>>>(mp->d_accel, 
-                                                   mp->d_rmass, 
+
+  // initializes temporary array to zero
+  print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
+                                     sizeof(int)*mp->NGLOB_AB),88501);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("before kernel elastic_ocean_load_cuda");
+#endif
+
+  elastic_ocean_load_cuda_kernel<<<grid,threads>>>(mp->d_accel,
+                                                   mp->d_rmass,
                                                    mp->d_rmass_ocean_load,
                                                    mp->num_free_surface_faces,
-                                                   mp->d_free_surface_ispec, 
+                                                   mp->d_free_surface_ispec,
                                                    mp->d_free_surface_ijk,
                                                    mp->d_free_surface_normal,
                                                    mp->d_ibool,
-                                                   d_updated_dof_ocean_load);
+                                                   mp->d_updated_dof_ocean_load);
   // for backward/reconstructed potentials
   if(*SIMULATION_TYPE == 3) {
     // re-initializes array
-    cudaMemset(d_updated_dof_ocean_load,0,sizeof(int)*mp->NGLOB_AB);
+    print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
+                                       sizeof(int)*mp->NGLOB_AB),88502);
 
-    elastic_ocean_load_cuda_kernel<<<grid,threads>>>(mp->d_b_accel, 
-                                                       mp->d_rmass, 
+    elastic_ocean_load_cuda_kernel<<<grid,threads>>>(mp->d_b_accel,
+                                                       mp->d_rmass,
                                                        mp->d_rmass_ocean_load,
                                                        mp->num_free_surface_faces,
-                                                       mp->d_free_surface_ispec, 
+                                                       mp->d_free_surface_ispec,
                                                        mp->d_free_surface_ijk,
                                                        mp->d_free_surface_normal,
                                                        mp->d_ibool,
-                                                       d_updated_dof_ocean_load);      
-      
+                                                       mp->d_updated_dof_ocean_load);
+
   }
-  
-  cudaFree(d_updated_dof_ocean_load);
-  
+
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("enforce_free_surface_cuda");
-#endif  
+  exit_on_cuda_error("elastic_ocean_load_cuda");
+#endif
 }

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -45,20 +45,21 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void compute_kernels_cudakernel(int* ispec_is_elastic, int* ibool,
+__global__ void compute_kernels_cudakernel(int* ispec_is_elastic,
+                                           int* ibool,
                                            float* accel,
                                            float* b_displ,
-                                           float* epsilondev_xx,  
-                                           float* epsilondev_yy,  
-                                           float* epsilondev_xy,  
-                                           float* epsilondev_xz,  
-                                           float* epsilondev_yz,  
+                                           float* epsilondev_xx,
+                                           float* epsilondev_yy,
+                                           float* epsilondev_xy,
+                                           float* epsilondev_xz,
+                                           float* epsilondev_yz,
                                            float* b_epsilondev_xx,
                                            float* b_epsilondev_yy,
                                            float* b_epsilondev_xy,
                                            float* b_epsilondev_xz,
                                            float* b_epsilondev_yz,
-                                           float* rho_kl,					   
+                                           float* rho_kl,
                                            float deltat,
                                            float* mu_kl,
                                            float* kappa_kl,
@@ -70,33 +71,33 @@
   int ispec = blockIdx.x + blockIdx.y*gridDim.x;
 
   // handles case when there is 1 extra block (due to rectangular grid)
-  if(ispec < NSPEC_AB) { 
+  if(ispec < NSPEC_AB) {
 
     // elastic elements only
-    if(ispec_is_elastic[ispec] == 1) { 
-  
+    if( ispec_is_elastic[ispec] ) {
+
       int ijk = threadIdx.x;
       int ijk_ispec = ijk + 125*ispec;
       int iglob = ibool[ijk_ispec] - 1 ;
 
-      // debug      
+      // debug
 //      if(ijk_ispec == 9480531) {
-//      	d_debug[0] = rho_kl[ijk_ispec];
-//      	d_debug[1] = accel[3*iglob];
-//      	d_debug[2] = b_displ[3*iglob];
+//        d_debug[0] = rho_kl[ijk_ispec];
+//        d_debug[1] = accel[3*iglob];
+//        d_debug[2] = b_displ[3*iglob];
 //        d_debug[3] = deltat * (accel[3*iglob]*b_displ[3*iglob]+
 //                               accel[3*iglob+1]*b_displ[3*iglob+1]+
 //                               accel[3*iglob+2]*b_displ[3*iglob+2]);
 //      }
-      
-      
-      // isotropic kernels:      
+
+
+      // isotropic kernels:
       // density kernel
       rho_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_displ[3*iglob]+
                                      accel[3*iglob+1]*b_displ[3*iglob+1]+
                                      accel[3*iglob+2]*b_displ[3*iglob+2]);
 
-      
+
       // debug
       // if(rho_kl[ijk_ispec] < 1.9983e+18) {
       // atomicAdd(&d_debug[3],1.0);
@@ -105,7 +106,7 @@
       // d_debug[1] = accel[3*iglob];
       // d_debug[2] = b_displ[3*iglob];
       // }
-      
+
       // shear modulus kernel
       mu_kl[ijk_ispec] += deltat * (epsilondev_xx[ijk_ispec]*b_epsilondev_xx[ijk_ispec]+ // 1*b1
                                     epsilondev_yy[ijk_ispec]*b_epsilondev_yy[ijk_ispec]+ // 2*b2
@@ -114,37 +115,38 @@
                                     2*(epsilondev_xy[ijk_ispec]*b_epsilondev_xy[ijk_ispec]+
                                        epsilondev_xz[ijk_ispec]*b_epsilondev_xz[ijk_ispec]+
                                        epsilondev_yz[ijk_ispec]*b_epsilondev_yz[ijk_ispec]));
-      
+
       // bulk modulus kernel
       kappa_kl[ijk_ispec] += deltat*(9*epsilon_trace_over_3[ijk_ispec]*
                                      b_epsilon_trace_over_3[ijk_ispec]);
-    
+
     }
   }
 }
-					   
-/* ----------------------------------------------------------------------------------------------- */					   
 
-extern "C" 
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
 void FC_FUNC_(compute_kernels_elastic_cuda,
               COMPUTE_KERNELS_ELASTIC_CUDA)(long* Mesh_pointer,
-                                            float* deltat) {
+                                            float* deltat_f) {
 TRACE("compute_kernels_elastic_cuda");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
 
   int blocksize = 125; // NGLLX*NGLLY*NGLLZ
-  
+  float deltat = *deltat_f;
+
   int num_blocks_x = mp->NSPEC_AB;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-  
+
   float* d_debug;
   /*
   float* h_debug;
@@ -152,7 +154,7 @@
   cudaMalloc((void**)&d_debug,128*sizeof(float));
   cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
   */
-  
+
   compute_kernels_cudakernel<<<grid,threads>>>(mp->d_ispec_is_elastic,mp->d_ibool,
                                                mp->d_accel, mp->d_b_displ,
                                                mp->d_epsilondev_xx,
@@ -166,7 +168,7 @@
                                                mp->d_b_epsilondev_xz,
                                                mp->d_b_epsilondev_yz,
                                                mp->d_rho_kl,
-                                               *deltat,
+                                               deltat,
                                                mp->d_mu_kl,
                                                mp->d_kappa_kl,
                                                mp->d_epsilon_trace_over_3,
@@ -193,9 +195,9 @@
   // number_big_values++;
   // }
   // }
-  
+
   // printf("maval rho = %e, number>1e10 = %d vs. %d\n",maxval,number_big_values,mp->NSPEC_AB*125);
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("compute_kernels_elastic_cuda");
 #endif
@@ -209,33 +211,33 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 
-__global__ void compute_kernels_strength_noise_cuda_kernel(float* displ, 
+__global__ void compute_kernels_strength_noise_cuda_kernel(float* displ,
                                                            int* free_surface_ispec,
                                                            int* free_surface_ijk,
-                                                           int* ibool, 
-                                                           float* noise_surface_movie, 
-                                                           float* normal_x_noise, 
-                                                           float* normal_y_noise, 
-                                                           float* normal_z_noise, 
-                                                           float* Sigma_kl, 
+                                                           int* ibool,
+                                                           float* noise_surface_movie,
+                                                           float* normal_x_noise,
+                                                           float* normal_y_noise,
+                                                           float* normal_z_noise,
+                                                           float* Sigma_kl,
                                                            float deltat,
-                                                           int num_free_surface_faces, 
+                                                           int num_free_surface_faces,
                                                            float* d_debug) {
   int iface = blockIdx.x + blockIdx.y*gridDim.x;
-  
+
   if(iface < num_free_surface_faces) {
 
     int ispec = free_surface_ispec[iface]-1;
-    int igll = threadIdx.x;        
+    int igll = threadIdx.x;
     int ipoin = igll + 25*iface;
     int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1 ;
     int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
     int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-    
+
     int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1 ;
-    
+
     float eta = ( noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)]*normal_x_noise[ipoin]+
-                 noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)]*normal_y_noise[ipoin]+ 
+                 noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)]*normal_y_noise[ipoin]+
                  noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)]*normal_z_noise[ipoin]);
 
     // if(ijk_ispec == 78496) {
@@ -244,29 +246,29 @@
     //   d_debug[2] = normal_x_noise[ipoin];
     //   d_debug[3] = normal_y_noise[ipoin];
     //   d_debug[4] = normal_z_noise[ipoin];
-    //   d_debug[5] = displ[3*iglob+2];      
+    //   d_debug[5] = displ[3*iglob+2];
     //   d_debug[6] = deltat*eta*normal_z_noise[ipoin]*displ[2+3*iglob];
     //   d_debug[7] = 0.008*1.000000e-24*normal_z_noise[ipoin]*3.740546e-13;
     // }
-    
+
     Sigma_kl[INDEX4(5,5,5,i,j,k,ispec)] += deltat*eta*(normal_x_noise[ipoin]*displ[3*iglob]+
                                                        normal_y_noise[ipoin]*displ[1+3*iglob]+
                                                        normal_z_noise[ipoin]*displ[2+3*iglob]);
   }
-    
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(compute_kernels_strength_noise_cuda,
-              COMPUTE_KERNELS_STRENGTH_NOISE_CUDA)(long* Mesh_pointer, 
+extern "C"
+void FC_FUNC_(compute_kernels_strgth_noise_cu,
+              COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
                                                     float* h_noise_surface_movie,
                                                     int* num_free_surface_faces_f,
                                                     float* deltat) {
 
-TRACE("compute_kernels_strength_noise_cuda");
-                                                    
+TRACE("compute_kernels_strgth_noise_cu");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
   int num_free_surface_faces = *num_free_surface_faces_f;
 
@@ -287,7 +289,7 @@
   float* d_debug;
   // cudaMalloc((void**)&d_debug,128*sizeof(float));
   // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-  
+
   compute_kernels_strength_noise_cuda_kernel<<<grid,threads>>>(mp->d_displ,
                                                                mp->d_free_surface_ispec,
                                                                mp->d_free_surface_ijk,
@@ -304,11 +306,11 @@
   // for(int i=0;i<8;i++) {
   //   printf("debug[%d]= %e\n",i,h_debug[i]);
   // }
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("compute_kernels_strength_noise_cuda_kernel");
 #endif
-  
+
 }
 
 
@@ -327,30 +329,30 @@
                                         float* hprime_xx,
                                         float* hprime_yy,
                                         float* hprime_zz,
-                                        float* d_xix, 
-                                        float* d_xiy, 
-                                        float* d_xiz, 
-                                        float* d_etax, 
-                                        float* d_etay, 
-                                        float* d_etaz, 
-                                        float* d_gammax, 
-                                        float* d_gammay, 
+                                        float* d_xix,
+                                        float* d_xiy,
+                                        float* d_xiz,
+                                        float* d_etax,
+                                        float* d_etay,
+                                        float* d_etaz,
+                                        float* d_gammax,
+                                        float* d_gammay,
                                         float* d_gammaz,
                                         float rhol) {
-  
+
   float temp1l,temp2l,temp3l;
   float hp1,hp2,hp3;
   float xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl;
   float rho_invl;
   int l,offset,offset1,offset2,offset3;
-  
+
   //const int NGLLX = 5;
   const int NGLL3_ALIGN = 128;
-  
+
   int K = (ijk/NGLL2);
   int J = ((ijk-K*NGLL2)/NGLLX);
   int I = (ijk-K*NGLL2-J*NGLLX);
-  
+
   // derivative along x
   temp1l = 0.f;
   for( l=0; l<NGLLX;l++){
@@ -358,7 +360,7 @@
     offset1 = K*NGLL2+J*NGLLX+l;
     temp1l += scalar_field[offset1]*hp1;
   }
-  
+
   // derivative along y
   temp2l = 0.f;
   for( l=0; l<NGLLX;l++){
@@ -366,18 +368,18 @@
     offset2 = K*NGLL2+l*NGLLX+I;
     temp2l += scalar_field[offset2]*hp2;
   }
-  
-  // derivative along z    
+
+  // derivative along z
   temp3l = 0.f;
   for( l=0; l<NGLLX;l++){
     hp3 = hprime_zz[l*NGLLX+K];
     offset3 = l*NGLL2+J*NGLLX+I;
     temp3l += scalar_field[offset3]*hp3;
-    
+
   }
-  
+
   offset = ispec*NGLL3_ALIGN + ijk;
-  
+
   xixl = d_xix[offset];
   xiyl = d_xiy[offset];
   xizl = d_xiz[offset];
@@ -387,149 +389,326 @@
   gammaxl = d_gammax[offset];
   gammayl = d_gammay[offset];
   gammazl = d_gammaz[offset];
-  
+
   rho_invl = 1.0f / rhol;
-  
+
   // derivatives of acoustic scalar potential field on GLL points
   vector_field_element[0] = (temp1l*xixl + temp2l*etaxl + temp3l*gammaxl) * rho_invl;
   vector_field_element[1] = (temp1l*xiyl + temp2l*etayl + temp3l*gammayl) * rho_invl;
-  vector_field_element[2] = (temp1l*xizl + temp2l*etazl + temp3l*gammazl) * rho_invl;  
-  
+  vector_field_element[2] = (temp1l*xizl + temp2l*etazl + temp3l*gammazl) * rho_invl;
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 
-__global__ void compute_kernels_acoustic_kernel(int* ispec_is_acoustic, 
+__global__ void compute_kernels_acoustic_kernel(int* ispec_is_acoustic,
                                                 int* ibool,
                                                 float* rhostore,
                                                 float* kappastore,
                                                 float* hprime_xx,
                                                 float* hprime_yy,
                                                 float* hprime_zz,
-                                                float* d_xix, 
-                                                float* d_xiy, 
-                                                float* d_xiz, 
-                                                float* d_etax, 
-                                                float* d_etay, 
-                                                float* d_etaz, 
-                                                float* d_gammax, 
-                                                float* d_gammay, 
-                                                float* d_gammaz,                                                
+                                                float* d_xix,
+                                                float* d_xiy,
+                                                float* d_xiz,
+                                                float* d_etax,
+                                                float* d_etay,
+                                                float* d_etaz,
+                                                float* d_gammax,
+                                                float* d_gammay,
+                                                float* d_gammaz,
                                                 float* potential_dot_dot_acoustic,
                                                 float* b_potential_acoustic,
                                                 float* b_potential_dot_dot_acoustic,
-                                                float* rho_ac_kl,					   
+                                                float* rho_ac_kl,
                                                 float* kappa_ac_kl,
                                                 float deltat,
                                                 int NSPEC_AB) {
-  
+
   int ispec = blockIdx.x + blockIdx.y*gridDim.x;
 
-  // handles case when there is 1 extra block (due to rectangular grid)    
+  // handles case when there is 1 extra block (due to rectangular grid)
   if( ispec < NSPEC_AB ){
-  
+
     // acoustic elements only
-    if( ispec_is_acoustic[ispec] == 1) { 
-    
+    if( ispec_is_acoustic[ispec] ) {
+
       int ijk = threadIdx.x;
-      
+
       // local and global indices
       int ijk_ispec = ijk + 125*ispec;
       int ijk_ispec_padded = ijk + 128*ispec;
       int iglob = ibool[ijk_ispec] - 1;
-      
+
       float accel_elm[3];
       float b_displ_elm[3];
       float rhol,kappal;
-      
+
       // shared memory between all threads within this block
-      __shared__ float scalar_field_displ[125];    
-      __shared__ float scalar_field_accel[125];          
-      
+      __shared__ float scalar_field_displ[125];
+      __shared__ float scalar_field_accel[125];
+
       // copy field values
       scalar_field_displ[ijk] = b_potential_acoustic[iglob];
       scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob];
       __syncthreads();
-      
+
       // gets material parameter
       rhol = rhostore[ijk_ispec_padded];
-      
+
       // displacement vector from backward field
       compute_gradient_kernel(ijk,ispec,scalar_field_displ,b_displ_elm,
                               hprime_xx,hprime_yy,hprime_zz,
                               d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
                               rhol);
-      
+
       // acceleration vector
       compute_gradient_kernel(ijk,ispec,scalar_field_accel,accel_elm,
                               hprime_xx,hprime_yy,hprime_zz,
                               d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
                               rhol);
-      
+
       // density kernel
       rho_ac_kl[ijk_ispec] -= deltat * rhol * (accel_elm[0]*b_displ_elm[0] +
                                                accel_elm[1]*b_displ_elm[1] +
                                                accel_elm[2]*b_displ_elm[2]);
-      
+
       // bulk modulus kernel
       kappal = kappastore[ijk_ispec];
-      kappa_ac_kl[ijk_ispec] -= deltat / kappal * potential_dot_dot_acoustic[iglob] 
-                                                * b_potential_dot_dot_acoustic[iglob];    
+      kappa_ac_kl[ijk_ispec] -= deltat / kappal * potential_dot_dot_acoustic[iglob]
+                                                * b_potential_dot_dot_acoustic[iglob];
     }
-  }  
+  }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 
-extern "C" 
+extern "C"
 void FC_FUNC_(compute_kernels_acoustic_cuda,
               COMPUTE_KERNELS_ACOUSTIC_CUDA)(
-                                             long* Mesh_pointer, 
-                                             float* deltat) {
-  
+                                             long* Mesh_pointer,
+                                             float* deltat_f) {
+
 TRACE("compute_kernels_acoustic_cuda");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-  
+
   int blocksize = 125; // NGLLX*NGLLY*NGLLZ
+  float deltat = *deltat_f;
+
   int num_blocks_x = mp->NSPEC_AB;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-  
+
   compute_kernels_acoustic_kernel<<<grid,threads>>>(mp->d_ispec_is_acoustic,
                                                     mp->d_ibool,
                                                     mp->d_rhostore,
                                                     mp->d_kappastore,
                                                     mp->d_hprime_xx,
                                                     mp->d_hprime_yy,
-                                                    mp->d_hprime_zz,                                                    
-                                                    mp->d_xix, 
-                                                    mp->d_xiy, 
+                                                    mp->d_hprime_zz,
+                                                    mp->d_xix,
+                                                    mp->d_xiy,
                                                     mp->d_xiz,
-                                                    mp->d_etax, 
-                                                    mp->d_etay, 
+                                                    mp->d_etax,
+                                                    mp->d_etay,
                                                     mp->d_etaz,
-                                                    mp->d_gammax, 
-                                                    mp->d_gammay, 
-                                                    mp->d_gammaz,                                                    
-                                                    mp->d_potential_dot_dot_acoustic, 
+                                                    mp->d_gammax,
+                                                    mp->d_gammay,
+                                                    mp->d_gammaz,
+                                                    mp->d_potential_dot_dot_acoustic,
                                                     mp->d_b_potential_acoustic,
                                                     mp->d_b_potential_dot_dot_acoustic,
                                                     mp->d_rho_ac_kl,
                                                     mp->d_kappa_ac_kl,
-                                                    *deltat,
+                                                    deltat,
                                                     mp->NSPEC_AB);
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("compute_kernels_acoustic_kernel");
 #endif
 }
 
+/* ----------------------------------------------------------------------------------------------- */
+
+// preconditioner (approximate Hessian kernel)
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_kernels_hess_el_cudakernel(int* ispec_is_elastic,
+                                                   int* ibool,
+                                                   float* accel,
+                                                   float* b_accel,
+                                                   float* hess_kl,
+                                                   float deltat,
+                                                   int NSPEC_AB) {
+
+  int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+
+  // handles case when there is 1 extra block (due to rectangular grid)
+  if(ispec < NSPEC_AB) {
+
+    // elastic elements only
+    if( ispec_is_elastic[ispec] ) {
+
+      int ijk = threadIdx.x;
+      int ijk_ispec = ijk + 125*ispec;
+      int iglob = ibool[ijk_ispec] - 1 ;
+
+      // approximate hessian
+      hess_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_accel[3*iglob]+
+                                      accel[3*iglob+1]*b_accel[3*iglob+1]+
+                                      accel[3*iglob+2]*b_accel[3*iglob+2]);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_kernels_hess_ac_cudakernel(int* ispec_is_acoustic,
+                                                   int* ibool,
+                                                   float* potential_dot_dot_acoustic,
+                                                   float* b_potential_dot_dot_acoustic,
+                                                   float* rhostore,
+                                                   float* hprime_xx,
+                                                   float* hprime_yy,
+                                                   float* hprime_zz,
+                                                   float* d_xix,
+                                                   float* d_xiy,
+                                                   float* d_xiz,
+                                                   float* d_etax,
+                                                   float* d_etay,
+                                                   float* d_etaz,
+                                                   float* d_gammax,
+                                                   float* d_gammay,
+                                                   float* d_gammaz,
+                                                   float* hess_kl,
+                                                   float deltat,
+                                                   int NSPEC_AB) {
+
+  int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+
+  // handles case when there is 1 extra block (due to rectangular grid)
+  if(ispec < NSPEC_AB) {
+
+    // acoustic elements only
+    if( ispec_is_acoustic[ispec] ){
+
+      // local and global indices
+      int ijk = threadIdx.x;
+      int ijk_ispec = ijk + 125*ispec;
+      int iglob = ibool[ijk_ispec] - 1 ;
+
+      int ijk_ispec_padded = ijk + 128*ispec;
+
+      float accel_elm[3];
+      float b_accel_elm[3];
+      float rhol;
+
+      // shared memory between all threads within this block
+      __shared__ float scalar_field_accel[125];
+      __shared__ float scalar_field_b_accel[125];
+
+      // copy field values
+      scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob];
+      scalar_field_b_accel[ijk] = b_potential_dot_dot_acoustic[iglob];
+      __syncthreads();
+
+      // gets material parameter
+      rhol = rhostore[ijk_ispec_padded];
+
+      // acceleration vector
+      compute_gradient_kernel(ijk,ispec,
+                              scalar_field_accel,accel_elm,
+                              hprime_xx,hprime_yy,hprime_zz,
+                              d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
+                              rhol);
+
+      // acceleration vector from backward field
+      compute_gradient_kernel(ijk,ispec,
+                              scalar_field_b_accel,b_accel_elm,
+                              hprime_xx,hprime_yy,hprime_zz,
+                              d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
+                              rhol);
+      // approximates hessian
+      hess_kl[ijk_ispec] += deltat * (accel_elm[0]*b_accel_elm[0] +
+                                      accel_elm[1]*b_accel_elm[1] +
+                                      accel_elm[2]*b_accel_elm[2]);
+
+    } // ispec_is_acoustic
+
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_kernels_hess_cuda,
+              COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
+                                         float* deltat_f,
+                                         int* ELASTIC_SIMULATION,
+                                         int* ACOUSTIC_SIMULATION) {
+  TRACE("compute_kernels_hess_cuda");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+  int blocksize = 125; // NGLLX*NGLLY*NGLLZ
+  float deltat = *deltat_f;
+
+  int num_blocks_x = mp->NSPEC_AB;
+  int num_blocks_y = 1;
+  while(num_blocks_x > 65535) {
+    num_blocks_x = ceil(num_blocks_x/2.0);
+    num_blocks_y = num_blocks_y*2;
+  }
+
+  dim3 grid(num_blocks_x,num_blocks_y);
+  dim3 threads(blocksize,1,1);
+
+  if( *ELASTIC_SIMULATION ) {
+    compute_kernels_hess_el_cudakernel<<<grid,threads>>>(mp->d_ispec_is_elastic,
+                                                         mp->d_ibool,
+                                                         mp->d_accel,
+                                                         mp->d_b_accel,
+                                                         mp->d_hess_el_kl,
+                                                         deltat,
+                                                         mp->NSPEC_AB);
+  }
+
+  if( *ACOUSTIC_SIMULATION ) {
+    compute_kernels_hess_ac_cudakernel<<<grid,threads>>>(mp->d_ispec_is_acoustic,
+                                                         mp->d_ibool,
+                                                         mp->d_potential_dot_dot_acoustic,
+                                                         mp->d_b_potential_dot_dot_acoustic,
+                                                         mp->d_rhostore,
+                                                         mp->d_hprime_xx,
+                                                         mp->d_hprime_yy,
+                                                         mp->d_hprime_zz,
+                                                         mp->d_xix,
+                                                         mp->d_xiy,
+                                                         mp->d_xiz,
+                                                         mp->d_etax,
+                                                         mp->d_etay,
+                                                         mp->d_etaz,
+                                                         mp->d_gammax,
+                                                         mp->d_gammay,
+                                                         mp->d_gammaz,
+                                                         mp->d_hess_ac_kl,
+                                                         deltat,
+                                                         mp->NSPEC_AB);
+  }
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("compute_kernels_hess_cuda");
+#endif
+}
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -40,72 +40,71 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void compute_stacey_acoustic_kernel(float* potential_dot_acoustic, 
-                                               float* potential_dot_dot_acoustic, 
+__global__ void compute_stacey_acoustic_kernel(float* potential_dot_acoustic,
+                                               float* potential_dot_dot_acoustic,
                                                int* abs_boundary_ispec,
-                                               int* abs_boundary_ijk, 
-                                               real* abs_boundary_jacobian2Dw,
+                                               int* abs_boundary_ijk,
+                                               realw* abs_boundary_jacobian2Dw,
                                                int* ibool,
                                                float* rhostore,
                                                float* kappastore,
-                                               int* ispec_is_inner, 
+                                               int* ispec_is_inner,
                                                int* ispec_is_acoustic,
                                                int phase_is_inner,
                                                int SIMULATION_TYPE, int SAVE_FORWARD,
                                                int num_abs_boundary_faces,
                                                float* b_potential_dot_acoustic,
                                                float* b_potential_dot_dot_acoustic,
-                                               float* b_absorb_potential 
+                                               float* b_absorb_potential
                                                ) {
 
-  int igll = threadIdx.x; 
-  int iface = blockIdx.x + gridDim.x*blockIdx.y; 
-  
+  int igll = threadIdx.x;
+  int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
   int i,j,k,iglob,ispec;
   realw rhol,kappal,cpl;
   realw jacobianw;
-  
-  
-  // don't compute points outside NGLLSQUARE==NGLL2==25  
+
+
+  // don't compute points outside NGLLSQUARE==NGLL2==25
   // way 2: no further check needed since blocksize = 25
   if( iface < num_abs_boundary_faces){
-  
-  //  if(igll<NGLL2 && iface < num_abs_boundary_faces) {    
-    
+
+  //  if(igll<NGLL2 && iface < num_abs_boundary_faces) {
+
     // "-1" from index values to convert from Fortran-> C indexing
     ispec = abs_boundary_ispec[iface]-1;
-    
-    if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec]==1) {
 
+    if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] ) {
+
       i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
       j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
       k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
       iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-      
+
       // determines bulk sound speed
       rhol = rhostore[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
 
       kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
 
       cpl = sqrt( kappal / rhol );
-            
-      // gets associated, weighted jacobian      
-      jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];            
 
+      // gets associated, weighted jacobian
+      jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
 //daniel
 //if( igll == 0 ) printf("gpu: %i %i %i %i %i %e %e %e\n",i,j,k,ispec,iglob,rhol,kappal,jacobianw);
-         
+
       // Sommerfeld condition
-      atomicAdd(&potential_dot_dot_acoustic[iglob],-potential_dot_acoustic[iglob]*jacobianw/cpl/rhol);      
-      
+      atomicAdd(&potential_dot_dot_acoustic[iglob],-potential_dot_acoustic[iglob]*jacobianw/cpl/rhol);
+
       // adjoint simulations
       if( SIMULATION_TYPE == 3 ){
         // Sommerfeld condition
         atomicAdd(&b_potential_dot_dot_acoustic[iglob],-b_absorb_potential[INDEX2(NGLL2,igll,iface)]);
-      }else if( SIMULATION_TYPE == 1 && SAVE_FORWARD == 1 ){  
-         b_absorb_potential[INDEX2(NGLL2,igll,iface)] = potential_dot_acoustic[iglob]*jacobianw/cpl/rhol; 
+      }else if( SIMULATION_TYPE == 1 && SAVE_FORWARD ){
+         b_absorb_potential[INDEX2(NGLL2,igll,iface)] = potential_dot_acoustic[iglob]*jacobianw/cpl/rhol;
       }
-      
     }
 //  }
   }
@@ -113,12 +112,12 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
+extern "C"
 void FC_FUNC_(compute_stacey_acoustic_cuda,
               COMPUTE_STACEY_ACOUSTIC_CUDA)(
-                                    long* Mesh_pointer_f, 
-                                    int* phase_is_innerf, 
-                                    int* SIMULATION_TYPEf, 
+                                    long* Mesh_pointer_f,
+                                    int* phase_is_innerf,
+                                    int* SIMULATION_TYPEf,
                                     int* SAVE_FORWARDf,
                                     float* h_b_absorb_potential) {
 TRACE("compute_stacey_acoustic_cuda");
@@ -127,16 +126,16 @@
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
   int phase_is_inner          = *phase_is_innerf;
   int SIMULATION_TYPE         = *SIMULATION_TYPEf;
-  int SAVE_FORWARD            = *SAVE_FORWARDf;              
+  int SAVE_FORWARD            = *SAVE_FORWARDf;
 
   // way 1: Elapsed time: 4.385948e-03
   // > NGLLSQUARE==NGLL2==25, but we handle this inside kernel
-  //  int blocksize = 32; 
-  
+  //  int blocksize = 32;
+
   // way 2: Elapsed time: 4.379034e-03
   // > NGLLSQUARE==NGLL2==25, no further check inside kernel
-  int blocksize = 25; 
-  
+  int blocksize = 25;
+
   int num_blocks_x = mp->d_num_abs_boundary_faces;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
@@ -148,37 +147,37 @@
   dim3 threads(blocksize,1,1);
 
   //  adjoint simulations: reads in absorbing boundary
-  if (SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0 ){  
+  if (SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0 ){
     // copies array to GPU
     print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,h_b_absorb_potential,
-                                       mp->d_b_reclen_potential,cudaMemcpyHostToDevice),7700);    
+                                       mp->d_b_reclen_potential,cudaMemcpyHostToDevice),7700);
   }
-      
+
   compute_stacey_acoustic_kernel<<<grid,threads>>>(mp->d_potential_dot_acoustic,
                                                    mp->d_potential_dot_dot_acoustic,
-                                                   mp->d_abs_boundary_ispec, 
-                                                   mp->d_abs_boundary_ijk, 
-                                                   mp->d_abs_boundary_jacobian2Dw, 
-                                                   mp->d_ibool, 
-                                                   mp->d_rhostore, 
-                                                   mp->d_kappastore, 
-                                                   mp->d_ispec_is_inner, 
-                                                   mp->d_ispec_is_acoustic, 
+                                                   mp->d_abs_boundary_ispec,
+                                                   mp->d_abs_boundary_ijk,
+                                                   mp->d_abs_boundary_jacobian2Dw,
+                                                   mp->d_ibool,
+                                                   mp->d_rhostore,
+                                                   mp->d_kappastore,
+                                                   mp->d_ispec_is_inner,
+                                                   mp->d_ispec_is_acoustic,
                                                    phase_is_inner,
                                                    SIMULATION_TYPE,SAVE_FORWARD,
                                                    mp->d_num_abs_boundary_faces,
                                                    mp->d_b_potential_dot_acoustic,
                                                    mp->d_b_potential_dot_dot_acoustic,
                                                    mp->d_b_absorb_potential);
-  
+
   //  adjoint simulations: stores absorbed wavefield part
-  if (SIMULATION_TYPE == 1 && SAVE_FORWARD == 1 && mp->d_num_abs_boundary_faces > 0 ){  
-    // copies array to CPU  
+  if (SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ){
+    // copies array to CPU
     print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_potential,mp->d_b_absorb_potential,
                                        mp->d_b_reclen_potential,cudaMemcpyDeviceToHost),7701);
   }
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING  
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("compute_stacey_acoustic_kernel");
 #endif
 }

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -40,81 +40,74 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void compute_stacey_elastic_kernel(real* veloc, 
-                                              real* accel, 
+__global__ void compute_stacey_elastic_kernel(realw* veloc,
+                                              realw* accel,
                                               int* abs_boundary_ispec,
-                                              int* abs_boundary_ijk, 
-                                              real* abs_boundary_normal,
-                                              real* abs_boundary_jacobian2Dw,
+                                              int* abs_boundary_ijk,
+                                              realw* abs_boundary_normal,
+                                              realw* abs_boundary_jacobian2Dw,
                                               int* ibool,
-                                              real* rho_vp, 
-                                              real* rho_vs,
-                                              int* ispec_is_inner, 
+                                              realw* rho_vp,
+                                              realw* rho_vs,
+                                              int* ispec_is_inner,
                                               int* ispec_is_elastic,
                                               int phase_is_inner,
-                                              int SIMULATION_TYPE,int SAVE_FORWARD,
+                                              int SIMULATION_TYPE,
+                                              int SAVE_FORWARD,
                                               int num_abs_boundary_faces,
-                                              real* b_accel, 
-                                              real* b_absorb_field,                                              
-                                              float* debug_val,
-                                              int* debug_val_int                                              
+                                              realw* b_accel,
+                                              realw* b_absorb_field //,float* debug_val,int* debug_val_int
                                               ) {
 
   int igll = threadIdx.x; // tx
   int iface = blockIdx.x + gridDim.x*blockIdx.y; // bx
-  int i;
-  int j;
-  int k;
-  int iglob;
-  int ispec;
+
+  int i,j,k,iglob,ispec;
   realw vx,vy,vz,vn;
   realw nx,ny,nz;
   realw rho_vp_temp,rho_vs_temp;
   realw tx,ty,tz;
   realw jacobianw;
-  
 
-  // don't compute points outside NGLLSQUARE==NGLL2==25  
+  // don't compute points outside NGLLSQUARE==NGLL2==25
   // way 2: no further check needed since blocksize = 25
   if( iface < num_abs_boundary_faces){
-    
-  //if(igll < NGLL2 && iface < num_abs_boundary_faces) {    
-    
+
+  //if(igll < NGLL2 && iface < num_abs_boundary_faces) {
+
     // "-1" from index values to convert from Fortran-> C indexing
     ispec = abs_boundary_ispec[iface]-1;
-    i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
-    j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
-    k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
-    iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
-    
-    if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec]==1) {
 
+    if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] ) {
+
       i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
       j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
       k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
       iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
-      
+
       // gets associated velocity
-      
+
       vx = veloc[iglob*3+0];
       vy = veloc[iglob*3+1];
       vz = veloc[iglob*3+2];
-      
+
       // gets associated normal
       nx = abs_boundary_normal[INDEX3(NDIM,NGLL2,0,igll,iface)];
       ny = abs_boundary_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
       nz = abs_boundary_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
-      
+
       // // velocity component in normal direction (normal points out of element)
       vn = vx*nx + vy*ny + vz*nz;
+
       rho_vp_temp = rho_vp[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
       rho_vs_temp = rho_vs[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+
       tx = rho_vp_temp*vn*nx + rho_vs_temp*(vx-vn*nx);
       ty = rho_vp_temp*vn*ny + rho_vs_temp*(vy-vn*ny);
       tz = rho_vp_temp*vn*nz + rho_vs_temp*(vz-vn*nz);
-      
-      jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];            
-   
+
+      jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
       atomicAdd(&accel[iglob*3],-tx*jacobianw);
       atomicAdd(&accel[iglob*3+1],-ty*jacobianw);
       atomicAdd(&accel[iglob*3+2],-tz*jacobianw);
@@ -128,10 +121,10 @@
         b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)] = tx*jacobianw;
         b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)] = ty*jacobianw;
         b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)] = tz*jacobianw;
-      }
-      
+      } // SIMULATION_TYPE
     }
-  }
+  } // num_abs_boundary_faces
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -139,28 +132,31 @@
 
 extern "C"
 void FC_FUNC_(compute_stacey_elastic_cuda,
-              COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f, 
-                                           int* phase_is_innerf, 
-                                           int* SIMULATION_TYPEf, 
+              COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
+                                           int* phase_is_innerf,
+                                           int* SIMULATION_TYPEf,
                                            int* SAVE_FORWARDf,
                                            float* h_b_absorb_field) {
 
-TRACE("compute_stacey_elastic_cuda");  
-  
+TRACE("compute_stacey_elastic_cuda");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
-  int phase_is_inner	  = *phase_is_innerf;
-  int SIMULATION_TYPE	= *SIMULATION_TYPEf;
-  int SAVE_FORWARD     = *SAVE_FORWARDf;              
+  // check
+  if( mp->d_num_abs_boundary_faces == 0 ) return;
 
+  int phase_is_inner    = *phase_is_innerf;
+  int SIMULATION_TYPE   = *SIMULATION_TYPEf;
+  int SAVE_FORWARD      = *SAVE_FORWARDf;
+
   // way 1
   // > NGLLSQUARE==NGLL2==25, but we handle this inside kernel
   //int blocksize = 32;
-  
-  // way 2: seems sligthly faster 
+
+  // way 2: seems sligthly faster
   // > NGLLSQUARE==NGLL2==25, no further check inside kernel
   int blocksize = 25;
-  
+
   int num_blocks_x = mp->d_num_abs_boundary_faces;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
@@ -171,57 +167,56 @@
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
 
-  float* d_debug_val;
-  int* d_debug_val_int;
+  //float* d_debug_val;
+  //int* d_debug_val_int;
 
   if(SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0) {
     // int val = NSTEP-it+1;
-    // read_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&val);    
+    // read_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&val);
     // The read is done in fortran
     print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field,h_b_absorb_field,
                                        mp->d_b_reclen_field,cudaMemcpyHostToDevice),7700);
   }
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("between cudamemcpy and compute_stacey_elastic_kernel");
 #endif
-  
+
   compute_stacey_elastic_kernel<<<grid,threads>>>(mp->d_veloc,
                                                   mp->d_accel,
-                                                  mp->d_abs_boundary_ispec, 
-                                                  mp->d_abs_boundary_ijk, 
-                                                  mp->d_abs_boundary_normal, 
-                                                  mp->d_abs_boundary_jacobian2Dw, 
-                                                  mp->d_ibool, 
-                                                  mp->d_rho_vp, 
-                                                  mp->d_rho_vs, 
-                                                  mp->d_ispec_is_inner, 
-                                                  mp->d_ispec_is_elastic, 
+                                                  mp->d_abs_boundary_ispec,
+                                                  mp->d_abs_boundary_ijk,
+                                                  mp->d_abs_boundary_normal,
+                                                  mp->d_abs_boundary_jacobian2Dw,
+                                                  mp->d_ibool,
+                                                  mp->d_rho_vp,
+                                                  mp->d_rho_vs,
+                                                  mp->d_ispec_is_inner,
+                                                  mp->d_ispec_is_elastic,
                                                   phase_is_inner,
                                                   SIMULATION_TYPE,SAVE_FORWARD,
                                                   mp->d_num_abs_boundary_faces,
                                                   mp->d_b_accel,
-                                                  mp->d_b_absorb_field,
-                                                  d_debug_val,
-                                                  d_debug_val_int);
-  
+                                                  mp->d_b_absorb_field //,d_debug_val,d_debug_val_int
+                                                  );
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("compute_stacey_elastic_kernel");  
+  exit_on_cuda_error("compute_stacey_elastic_kernel");
 #endif
 
   // ! adjoint simulations: stores absorbed wavefield part
   // if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) &
   //   write(IOABS,rec=it) b_reclen_field,b_absorb_field,b_reclen_field
-  
-  if(SIMULATION_TYPE==1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces>0) {
+
+  if(SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ) {
     print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_field,mp->d_b_absorb_field,
                                        mp->d_b_reclen_field,cudaMemcpyDeviceToHost),7701);
     // The write is done in fortran
-    // write_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&it);    
+    // write_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&it);
   }
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("after compute_stacey_elastic after cudamemcpy");  
+  exit_on_cuda_error("after compute_stacey_elastic after cudamemcpy");
 #endif
 }
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -35,8 +35,8 @@
 #include "mesh_constants_cuda.h"
 
 
-#define CUBLAS_ERROR(s,n)  if (s != CUBLAS_STATUS_SUCCESS) {	\
-fprintf (stderr, "CUBLAS Memory Write Error @ %d\n",n);	\
+#define CUBLAS_ERROR(s,n)  if (s != CUBLAS_STATUS_SUCCESS) {  \
+fprintf (stderr, "CUBLAS Memory Write Error @ %d\n",n); \
 exit(EXIT_FAILURE); }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -45,15 +45,19 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-  
-__global__ void UpdateDispVeloc_kernel(real* displ, real* veloc,
-         real* accel, int size,
-         real deltat, real deltatsqover2, real deltatover2) {
+
+__global__ void UpdateDispVeloc_kernel(realw* displ,
+                                       realw* veloc,
+                                       realw* accel,
+                                       int size,
+                                       realw deltat,
+                                       realw deltatsqover2,
+                                       realw deltatover2) {
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
 
   /* because of block and grid sizing problems, there is a small */
   /* amount of buffer at the end of the calculation */
-  if(id < size) {     
+  if(id < size) {
     displ[id] = displ[id] + deltat*veloc[id] + deltatsqover2*accel[id];
     veloc[id] = veloc[id] + deltatover2*accel[id];
     accel[id] = 0; // can do this using memset...not sure if faster
@@ -63,72 +67,76 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(it_update_displacement_scheme_cuda,
-              IT_UPDATE_DISPLACMENT_SCHEME_CUDA)(long* Mesh_pointer_f,
-                                                 int* size_F, 
-                                                 float* deltat_F, 
-                                                 float* deltatsqover2_F, 
+void FC_FUNC_(it_update_displacement_cuda,
+              IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f,
+                                                 int* size_F,
+                                                 float* deltat_F,
+                                                 float* deltatsqover2_F,
                                                  float* deltatover2_F,
-                                                 int* SIMULATION_TYPE, 
-                                                 float* b_deltat_F, 
-                                                 float* b_deltatsqover2_F, 
+                                                 int* SIMULATION_TYPE,
+                                                 float* b_deltat_F,
+                                                 float* b_deltatsqover2_F,
                                                  float* b_deltatover2_F) {
 
-TRACE("it_update_displacement_scheme_cuda");  
+TRACE("it_update_displacement_cuda");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-  
+
   //int i,device;
 
   int size = *size_F;
-  real deltat = *deltat_F;
-  real deltatsqover2 = *deltatsqover2_F;
-  real deltatover2 = *deltatover2_F;
-  real b_deltat = *b_deltat_F;
-  real b_deltatsqover2 = *b_deltatsqover2_F;
-  real b_deltatover2 = *b_deltatover2_F;
+  realw deltat = *deltat_F;
+  realw deltatsqover2 = *deltatsqover2_F;
+  realw deltatover2 = *deltatover2_F;
+  realw b_deltat = *b_deltat_F;
+  realw b_deltatsqover2 = *b_deltatsqover2_F;
+  realw b_deltatover2 = *b_deltatover2_F;
   //cublasStatus status;
-  
+
   int blocksize = 128;
-  int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;  
-  
-  int num_blocks_x = size_padded/blocksize;  
+  int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+  int num_blocks_x = size_padded/blocksize;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
 
-  
-  exit_on_cuda_error("Before UpdateDispVeloc_kernel");
 
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+//  exit_on_cuda_error("Before UpdateDispVeloc_kernel");
+//#endif
+
   //launch kernel
   UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ,mp->d_veloc,mp->d_accel,
-					   size,deltat,deltatsqover2,deltatover2);
+                                           size,deltat,deltatsqover2,deltatover2);
 
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
-  // sync and check to catch errors from previous async operations
-  exit_on_cuda_error("UpdateDispVeloc_kernel");
-#endif
+  //cudaThreadSynchronize();
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+//  //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+//  // sync and check to catch errors from previous async operations
+//  exit_on_cuda_error("UpdateDispVeloc_kernel");
+//#endif
 
-
   // kernel for backward fields
   if(*SIMULATION_TYPE == 3) {
-    
+
     UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_b_displ,mp->d_b_veloc,mp->d_b_accel,
-					     size,b_deltat, b_deltatsqover2, b_deltatover2);
+                                             size,b_deltat,b_deltatsqover2,b_deltatover2);
 
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+//    //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+//    exit_on_cuda_error("after SIM_TYPE==3 UpdateDispVeloc_kernel");
+//#endif
+  }
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-    //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
-    exit_on_cuda_error("after SIM_TYPE==3 UpdateDispVeloc_kernel");
+  exit_on_cuda_error("it_update_displacement_cuda");
 #endif
-  }
-  
-  cudaThreadSynchronize();  
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -138,84 +146,84 @@
 // KERNEL 1
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void UpdatePotential_kernel(real* potential_acoustic, 
-                                       real* potential_dot_acoustic,
-                                       real* potential_dot_dot_acoustic, 
+__global__ void UpdatePotential_kernel(realw* potential_acoustic,
+                                       realw* potential_dot_acoustic,
+                                       realw* potential_dot_dot_acoustic,
                                        int size,
-                                       real deltat, 
-                                       real deltatsqover2, 
-                                       real deltatover2) {
+                                       realw deltat,
+                                       realw deltatsqover2,
+                                       realw deltatover2) {
   int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-  
+
   /* because of block and grid sizing problems, there is a small */
   /* amount of buffer at the end of the calculation */
-  if(id < size) {     
-    potential_acoustic[id] = potential_acoustic[id] 
-                            + deltat*potential_dot_acoustic[id] 
+  if(id < size) {
+    potential_acoustic[id] = potential_acoustic[id]
+                            + deltat*potential_dot_acoustic[id]
                             + deltatsqover2*potential_dot_dot_acoustic[id];
-    
-    potential_dot_acoustic[id] = potential_dot_acoustic[id] 
+
+    potential_dot_acoustic[id] = potential_dot_acoustic[id]
                                 + deltatover2*potential_dot_dot_acoustic[id];
-    
-    potential_dot_dot_acoustic[id] = 0; 
+
+    potential_dot_dot_acoustic[id] = 0;
   }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(it_update_displacement_scheme_acoustic_cuda,
-              IT_UPDATE_DISPLACEMENT_SCHEME_ACOUSTIC_CUDA)(long* Mesh_pointer_f, 
-                                                           int* size_F,
-                                                           float* deltat_F, 
-                                                           float* deltatsqover2_F, 
-                                                           float* deltatover2_F,
-                                                           int* SIMULATION_TYPE, 
-                                                           float* b_deltat_F, 
-                                                           float* b_deltatsqover2_F, 
-                                                           float* b_deltatover2_F) {
-TRACE("it_update_displacement_scheme_acoustic_cuda");  
+void FC_FUNC_(it_update_displacement_ac_cuda,
+              it_update_displacement_ac_cuda)(long* Mesh_pointer_f,
+                                               int* size_F,
+                                               float* deltat_F,
+                                               float* deltatsqover2_F,
+                                               float* deltatover2_F,
+                                               int* SIMULATION_TYPE,
+                                               float* b_deltat_F,
+                                               float* b_deltatsqover2_F,
+                                               float* b_deltatover2_F) {
+TRACE("it_update_displacement_ac_cuda");
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-  
-  //int i,device;  
+
+  //int i,device;
   int size = *size_F;
-  real deltat = *deltat_F;
-  real deltatsqover2 = *deltatsqover2_F;
-  real deltatover2 = *deltatover2_F;
-  real b_deltat = *b_deltat_F;
-  real b_deltatsqover2 = *b_deltatsqover2_F;
-  real b_deltatover2 = *b_deltatover2_F;
+  realw deltat = *deltat_F;
+  realw deltatsqover2 = *deltatsqover2_F;
+  realw deltatover2 = *deltatover2_F;
+  realw b_deltat = *b_deltat_F;
+  realw b_deltatsqover2 = *b_deltatsqover2_F;
+  realw b_deltatover2 = *b_deltatover2_F;
   //cublasStatus status;
-  
+
   int blocksize = 128;
-  int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;  
-  
-  int num_blocks_x = size_padded/blocksize;  
+  int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+  int num_blocks_x = size_padded/blocksize;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-  
+
   //launch kernel
   UpdatePotential_kernel<<<grid,threads>>>(mp->d_potential_acoustic,
                                            mp->d_potential_dot_acoustic,
                                            mp->d_potential_dot_dot_acoustic,
                                            size,deltat,deltatsqover2,deltatover2);
-  
+
   if(*SIMULATION_TYPE == 3) {
     UpdatePotential_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic,
                                              mp->d_b_potential_dot_acoustic,
                                              mp->d_b_potential_dot_dot_acoustic,
                                              size,b_deltat,b_deltatsqover2,b_deltatover2);
   }
-  
-  cudaThreadSynchronize();
+
+  //cudaThreadSynchronize();
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
-  exit_on_cuda_error("it_update_displacement_scheme_acoustic_cuda");
+  exit_on_cuda_error("it_update_displacement_ac_cuda");
 #endif
 }

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2011-10-30 02:25:28 UTC (rev 19129)
@@ -26,6 +26,23 @@
  !=====================================================================
  */
 
+/* daniel: trivia
+
+- for most real working arrays we use float (single exception so far is stf_pre_compute).
+  TODO: we could use "realw" instead of "float" type declaration to make it easier to switch
+              between a real or double precision simulation (matching CUSTOM_REAL == 4 or 8 in fortran routines).
+
+- instead of boolean "logical" declared in fortran routines, in C (or Cuda-C) we have to use "int" variables.
+
+  ifort / gfortran caveat:
+    to check whether it is true or false, do not check for == 1 to test for true values since ifort just uses
+    non-zero values for true (e.g. can be -1 for true). however, false will be always == 0.
+
+  thus, rather use: if( var ) {...}  for testing if true instead of if( var == 1){...} (alternative: one could use if( var != 0 ){...}
+
+*/
+
+
 #ifndef GPU_MESH_
 #define GPU_MESH_
 #include <sys/types.h>
@@ -40,7 +57,7 @@
 
 #define DEBUG 0
 #if DEBUG == 1
-#define TRACE(x) printf("%s\n",x)
+#define TRACE(x) printf("%s\n",x);
 #else
 #define TRACE(x) // printf("%s\n",x);
 #endif
@@ -57,6 +74,7 @@
 #endif
 
 
+//#undef ENABLE_VERY_SLOW_ERROR_CHECKING
 #define ENABLE_VERY_SLOW_ERROR_CHECKING
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -85,7 +103,7 @@
 
 void print_CUDA_error_if_any(cudaError_t err, int num);
 
-void pause_for_debugger(int pause); 
+void pause_for_debugger(int pause);
 
 void exit_on_cuda_error(char* kernel_name);
 
@@ -100,7 +118,7 @@
 #define NGLL2 25
 #define N_SLS 3
 
-typedef float real;   // type of variables passed into function
+//typedef float real;   // type of variables passed into function
 typedef float realw;  // type of "working" variables
 
 // double precision temporary variables leads to 10% performance
@@ -118,18 +136,21 @@
   // mesh resolution
   int NSPEC_AB;
   int NGLOB_AB;
-  
+
   // interpolators
   float* d_xix; float* d_xiy; float* d_xiz;
   float* d_etax; float* d_etay; float* d_etaz;
   float* d_gammax; float* d_gammay; float* d_gammaz;
 
-  // model parameters  
+  // model parameters
   float* d_kappav; float* d_muv;
 
-  // global indexing  
+  // global indexing
   int* d_ibool;
 
+  // inner / outer elements
+  int* d_ispec_is_inner;
+
   // pointers to constant memory arrays
   float* d_hprime_xx; float* d_hprime_yy; float* d_hprime_zz;
   float* d_hprimewgll_xx; float* d_hprimewgll_yy; float* d_hprimewgll_zz;
@@ -138,22 +159,26 @@
   // ------------------------------------------------------------------ //
   // elastic wavefield parameters
   // ------------------------------------------------------------------ //
-  
-  // displacement, velocity, acceleration  
+
+  // displacement, velocity, acceleration
   float* d_displ; float* d_veloc; float* d_accel;
-  // backward/reconstructed elastic wavefield  
+  // backward/reconstructed elastic wavefield
   float* d_b_displ; float* d_b_veloc; float* d_b_accel;
 
-  // elastic domain parameters    
+  // elastic elements
+  int* d_ispec_is_elastic;
+
+  // elastic domain parameters
   int* d_phase_ispec_inner_elastic;
-  int d_num_phase_ispec_elastic;
+  int num_phase_ispec_elastic;
+
   float* d_rmass;
   float* d_send_accel_buffer;
 
-  // interfaces  
+  // interfaces
   int* d_nibool_interfaces_ext_mesh;
   int* d_ibool_interfaces_ext_mesh;
-    
+
   //used for absorbing stacey boundaries
   int d_num_abs_boundary_faces;
   int* d_abs_boundary_ispec;
@@ -163,15 +188,12 @@
 
   float* d_b_absorb_field;
   int d_b_reclen_field;
-  
+
   float* d_rho_vp;
   float* d_rho_vs;
-  
-  // inner / outer elements  
-  int* d_ispec_is_inner;
-  int* d_ispec_is_elastic;
 
-  // sources  
+  // sources
+  int nsources_local;
   float* d_sourcearrays;
   double* d_stf_pre_compute;
   int* d_islice_selected_source;
@@ -179,17 +201,17 @@
 
   // receivers
   int* d_number_receiver_global;
-  int* d_ispec_selected_rec;  
+  int* d_ispec_selected_rec;
   int* d_islice_selected_rec;
   int nrec_local;
   float* d_station_seismo_field;
-  float* h_station_seismo_field;  
-  
+  float* h_station_seismo_field;
+
   // surface elements (to save for noise tomography and acoustic simulations)
   int* d_free_surface_ispec;
   int* d_free_surface_ijk;
   int num_free_surface_faces;
-  
+
   // surface movie elements to save for noise tomography
   float* d_noise_surface_movie;
 
@@ -202,11 +224,11 @@
 
   float* d_one_minus_sum_beta;
   float* d_factor_common;
-  
+
   float* d_alphaval;
   float* d_betaval;
   float* d_gammaval;
-  
+
   // attenuation & kernel
   float* d_epsilondev_xx;
   float* d_epsilondev_yy;
@@ -214,7 +236,7 @@
   float* d_epsilondev_xz;
   float* d_epsilondev_yz;
   float* d_epsilon_trace_over_3;
-      
+
   // noise
   float* d_normal_x_noise;
   float* d_normal_y_noise;
@@ -230,7 +252,7 @@
   float* d_b_R_xy;
   float* d_b_R_xz;
   float* d_b_R_yz;
-  
+
   float* d_b_epsilondev_xx;
   float* d_b_epsilondev_yy;
   float* d_b_epsilondev_xy;
@@ -241,19 +263,23 @@
   float* d_b_alphaval;
   float* d_b_betaval;
   float* d_b_gammaval;
-  
+
   // sensitivity kernels
   float* d_rho_kl;
   float* d_mu_kl;
   float* d_kappa_kl;
-  
+
   // noise sensitivity kernel
   float* d_Sigma_kl;
 
+  // approximative hessian for preconditioning kernels
+  float* d_hess_el_kl;
+
   // oceans
   float* d_rmass_ocean_load;
   float* d_free_surface_normal;
-  
+  int* d_updated_dof_ocean_load;
+
   // ------------------------------------------------------------------ //
   // acoustic wavefield
   // ------------------------------------------------------------------ //
@@ -261,29 +287,33 @@
   float* d_potential_acoustic; float* d_potential_dot_acoustic; float* d_potential_dot_dot_acoustic;
   // backward/reconstructed wavefield
   float* d_b_potential_acoustic; float* d_b_potential_dot_acoustic; float* d_b_potential_dot_dot_acoustic;
-  
-  // acoustic domain parameters  
-  int* d_phase_ispec_inner_acoustic;  
+
+  // acoustic domain parameters
+  int* d_ispec_is_acoustic;
+
+  int* d_phase_ispec_inner_acoustic;
   int num_phase_ispec_acoustic;
-  
+
   float* d_rhostore;
   float* d_kappastore;
   float* d_rmass_acoustic;
-  
+
   float* d_send_potential_dot_dot_buffer;
-  int* d_ispec_is_acoustic;
-    
+
   float* d_b_absorb_potential;
   int d_b_reclen_potential;
-  
+
   // for writing seismograms
   float* d_station_seismo_potential;
   float* h_station_seismo_potential;
-  
+
   // sensitivity kernels
   float* d_rho_ac_kl;
   float* d_kappa_ac_kl;
-  
+
+  // approximative hessian for preconditioning kernels
+  float* d_hess_ac_kl;
+
   // coupling acoustic-elastic
   int* d_coupling_ac_el_ispec;
   int* d_coupling_ac_el_ijk;
@@ -291,7 +321,7 @@
   float* d_coupling_ac_el_jacobian2Dw;
 
 
-  
+
 } Mesh;
 
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -40,21 +40,23 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void transfer_surface_to_host_kernel(int* free_surface_ispec,int* free_surface_ijk, int num_free_surface_faces, int* ibool, real* displ, real* noise_surface_movie) {
+__global__ void transfer_surface_to_host_kernel(int* free_surface_ispec,int* free_surface_ijk,
+                                                int num_free_surface_faces, int* ibool,
+                                                realw* displ, realw* noise_surface_movie) {
   int igll = threadIdx.x;
   int iface = blockIdx.x + blockIdx.y*gridDim.x;
-  
+
   // int id = tx + blockIdx.x*blockDim.x + blockIdx.y*blockDim.x*gridDim.x;
-  
+
   if(iface < num_free_surface_faces) {
     int ispec = free_surface_ispec[iface]-1; //-1 for C-based indexing
 
     int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
     int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
     int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
-        
-    int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;    
-    
+
+    int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
     noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)] = displ[iglob*3];
     noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)] = displ[iglob*3+1];
     noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)] = displ[iglob*3+2];
@@ -65,8 +67,8 @@
 
 extern "C"
 void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){
-TRACE("fortranflush");  
-  
+TRACE("fortranflush");
+
   fflush(stdout);
   fflush(stderr);
   printf("Flushing proc %d!\n",*rank);
@@ -76,7 +78,7 @@
 
 extern "C"
 void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id) {
-TRACE("fortranprint");  
+TRACE("fortranprint");
 
   int procid;
   MPI_Comm_rank(MPI_COMM_WORLD,&procid);
@@ -87,7 +89,7 @@
 
 extern "C"
 void FC_FUNC_(fortranprintf,FORTRANPRINTF)(float* val) {
-TRACE("fortranprintf");  
+TRACE("fortranprintf");
 
   int procid;
   MPI_Comm_rank(MPI_COMM_WORLD,&procid);
@@ -98,7 +100,7 @@
 
 extern "C"
 void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val) {
-TRACE("fortranprintd");  
+TRACE("fortranprintd");
 
   int procid;
   MPI_Comm_rank(MPI_COMM_WORLD,&procid);
@@ -110,9 +112,9 @@
 // randomize displ for testing
 extern "C"
 void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,float* h_displ) {
-TRACE("make_displ_rand");  
+TRACE("make_displ_rand");
 
-  Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper  
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
   // float* displ_rnd = (float*)malloc(mp->NGLOB_AB*3*sizeof(float));
   for(int i=0;i<mp->NGLOB_AB*3;i++) {
     h_displ[i] = rand();
@@ -124,10 +126,12 @@
 
 extern "C"
 void FC_FUNC_(transfer_surface_to_host,
-              TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,real* h_noise_surface_movie,int* num_free_surface_faces) {
-TRACE("transfer_surface_to_host");  
- 
-  Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper  
+              TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,
+                                        realw* h_noise_surface_movie,
+                                        int* num_free_surface_faces) {
+TRACE("transfer_surface_to_host");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
   int num_blocks_x = *num_free_surface_faces;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
@@ -135,65 +139,72 @@
     num_blocks_y = num_blocks_y*2;
   }
   dim3 grid(num_blocks_x,num_blocks_y,1);
-  dim3 threads(25,1,1);  
-  
-  transfer_surface_to_host_kernel<<<grid,threads>>>(mp->d_free_surface_ispec,mp->d_free_surface_ijk, *num_free_surface_faces, mp->d_ibool, mp->d_displ, mp->d_noise_surface_movie);  
+  dim3 threads(25,1,1);
 
-  cudaMemcpy(h_noise_surface_movie,mp->d_noise_surface_movie,3*25*(*num_free_surface_faces)*sizeof(real),cudaMemcpyDeviceToHost);  
+  transfer_surface_to_host_kernel<<<grid,threads>>>(mp->d_free_surface_ispec,
+                                                    mp->d_free_surface_ijk,
+                                                    *num_free_surface_faces,
+                                                    mp->d_ibool,
+                                                    mp->d_displ,
+                                                    mp->d_noise_surface_movie);
 
+  cudaMemcpy(h_noise_surface_movie,mp->d_noise_surface_movie,
+             3*25*(*num_free_surface_faces)*sizeof(realw),cudaMemcpyDeviceToHost);
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("transfer_surface_to_host");
-#endif  
+#endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void noise_read_add_surface_movie_cuda_kernel(real* accel, int* ibool, 
-                                                         int* free_surface_ispec,int* free_surface_ijk, 
-                                                         int num_free_surface_faces, 
-                                                         real* noise_surface_movie, 
-                                                         real* normal_x_noise, 
-                                                         real* normal_y_noise, 
-                                                         real* normal_z_noise, 
-                                                         real* mask_noise, 
-                                                         real* free_surface_jacobian2Dw, 
-                                                         real* wgllwgll_xy,
+__global__ void noise_read_add_surface_movie_cuda_kernel(realw* accel, int* ibool,
+                                                         int* free_surface_ispec,
+                                                         int* free_surface_ijk,
+                                                         int num_free_surface_faces,
+                                                         realw* noise_surface_movie,
+                                                         realw* normal_x_noise,
+                                                         realw* normal_y_noise,
+                                                         realw* normal_z_noise,
+                                                         realw* mask_noise,
+                                                         realw* free_surface_jacobian2Dw,
+                                                         realw* wgllwgll_xy,
                                                          float* d_debug) {
 
   int iface = blockIdx.x + gridDim.x*blockIdx.y; // surface element id
 
   // when nspec_top > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
-  if(iface < num_free_surface_faces) { 
+  if(iface < num_free_surface_faces) {
     int ispec = free_surface_ispec[iface]-1;
-    
+
     int igll = threadIdx.x;
-    
+
     int ipoin = 25*iface + igll;
     int i=free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
     int j=free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
-    int k=free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;    
-    
+    int k=free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
+
     int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-    
-    real normal_x = normal_x_noise[ipoin];
-    real normal_y = normal_y_noise[ipoin];
-    real normal_z = normal_z_noise[ipoin];
 
-    real eta = (noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)]*normal_x + 
+    realw normal_x = normal_x_noise[ipoin];
+    realw normal_y = normal_y_noise[ipoin];
+    realw normal_z = normal_z_noise[ipoin];
+
+    realw eta = (noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)]*normal_x +
                 noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)]*normal_y +
                 noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)]*normal_z);
-    
+
     // error from cuda-memcheck and ddt seems "incorrect", because we
     // are passing a __constant__ variable pointer around like it was
     // made using cudaMalloc, which *may* be "incorrect", but produces
     // correct results.
-    
+
     // ========= Invalid __global__ read of size
     // 4 ========= at 0x00000cd8 in
     // compute_add_sources_cuda.cu:260:noise_read_add_surface_movie_cuda_kernel
     // ========= by thread (0,0,0) in block (3443,0) ========= Address
     // 0x203000c8 is out of bounds
-    
+
     // non atomic version for speed testing -- atomic updates are needed for correctness
     // accel[3*iglob] +=   eta*mask_noise[ipoin] * normal_x * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + 25*ispec2D];
     // accel[3*iglob+1] += eta*mask_noise[ipoin] * normal_y * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + 25*ispec2D];
@@ -201,38 +212,42 @@
 
     // Fortran version in SVN -- note deletion of wgllwgll_xy?
     // accel(1,iglob) = accel(1,iglob) + eta * mask_noise(ipoin) * normal_x_noise(ipoin) &
-    // * free_surface_jacobian2Dw(igll,iface) 
+    // * free_surface_jacobian2Dw(igll,iface)
     // accel(2,iglob) = accel(2,iglob) + eta * mask_noise(ipoin) * normal_y_noise(ipoin) &
     // * free_surface_jacobian2Dw(igll,iface)
     // accel(3,iglob) = accel(3,iglob) + eta * mask_noise(ipoin) * normal_z_noise(ipoin) &
     // * free_surface_jacobian2Dw(igll,iface) ! wgllwgll_xy(i,j) * jacobian2D_top(i,j,iface)
-    
+
     // atomicAdd(&accel[iglob*3]  ,eta*mask_noise[ipoin]*normal_x*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
     // atomicAdd(&accel[iglob*3+1],eta*mask_noise[ipoin]*normal_y*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
     // atomicAdd(&accel[iglob*3+2],eta*mask_noise[ipoin]*normal_z*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
-    
+
     atomicAdd(&accel[iglob*3]  ,eta*mask_noise[ipoin]*normal_x*free_surface_jacobian2Dw[igll+25*iface]);
     atomicAdd(&accel[iglob*3+1],eta*mask_noise[ipoin]*normal_y*free_surface_jacobian2Dw[igll+25*iface]);
     atomicAdd(&accel[iglob*3+2],eta*mask_noise[ipoin]*normal_z*free_surface_jacobian2Dw[igll+25*iface]);
-    
+
   }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(noise_read_add_surface_movie_cuda,
-              NOISE_READ_ADD_SURFACE_MOVIE_CUDA)(long* Mesh_pointer_f, real* h_noise_surface_movie, int* num_free_surface_faces_f,int* NOISE_TOMOGRAPHYf) {
-TRACE("noise_read_add_surface_movie_cuda");  
+void FC_FUNC_(noise_read_add_surface_movie_cu,
+              NOISE_READ_ADD_SURFACE_MOVIE_CU)(long* Mesh_pointer_f,
+                                                 realw* h_noise_surface_movie,
+                                                 int* num_free_surface_faces_f,
+                                                 int* NOISE_TOMOGRAPHYf) {
+TRACE("noise_read_add_surface_movie_cu");
 
-  // EPIK_TRACER("noise_read_add_surface_movie_cuda");
-  
+  // EPIK_TRACER("noise_read_add_surface_movie_cu");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
   int num_free_surface_faces = *num_free_surface_faces_f;
   int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
   float* d_noise_surface_movie;
   cudaMalloc((void**)&d_noise_surface_movie,3*25*num_free_surface_faces*sizeof(float));
-  cudaMemcpy(d_noise_surface_movie, h_noise_surface_movie,3*25*num_free_surface_faces*sizeof(real),cudaMemcpyHostToDevice);
+  cudaMemcpy(d_noise_surface_movie, h_noise_surface_movie,
+             3*25*num_free_surface_faces*sizeof(realw),cudaMemcpyHostToDevice);
 
   int num_blocks_x = num_free_surface_faces;
   int num_blocks_y = 1;
@@ -247,45 +262,45 @@
   float* d_debug;
   // cudaMalloc((void**)&d_debug,128*sizeof(float));
   // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-  
+
   if(NOISE_TOMOGRAPHY == 2) { // add surface source to forward field
     noise_read_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_accel,
-							       mp->d_ibool,
-							       mp->d_free_surface_ispec,
-							       mp->d_free_surface_ijk,
-							       num_free_surface_faces,
-							       d_noise_surface_movie,
-							       mp->d_normal_x_noise,
-							       mp->d_normal_y_noise,
-							       mp->d_normal_z_noise,
-							       mp->d_mask_noise,
-							       mp->d_free_surface_jacobian2Dw,
-							       mp->d_wgllwgll_xy,
-							       d_debug);
+                     mp->d_ibool,
+                     mp->d_free_surface_ispec,
+                     mp->d_free_surface_ijk,
+                     num_free_surface_faces,
+                     d_noise_surface_movie,
+                     mp->d_normal_x_noise,
+                     mp->d_normal_y_noise,
+                     mp->d_normal_z_noise,
+                     mp->d_mask_noise,
+                     mp->d_free_surface_jacobian2Dw,
+                     mp->d_wgllwgll_xy,
+                     d_debug);
   }
-  else if(NOISE_TOMOGRAPHY==3) { // add surface source to adjoint (backward) field
+  else if(NOISE_TOMOGRAPHY == 3) { // add surface source to adjoint (backward) field
     noise_read_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_b_accel,
-							       mp->d_ibool,
-							       mp->d_free_surface_ispec,
-							       mp->d_free_surface_ijk,
-							       num_free_surface_faces,
-							       d_noise_surface_movie,
-							       mp->d_normal_x_noise,
-							       mp->d_normal_y_noise,
-							       mp->d_normal_z_noise,
-							       mp->d_mask_noise,
-							       mp->d_free_surface_jacobian2Dw,
-							       mp->d_wgllwgll_xy,
-							       d_debug);
+                     mp->d_ibool,
+                     mp->d_free_surface_ispec,
+                     mp->d_free_surface_ijk,
+                     num_free_surface_faces,
+                     d_noise_surface_movie,
+                     mp->d_normal_x_noise,
+                     mp->d_normal_y_noise,
+                     mp->d_normal_z_noise,
+                     mp->d_mask_noise,
+                     mp->d_free_surface_jacobian2Dw,
+                     mp->d_wgllwgll_xy,
+                     d_debug);
   }
-  
 
+
   // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
   // for(int i=0;i<8;i++) {
   // printf("debug[%d]= %e\n",i,h_debug[i]);
   // }
   // MPI_Abort(MPI_COMM_WORLD,1);
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING  
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("noise_read_add_surface_movie_cuda_kernel");
 #endif
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h	2011-10-30 02:25:28 UTC (rev 19129)
@@ -91,10 +91,10 @@
   void bindTexturesPotential(float* d_potential_acoustic)
   {
     cudaError_t err;
-    
+
     cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<float>();
-    
-    err = cudaBindTexture(NULL,tex_potential_acoustic, d_potential_acoustic, 
+
+    err = cudaBindTexture(NULL,tex_potential_acoustic, d_potential_acoustic,
                           channelDescFloat, NGLOB*sizeof(float));
     if (err != cudaSuccess)
     {
@@ -106,10 +106,10 @@
   void bindTexturesPotential_dot_dot(float* d_potential_dot_dot_acoustic)
   {
     cudaError_t err;
-    
+
     cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<float>();
-    
-    err = cudaBindTexture(NULL,tex_potential_dot_dot_acoustic, d_potential_dot_dot_acoustic, 
+
+    err = cudaBindTexture(NULL,tex_potential_dot_dot_acoustic, d_potential_dot_dot_acoustic,
                           channelDescFloat, NGLOB*sizeof(float));
     if (err != cudaSuccess)
     {

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -57,7 +57,7 @@
 
 extern "C"
 void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)() {
-TRACE("pause_for_debug");  
+TRACE("pause_for_debug");
 
   pause_for_debugger(1);
 }
@@ -67,7 +67,7 @@
 void pause_for_debugger(int pause) {
   if(pause) {
     int myrank;
-    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);  
+    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
     printf("I'm rank %d\n",myrank);
     int i = 0;
     char hostname[256];
@@ -89,7 +89,7 @@
   cudaThreadSynchronize();
   cudaError_t err = cudaGetLastError();
   if (err != cudaSuccess)
-    {      
+    {
       fprintf(stderr,"Error after %s: %s\n", kernel_name, cudaGetErrorString(err));
       pause_for_debugger(0);
       exit(1);
@@ -128,20 +128,20 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 void get_free_memory(double* free_db, double* used_db, double* total_db) {
-  
+
   // gets memory usage in byte
   size_t free_byte ;
   size_t total_byte ;
   cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
   if ( cudaSuccess != cuda_status ){
     printf("Error: cudaMemGetInfo fails, %s \n", cudaGetErrorString(cuda_status) );
-    exit(1); 
+    exit(1);
   }
-  
+
   *free_db = (double)free_byte ;
   *total_db = (double)total_byte ;
   *used_db = *total_db - *free_db ;
-  return;  
+  return;
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -149,17 +149,17 @@
 // Saves GPU memory usage to file
 void output_free_memory(char* info_str) {
   int myrank;
-  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);  
+  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
   FILE* fp;
   char filename[BUFSIZ];
   double free_db,used_db,total_db;
 
   get_free_memory(&free_db,&used_db,&total_db);
-  
+
   sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_mem_usage_proc_%03d.txt",myrank);
   fp = fopen(filename,"a+");
   fprintf(fp,"%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
-	 used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+   used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
   fclose(fp);
 }
 
@@ -169,8 +169,8 @@
 extern "C"
 void FC_FUNC_(output_free_device_memory,
               OUTPUT_FREE_DEVICE_MEMORY)(int* id) {
-TRACE("output_free_device_memory");  
-              
+TRACE("output_free_device_memory");
+
   char info[6];
   sprintf(info,"f %d:",*id);
   output_free_memory(info);
@@ -184,12 +184,12 @@
   int myrank;
   MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
   double free_db,used_db,total_db;
-  
+
   get_free_memory(&free_db,&used_db,&total_db);
-  
+
   printf("%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
-	 used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
-  
+   used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -197,7 +197,7 @@
 extern "C"
 void FC_FUNC_(show_free_device_memory,
               SHOW_FREE_DEVICE_MEMORY)() {
-TRACE("show_free_device_memory");  
+TRACE("show_free_device_memory");
 
   show_free_memory("from fortran");
 }
@@ -206,12 +206,12 @@
 extern "C"
 void FC_FUNC_(get_free_device_memory,
               get_FREE_DEVICE_MEMORY)(float* free, float* used, float* total ) {
-TRACE("get_free_device_memory");  
+TRACE("get_free_device_memory");
 
   double free_db,used_db,total_db;
-  
+
   get_free_memory(&free_db,&used_db,&total_db);
-  
+
   // converts to MB
   *free = (float) free_db/1024.0/1024.0;
   *used = (float) used_db/1024.0/1024.0;
@@ -220,9 +220,201 @@
 }
 
 
+/* ----------------------------------------------------------------------------------------------- */
+//daniel
+/*
+__global__ void check_phase_ispec_kernel(int num_phase_ispec,
+                                         int* phase_ispec,
+                                         int NSPEC_AB,
+                                         int* ier) {
 
+  int i,ispec,iphase,count0,count1;
+  *ier = 0;
+
+  for(iphase=0; iphase < 2; iphase++){
+    count0 = 0;
+    count1 = 0;
+
+    for(i=0; i < num_phase_ispec; i++){
+      ispec = phase_ispec[iphase*num_phase_ispec + i] - 1;
+      if( ispec < -1 || ispec >= NSPEC_AB ){
+        printf("Error in d_phase_ispec_inner_elastic %d %d\n",i,ispec);
+        *ier = 1;
+        return;
+      }
+      if( ispec >= 0 ){ count0++;}
+      if( ispec < 0 ){ count1++;}
+    }
+
+    printf("check_phase_ispec done: phase %d, count = %d %d \n",iphase,count0,count1);
+
+  }
+}
+
+void check_phase_ispec(long* Mesh_pointer_f,int type){
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  printf("check phase_ispec for type=%d\n",type);
+
+  dim3 grid(1,1);
+  dim3 threads(1,1,1);
+
+  int* h_debug = (int*) calloc(1,sizeof(int));
+  int* d_debug;
+  cudaMalloc((void**)&d_debug,sizeof(int));
+
+  if( type == 1 ){
+    check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_elastic,
+                                             mp->d_phase_ispec_inner_elastic,
+                                             mp->NSPEC_AB,
+                                             d_debug);
+  }else if( type == 2 ){
+    check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_acoustic,
+                                               mp->d_phase_ispec_inner_acoustic,
+                                               mp->NSPEC_AB,
+                                               d_debug);
+  }
+
+  cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+  cudaFree(d_debug);
+  if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+  free(h_debug);
+  fflush(stdout);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("check_phase_ispec");
+#endif
+
+}
+*/
+
 /* ----------------------------------------------------------------------------------------------- */
+//daniel
+/*
+__global__ void check_ispec_is_kernel(int NSPEC_AB,
+                                      int* ispec_is,
+                                      int* ier) {
 
+  int ispec,count0,count1;
+
+  *ier = 0;
+  count0 = 0;
+  count1 = 0;
+  for(ispec=0; ispec < NSPEC_AB; ispec++){
+    if( ispec_is[ispec] < -1 || ispec_is[ispec] > 1 ){
+      printf("Error in ispec_is %d %d\n",ispec,ispec_is[ispec]);
+      *ier = 1;
+      return;
+      //exit(1);
+    }
+    if( ispec_is[ispec] == 0 ){count0++;}
+    if( ispec_is[ispec] != 0 ){count1++;}
+  }
+  printf("check_ispec_is done: count = %d %d\n",count0,count1);
+}
+
+void check_ispec_is(long* Mesh_pointer_f,int type){
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  printf("check ispec_is for type=%d\n",type);
+
+  dim3 grid(1,1);
+  dim3 threads(1,1,1);
+
+  int* h_debug = (int*) calloc(1,sizeof(int));
+  int* d_debug;
+  cudaMalloc((void**)&d_debug,sizeof(int));
+
+  if( type == 0 ){
+    check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+                                            mp->d_ispec_is_inner,
+                                            d_debug);
+  }else if( type == 1 ){
+    check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+                                            mp->d_ispec_is_elastic,
+                                            d_debug);
+  }else if( type == 2 ){
+    check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+                                            mp->d_ispec_is_acoustic,
+                                            d_debug);
+  }
+
+  cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+  cudaFree(d_debug);
+  if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+  free(h_debug);
+  fflush(stdout);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("check_ispec_is");
+#endif
+}
+*/
+/* ----------------------------------------------------------------------------------------------- */
+//daniel
+/*
+__global__ void check_array_ispec_kernel(int num_array_ispec,
+                                         int* array_ispec,
+                                         int NSPEC_AB,
+                                         int* ier) {
+
+  int i,ispec,count0,count1;
+
+  *ier = 0;
+  count0 = 0;
+  count1 = 0;
+
+  for(i=0; i < num_array_ispec; i++){
+    ispec = array_ispec[i] - 1;
+    if( ispec < -1 || ispec >= NSPEC_AB ){
+      printf("Error in d_array_ispec %d %d\n",i,ispec);
+      *ier = 1;
+      return;
+    }
+    if( ispec >= 0 ){ count0++;}
+    if( ispec < 0 ){ count1++;}
+  }
+
+  printf("check_array_ispec done: count = %d %d \n",count0,count1);
+}
+
+void check_array_ispec(long* Mesh_pointer_f,int type){
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  printf("check array_ispec for type=%d\n",type);
+
+  dim3 grid(1,1);
+  dim3 threads(1,1,1);
+
+  int* h_debug = (int*) calloc(1,sizeof(int));
+  int* d_debug;
+  cudaMalloc((void**)&d_debug,sizeof(int));
+
+  if( type == 1 ){
+    check_array_ispec_kernel<<<grid,threads>>>(mp->d_num_abs_boundary_faces,
+                                               mp->d_abs_boundary_ispec,
+                                               mp->NSPEC_AB,
+                                               d_debug);
+  }
+
+  cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+  cudaFree(d_debug);
+  if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+  free(h_debug);
+  fflush(stdout);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("check_array_ispec");
+#endif
+
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+
 // GPU preparation
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -230,25 +422,28 @@
 extern "C"
 void FC_FUNC_(prepare_constants_device,
               PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
-                                        int* h_NGLLX, 
+                                        int* h_NGLLX,
                                         int* NSPEC_AB, int* NGLOB_AB,
                                         float* h_xix, float* h_xiy, float* h_xiz,
                                         float* h_etax, float* h_etay, float* h_etaz,
                                         float* h_gammax, float* h_gammay, float* h_gammaz,
                                         float* h_kappav, float* h_muv,
-                                        int* h_ibool, 
-                                        int* num_interfaces_ext_mesh, int* max_nibool_interfaces_ext_mesh,
-                                        int* h_nibool_interfaces_ext_mesh, int* h_ibool_interfaces_ext_mesh,
-                                        float* h_hprime_xx,float* h_hprime_yy,float* h_hprime_zz, 
+                                        int* h_ibool,
+                                        int* num_interfaces_ext_mesh,
+                                        int* max_nibool_interfaces_ext_mesh,
+                                        int* h_nibool_interfaces_ext_mesh,
+                                        int* h_ibool_interfaces_ext_mesh,
+                                        float* h_hprime_xx,float* h_hprime_yy,float* h_hprime_zz,
                                         float* h_hprimewgll_xx,float* h_hprimewgll_yy,float* h_hprimewgll_zz,
-                                        float* h_wgllwgll_xy,float* h_wgllwgll_xz,float* h_wgllwgll_yz,        
-                                        int* ABSORBING_CONDITIONS,    
+                                        float* h_wgllwgll_xy,float* h_wgllwgll_xz,float* h_wgllwgll_yz,
+                                        int* ABSORBING_CONDITIONS,
                                         int* h_abs_boundary_ispec, int* h_abs_boundary_ijk,
                                         float* h_abs_boundary_normal,
                                         float* h_abs_boundary_jacobian2Dw,
                                         int* h_num_abs_boundary_faces,
-                                        int* h_ispec_is_inner, 
+                                        int* h_ispec_is_inner,
                                         int* NSOURCES,
+                                        int* nsources_local_f,
                                         float* h_sourcearrays,
                                         int* h_islice_selected_source,
                                         int* h_ispec_selected_source,
@@ -256,49 +451,48 @@
                                         int* h_ispec_selected_rec,
                                         int* nrec_f,
                                         int* nrec_local_f,
-                                        int* SIMULATION_TYPE) {
+                                        int* SIMULATION_TYPE,
+                                        int* ncuda_devices) {
 
 TRACE("prepare_constants_device");
-  
+
   int procid;
   int device_count = 0;
-  
+
   // cuda initialization (needs -lcuda library)
   //cuInit(0);
   CUresult status = cuInit(0);
   if ( CUDA_SUCCESS != status ) exit_on_error("CUDA device initialization failed");
-      
-  // Gets number of GPU devices     
+
+  // Gets number of GPU devices
   cudaGetDeviceCount(&device_count);
   //printf("Cuda Devices: %d\n", device_count);
-  if (device_count == 0) exit_on_error("There is no device supporting CUDA\n");    
-  
+  if (device_count == 0) exit_on_error("There is no device supporting CUDA\n");
+  *ncuda_devices = device_count;
+
   // Gets rank number of MPI process
   MPI_Comm_rank(MPI_COMM_WORLD, &procid);
 
-  // Sets the active device 
+  // Sets the active device
   if(device_count > 1) {
-    // daniel: todo - generalize for more GPUs per node?
-    // assumes we have 2 GPU devices per node and running 2 MPI processes per node as well
-    cudaSetDevice((procid)%2);
-    exit_on_cuda_error("cudaSetDevice");   
+    // generalized for more GPUs per node
+    cudaSetDevice((procid)%device_count);
+    exit_on_cuda_error("cudaSetDevice");
   }
 
-  // allocates mesh parameter structure  
+  // allocates mesh parameter structure
   Mesh* mp = (Mesh*)malloc(sizeof(Mesh));
-  if (mp == NULL) exit_on_error("error allocating mesh pointer"); 
+  if (mp == NULL) exit_on_error("error allocating mesh pointer");
   *Mesh_pointer = (long)mp;
 
   // checks if NGLLX == 5
   if( *h_NGLLX != NGLLX ){
-    exit_on_error("NGLLX must be 5 for CUDA devices");   
+    exit_on_error("NGLLX must be 5 for CUDA devices");
   }
-  
-  // sets global parameters  
+
+  // sets global parameters
   mp->NSPEC_AB = *NSPEC_AB;
   mp->NGLOB_AB = *NGLOB_AB;
-  
-  //mp->d_num_phase_ispec_elastic = *num_phase_ispec_elastic;
 
   // sets constant arrays
   setConst_hprime_xx(h_hprime_xx,mp);
@@ -310,143 +504,153 @@
   setConst_wgllwgll_xy(h_wgllwgll_xy,mp);
   setConst_wgllwgll_xz(h_wgllwgll_xz,mp);
   setConst_wgllwgll_yz(h_wgllwgll_yz,mp);
-    
+
   /* Assuming NGLLX=5. Padded is then 128 (5^3+3) */
-  int size_padded = 128 * (*NSPEC_AB);
-  int size = 125 * (*NSPEC_AB);
+  int size_padded = 128 * (mp->NSPEC_AB);
+  int size = 125 * (mp->NSPEC_AB);
 
-  // mesh    
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix, size_padded*sizeof(float)),1001);	 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy, size_padded*sizeof(float)),1002);	 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz, size_padded*sizeof(float)),1003);	 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax, size_padded*sizeof(float)),1004);	 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay, size_padded*sizeof(float)),1005);	 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz, size_padded*sizeof(float)),1006);	 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax, size_padded*sizeof(float)),1007);	 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay, size_padded*sizeof(float)),1008);	 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz, size_padded*sizeof(float)),1009);	 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappav, size_padded*sizeof(float)),1010); 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muv, size_padded*sizeof(float)),1011);	 
+  // mesh
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix, size_padded*sizeof(float)),1001);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy, size_padded*sizeof(float)),1002);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz, size_padded*sizeof(float)),1003);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax, size_padded*sizeof(float)),1004);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay, size_padded*sizeof(float)),1005);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz, size_padded*sizeof(float)),1006);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax, size_padded*sizeof(float)),1007);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay, size_padded*sizeof(float)),1008);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz, size_padded*sizeof(float)),1009);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappav, size_padded*sizeof(float)),1010);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muv, size_padded*sizeof(float)),1011);
 
+  // transfer constant element data with padding
+  for(int i=0;i < mp->NSPEC_AB;i++) {
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_xix + i*128, &h_xix[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1501);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_xiy+i*128,   &h_xiy[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1502);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_xiz+i*128,   &h_xiz[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1503);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_etax+i*128,  &h_etax[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1504);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_etay+i*128,  &h_etay[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1505);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_etaz+i*128,  &h_etaz[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1506);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_gammax+i*128,&h_gammax[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1507);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_gammay+i*128,&h_gammay[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1508);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaz+i*128,&h_gammaz[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1509);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_kappav+i*128,&h_kappav[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1510);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_muv+i*128,   &h_muv[i*125],
+                                       125*sizeof(float),cudaMemcpyHostToDevice),1511);
+  }
+
   // global indexing
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool, size_padded*sizeof(int)),1021);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool,size_padded*sizeof(int)),1021);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool, h_ibool,
-                                     size*sizeof(int)  ,cudaMemcpyHostToDevice),1022);    
+                                     size*sizeof(int),cudaMemcpyHostToDevice),1022);
 
-    
+
   // prepare interprocess-edge exchange information
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_ext_mesh,
-				     *num_interfaces_ext_mesh*sizeof(int)),1201);
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_ext_mesh,h_nibool_interfaces_ext_mesh,
-				     *num_interfaces_ext_mesh*sizeof(int),cudaMemcpyHostToDevice),1202);
+  if( *num_interfaces_ext_mesh > 0 ){
+    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_ext_mesh,
+                                       (*num_interfaces_ext_mesh)*sizeof(int)),1201);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_ext_mesh,h_nibool_interfaces_ext_mesh,
+                                       (*num_interfaces_ext_mesh)*sizeof(int),cudaMemcpyHostToDevice),1202);
 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_ext_mesh,
-                                     *num_interfaces_ext_mesh* *max_nibool_interfaces_ext_mesh*
-                                     sizeof(int)),1203);
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_ext_mesh,h_ibool_interfaces_ext_mesh,
-				     *num_interfaces_ext_mesh* *max_nibool_interfaces_ext_mesh*sizeof(int),
-				     cudaMemcpyHostToDevice),1204);
+    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_ext_mesh,
+                                       (*num_interfaces_ext_mesh)*(*max_nibool_interfaces_ext_mesh)*sizeof(int)),1203);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_ext_mesh,h_ibool_interfaces_ext_mesh,
+                                       (*num_interfaces_ext_mesh)*(*max_nibool_interfaces_ext_mesh)*sizeof(int),
+                                       cudaMemcpyHostToDevice),1204);
+  }
 
-  // inner elements 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,*NSPEC_AB*sizeof(int)),1205);
+  // inner elements
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,mp->NSPEC_AB*sizeof(int)),1205);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_inner, h_ispec_is_inner,
-				     *NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),1206);
-              
+                                     mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),1206);
 
+  // daniel: check
+  //check_ispec_is(Mesh_pointer,0);
+
   // absorbing boundaries
   mp->d_num_abs_boundary_faces = *h_num_abs_boundary_faces;
-  if( *ABSORBING_CONDITIONS == 1 && mp->d_num_abs_boundary_faces > 0 ){  
-    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_ispec,
+  if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0 ){
+    print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ispec),
                                        (mp->d_num_abs_boundary_faces)*sizeof(int)),1101);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ispec, h_abs_boundary_ispec,
                                        (mp->d_num_abs_boundary_faces)*sizeof(int),
                                        cudaMemcpyHostToDevice),1102);
-    
-    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_ijk,
+
+    // daniel: check
+    //check_array_ispec(Mesh_pointer,1);
+
+
+    print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ijk),
                                        3*25*(mp->d_num_abs_boundary_faces)*sizeof(int)),1103);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ijk, h_abs_boundary_ijk,
                                        3*25*(mp->d_num_abs_boundary_faces)*sizeof(int),
                                        cudaMemcpyHostToDevice),1104);
-    
-    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_normal,
-                                       3*25*(mp->d_num_abs_boundary_faces)*sizeof(int)),1105);
+
+    print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_normal),
+                                       3*25*(mp->d_num_abs_boundary_faces)*sizeof(float)),1105);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_normal, h_abs_boundary_normal,
-                                       3*25*(mp->d_num_abs_boundary_faces)*sizeof(int),
+                                       3*25*(mp->d_num_abs_boundary_faces)*sizeof(float),
                                        cudaMemcpyHostToDevice),1106);
-    
-    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_jacobian2Dw,
+
+    print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_jacobian2Dw),
                                        25*(mp->d_num_abs_boundary_faces)*sizeof(float)),1107);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_jacobian2Dw, h_abs_boundary_jacobian2Dw,
                                        25*(mp->d_num_abs_boundary_faces)*sizeof(float),
-                                       cudaMemcpyHostToDevice),1108);  
+                                       cudaMemcpyHostToDevice),1108);
   }
-  
+
   // sources
+  mp->nsources_local = *nsources_local_f;
   if (*SIMULATION_TYPE == 1  || *SIMULATION_TYPE == 3){
     // not needed in case of pure adjoint simulations (SIMULATION_TYPE == 2)
-    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_sourcearrays, sizeof(float)* *NSOURCES*3*125),1301);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_sourcearrays, h_sourcearrays, sizeof(float)* *NSOURCES*3*125,
-                                       cudaMemcpyHostToDevice),1302);
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_sourcearrays,
+                                       sizeof(float)* *NSOURCES*3*125),1301);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_sourcearrays, h_sourcearrays,
+                                       sizeof(float)* *NSOURCES*3*125,cudaMemcpyHostToDevice),1302);
 
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_stf_pre_compute),
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_stf_pre_compute,
                                        *NSOURCES*sizeof(double)),1303);
   }
 
-  print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_source, sizeof(int) * *NSOURCES),1401);
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_source, h_islice_selected_source, sizeof(int)* *NSOURCES,
-				     cudaMemcpyHostToDevice),1402);
+  print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_source,
+                                     sizeof(int) * *NSOURCES),1401);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_source, h_islice_selected_source,
+                                     sizeof(int)* *NSOURCES,cudaMemcpyHostToDevice),1402);
 
-  print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_ispec_selected_source, sizeof(int)* *NSOURCES),1403);
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_source, h_ispec_selected_source,sizeof(int)* *NSOURCES,
-				     cudaMemcpyHostToDevice),1404);
+  print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_ispec_selected_source,
+                                     sizeof(int)* *NSOURCES),1403);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_source, h_ispec_selected_source,
+                                     sizeof(int)* *NSOURCES,cudaMemcpyHostToDevice),1404);
 
-  
-  // transfer constant element data with padding
-  for(int i=0;i<*NSPEC_AB;i++) {
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_xix + i*128, &h_xix[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1501);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_xiy+i*128,   &h_xiy[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1502);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_xiz+i*128,   &h_xiz[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1503);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_etax+i*128,  &h_etax[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1504);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_etay+i*128,  &h_etay[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1505);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_etaz+i*128,  &h_etaz[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1506);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_gammax+i*128,&h_gammax[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1507);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_gammay+i*128,&h_gammay[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1508);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaz+i*128,&h_gammaz[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1509);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_kappav+i*128,&h_kappav[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1510);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_muv+i*128,   &h_muv[i*125],
-				       125*sizeof(float),cudaMemcpyHostToDevice),1511);
-      
-  }
-        
 
   // receiver stations
   int nrec = *nrec_f; // total number of receivers
-  int nrec_local = *nrec_local_f;	// number of receiver located in this partition
+  mp->nrec_local = *nrec_local_f; // number of receiver located in this partition
+  //int nrec_local = *nrec_local_f;
   // note that:
   // size(number_receiver_global) = nrec_local
-  // size(ispec_selected_rec) = nrec  
-  mp->nrec_local = nrec_local;
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_number_receiver_global),nrec_local*sizeof(int)),1);
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_number_receiver_global,h_number_receiver_global,
-                                     nrec_local*sizeof(int),cudaMemcpyHostToDevice),1512);  
-  
+  // size(ispec_selected_rec) = nrec
+  if( mp->nrec_local > 0 ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_number_receiver_global),mp->nrec_local*sizeof(int)),1);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_number_receiver_global,h_number_receiver_global,
+                                     mp->nrec_local*sizeof(int),cudaMemcpyHostToDevice),1512);
+  }
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_selected_rec),nrec*sizeof(int)),1513);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_rec,h_ispec_selected_rec,
-                                     nrec*sizeof(int),cudaMemcpyHostToDevice),1514);  
+                                     nrec*sizeof(int),cudaMemcpyHostToDevice),1514);
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("prepare_constants_device");
-#endif            
+#endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -455,30 +659,30 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(prepare_adjoint_sim2_or_3_constants_device,
-              PREPARE_ADJOINT_SIM2_OR_3_CONSTANTS_DEVICE)(
-                                                          long* Mesh_pointer_f,
-                                                          int* islice_selected_rec,
-                                                          int* islice_selected_rec_size) {
-  
-TRACE("prepare_adjoint_sim2_or_3_constants_device");
-  
+extern "C"
+void FC_FUNC_(prepare_sim2_or_3_const_device,
+              PREPARE_SIM2_OR_3_CONST_DEVICE)(
+                                              long* Mesh_pointer_f,
+                                              int* islice_selected_rec,
+                                              int* islice_selected_rec_size) {
+
+TRACE("prepare_sim2_or_3_const_device");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-  
+
   // allocates arrays for receivers
   print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_rec,
                                      *islice_selected_rec_size*sizeof(int)),7001);
-  
+
   // copies arrays to GPU device
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_rec,islice_selected_rec, 
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_rec,islice_selected_rec,
                                      *islice_selected_rec_size*sizeof(int),cudaMemcpyHostToDevice),7002);
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING       
-  exit_on_cuda_error("prepare_adjoint_sim2_or_3_constants_device");  
-#endif  
-}  
 
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("prepare_sim2_or_3_const_device");
+#endif
+}
+
 /* ----------------------------------------------------------------------------------------------- */
 
 // for ACOUSTIC simulations
@@ -487,11 +691,11 @@
 
 extern "C"
 void FC_FUNC_(prepare_fields_acoustic_device,
-              PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f, 
-                                              float* rmass_acoustic, 
+              PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
+                                              float* rmass_acoustic,
                                               float* rhostore,
                                               float* kappastore,
-                                              int* num_phase_ispec_acoustic, 
+                                              int* num_phase_ispec_acoustic,
                                               int* phase_ispec_inner_acoustic,
                                               int* ispec_is_acoustic,
                                               int* NOISE_TOMOGRAPHY,
@@ -501,255 +705,281 @@
                                               int* ABSORBING_CONDITIONS,
                                               int* b_reclen_potential,
                                               float* b_absorb_potential,
-                                              int* SIMULATION_TYPE,
-                                              float* rho_ac_kl,
-                                              float* kappa_ac_kl,
                                               int* ELASTIC_SIMULATION,
                                               int* num_coupling_ac_el_faces,
                                               int* coupling_ac_el_ispec,
                                               int* coupling_ac_el_ijk,
                                               float* coupling_ac_el_normal,
-                                              float* coupling_ac_el_jacobian2Dw                                              
+                                              float* coupling_ac_el_jacobian2Dw
                                               ) {
-  
+
   TRACE("prepare_fields_acoustic_device");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f);
   /* Assuming NGLLX==5. Padded is then 128 (5^3+3) */
   int size_padded = 128 * mp->NSPEC_AB;
   int size_nonpadded = 125 * mp->NSPEC_AB;
   int size = mp->NGLOB_AB;
-  
+
   // allocates arrays on device (GPU)
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_acoustic),sizeof(float)*size),9001);
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_acoustic),sizeof(float)*size),9002);
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_dot_acoustic),sizeof(float)*size),9003);
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_potential_dot_dot_buffer),sizeof(float)*size),9004);
-  
+
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(float)*size),9005);
-  
+
   // padded array
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rhostore),size_padded*sizeof(float)),9006); 
-  
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rhostore),size_padded*sizeof(float)),9006);
+
   // non-padded array
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_nonpadded*sizeof(float)),9007); 
-  
-  mp->num_phase_ispec_acoustic = *num_phase_ispec_acoustic;
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic), mp->num_phase_ispec_acoustic*2*sizeof(int)),9008);
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic),mp->NSPEC_AB*sizeof(int)),9009);
-  
-  // free surface
-  if( *NOISE_TOMOGRAPHY == 0 ){
-    // allocate surface arrays
-    mp->num_free_surface_faces = *num_free_surface_faces;
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),mp->num_free_surface_faces*sizeof(int)),9201);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),3*25*mp->num_free_surface_faces*sizeof(int)),9202);
-    
-    // transfers values onto GPU
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
-                                       mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9203);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
-                                       3*25*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9204);    
-  }
-  
-  // absorbing boundaries
-  if( *ABSORBING_CONDITIONS == 1 ){
-    mp->d_b_reclen_potential = *b_reclen_potential;
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),9301); 
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential,
-                                       mp->d_b_reclen_potential,cudaMemcpyHostToDevice),9302);    
-  }
-  
-  // kernel simulations
-  if( *SIMULATION_TYPE == 3 ){
-    // allocates backward/reconstructed arrays on device (GPU)
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_acoustic),sizeof(float)*size),9014);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_acoustic),sizeof(float)*size),9015);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_dot_acoustic),sizeof(float)*size),9016);    
-    
-    // allocates kernels  
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_ac_kl),125*mp->NSPEC_AB*sizeof(float)),9017);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_ac_kl),125*mp->NSPEC_AB*sizeof(float)),9018);
-    // copies over initial values
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_ac_kl,rho_ac_kl, 
-                                       125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),9019);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_kappa_ac_kl,kappa_ac_kl, 
-                                       125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),9020);
-    
-  }
-  
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_nonpadded*sizeof(float)),9007);
+
   // transfer element data
   print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_acoustic,rmass_acoustic,
                                      sizeof(float)*size,cudaMemcpyHostToDevice),9100);
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic, 
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic,
                                      mp->num_phase_ispec_acoustic*2*sizeof(int),cudaMemcpyHostToDevice),9101);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_acoustic,ispec_is_acoustic,
                                      mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),9102);
-                                     
+
   print_CUDA_error_if_any(cudaMemcpy(mp->d_kappastore,kappastore,
                                      size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),9105);
-  
+
   // transfer constant element data with padding
-  for(int i=0;i<mp->NSPEC_AB;i++) {  
+  for(int i=0; i < mp->NSPEC_AB; i++) {
     print_CUDA_error_if_any(cudaMemcpy(mp->d_rhostore+i*128, &rhostore[i*125],
                                        125*sizeof(float),cudaMemcpyHostToDevice),9106);
   }
-  
+
+  // phase elements
+  mp->num_phase_ispec_acoustic = *num_phase_ispec_acoustic;
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic),
+                                      mp->num_phase_ispec_acoustic*2*sizeof(int)),9008);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic),
+                                     mp->NSPEC_AB*sizeof(int)),9009);
+
+  // free surface
+  if( *NOISE_TOMOGRAPHY == 0 &&  *ELASTIC_SIMULATION == 0){
+    // allocate surface arrays
+    mp->num_free_surface_faces = *num_free_surface_faces;
+    if( mp->num_free_surface_faces > 0 ){
+      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
+                                       mp->num_free_surface_faces*sizeof(int)),9201);
+      print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
+                                       mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9203);
+
+      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
+                                       3*25*mp->num_free_surface_faces*sizeof(int)),9202);
+      print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+                                       3*25*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9204);
+    }
+  }
+
+  // absorbing boundaries
+  if( *ABSORBING_CONDITIONS ){
+    mp->d_b_reclen_potential = *b_reclen_potential;
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),9301);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential,
+                                       mp->d_b_reclen_potential,cudaMemcpyHostToDevice),9302);
+  }
+
+
   // for seismograms
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_potential),
-                                     mp->nrec_local*125*sizeof(float)),9107);
-  mp->h_station_seismo_potential = (float*)malloc(mp->nrec_local*125*sizeof(float));
-  if( mp->h_station_seismo_potential == NULL) exit_on_error("error allocating h_station_seismo_potential");
+  if( mp->nrec_local > 0 ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_potential),
+                                       mp->nrec_local*125*sizeof(float)),9107);
+    mp->h_station_seismo_potential = (float*)malloc(mp->nrec_local*125*sizeof(float));
+    if( mp->h_station_seismo_potential == NULL) exit_on_error("error allocating h_station_seismo_potential");
+  }
 
 
   // coupling with elastic parts
-  if( *ELASTIC_SIMULATION == 1 && *num_coupling_ac_el_faces > 0 ){
+  if( *ELASTIC_SIMULATION && *num_coupling_ac_el_faces > 0 ){
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ispec),
-                                       (*num_coupling_ac_el_faces)*sizeof(int)),9601); 
+                                       (*num_coupling_ac_el_faces)*sizeof(int)),9601);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ispec,coupling_ac_el_ispec,
-                                       (*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),9602);    
+                                       (*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),9602);
 
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ijk),
-                                       3*25*(*num_coupling_ac_el_faces)*sizeof(int)),9603); 
+                                       3*25*(*num_coupling_ac_el_faces)*sizeof(int)),9603);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ijk,coupling_ac_el_ijk,
-                                       3*25*(*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),9604);    
+                                       3*25*(*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),9604);
 
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_normal),
-                                        3*25*(*num_coupling_ac_el_faces)*sizeof(float)),9605); 
+                                        3*25*(*num_coupling_ac_el_faces)*sizeof(float)),9605);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_normal,coupling_ac_el_normal,
-                                        3*25*(*num_coupling_ac_el_faces)*sizeof(float),cudaMemcpyHostToDevice),9606);    
+                                        3*25*(*num_coupling_ac_el_faces)*sizeof(float),cudaMemcpyHostToDevice),9606);
 
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_jacobian2Dw),
-                                        25*(*num_coupling_ac_el_faces)*sizeof(float)),9607);  
+                                        25*(*num_coupling_ac_el_faces)*sizeof(float)),9607);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_jacobian2Dw,coupling_ac_el_jacobian2Dw,
                                         25*(*num_coupling_ac_el_faces)*sizeof(float),cudaMemcpyHostToDevice),9608);
-  
+
   }
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING       
-  exit_on_cuda_error("prepare_fields_acoustic_device");  
-#endif    
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("prepare_fields_acoustic_device");
+#endif
 }
 
 
 /* ----------------------------------------------------------------------------------------------- */
 
+extern "C"
+void FC_FUNC_(prepare_fields_acoustic_adj_dev,
+              PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
+                                              int* SIMULATION_TYPE,
+                                              float* rho_ac_kl,
+                                              float* kappa_ac_kl,
+                                              int* APPROXIMATE_HESS_KL) {
+
+  TRACE("prepare_fields_acoustic_adj_dev");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  int size = mp->NGLOB_AB;
+
+  // kernel simulations
+  if( *SIMULATION_TYPE != 3 ) return;
+
+  // allocates backward/reconstructed arrays on device (GPU)
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_acoustic),sizeof(float)*size),9014);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_acoustic),sizeof(float)*size),9015);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_dot_acoustic),sizeof(float)*size),9016);
+
+  // allocates kernels
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_ac_kl),125*mp->NSPEC_AB*sizeof(float)),9017);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_ac_kl),125*mp->NSPEC_AB*sizeof(float)),9018);
+
+  // copies over initial values
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_ac_kl,rho_ac_kl,
+                                     125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),9019);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_kappa_ac_kl,kappa_ac_kl,
+                                     125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),9020);
+
+
+  if( *APPROXIMATE_HESS_KL ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hess_ac_kl),125*mp->NSPEC_AB*sizeof(float)),9030);
+    // initializes with zeros
+    print_CUDA_error_if_any(cudaMemset(mp->d_hess_ac_kl,0,
+                                       125*mp->NSPEC_AB*sizeof(float)),9031);
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("prepare_fields_acoustic_adj_dev");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
 // for ELASTIC simulations
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
 void FC_FUNC_(prepare_fields_elastic_device,
-              PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f, 
+              PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
                                              int* size,
                                              float* rmass,
-                                             float* rho_vp,  
+                                             float* rho_vp,
                                              float* rho_vs,
-                                             int* num_phase_ispec_elastic, 
+                                             int* num_phase_ispec_elastic,
                                              int* phase_ispec_inner_elastic,
                                              int* ispec_is_elastic,
                                              int* ABSORBING_CONDITIONS,
                                              float* h_b_absorb_field,
                                              int* h_b_reclen_field,
-                                             int* SIMULATION_TYPE,
-                                             float* rho_kl,
-                                             float* mu_kl,
-                                             float* kappa_kl,
+                                             int* SIMULATION_TYPE,int* SAVE_FORWARD,
                                              int* COMPUTE_AND_STORE_STRAIN,
                                              float* epsilondev_xx,float* epsilondev_yy,float* epsilondev_xy,
                                              float* epsilondev_xz,float* epsilondev_yz,
-                                             float* epsilon_trace_over_3,
-                                             float* b_epsilondev_xx,float* b_epsilondev_yy,float* b_epsilondev_xy,
-                                             float* b_epsilondev_xz,float* b_epsilondev_yz,
-                                             float* b_epsilon_trace_over_3,
-                                             int* ATTENUATION, int* R_size,
+                                             int* ATTENUATION,
+                                             int* R_size,
                                              float* R_xx,float* R_yy,float* R_xy,float* R_xz,float* R_yz,
-                                             float* b_R_xx,float* b_R_yy,float* b_R_xy,float* b_R_xz,float* b_R_yz,
                                              float* one_minus_sum_beta,float* factor_common,
                                              float* alphaval,float* betaval,float* gammaval,
-                                             float* b_alphaval,float* b_betaval,float* b_gammaval,
-                                             int* OCEANS,float* rmass_ocean_load,
-                                             float* free_surface_normal,int* num_free_surface_faces){
-  
+                                             int* OCEANS,
+                                             float* rmass_ocean_load,
+                                             int* NOISE_TOMOGRAPHY,
+                                             float* free_surface_normal,
+                                             int* free_surface_ispec,
+                                             int* free_surface_ijk,
+                                             int* num_free_surface_faces){
+
 TRACE("prepare_fields_elastic_device");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-  /* Assuming NGLLX==5. Padded is then 128 (5^3+3) */  
+  /* Assuming NGLLX==5. Padded is then 128 (5^3+3) */
   //int size_padded = 128 * mp->NSPEC_AB;
   int size_nonpadded = 125 * mp->NSPEC_AB;
-  
+
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ),sizeof(float)*(*size)),8001);
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc),sizeof(float)*(*size)),8002);
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel),sizeof(float)*(*size)),8003);
+
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer),sizeof(float)*(*size)),8004);
-  
+
+  // mass matrix
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass),sizeof(float)*mp->NGLOB_AB),8005);
-  
-  // non-padded arrays
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vp),size_nonpadded*sizeof(float)),8006); 
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vs),size_nonpadded*sizeof(float)),8007); 
-  
+  // transfer element data
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,rmass,
+                                     sizeof(float)*mp->NGLOB_AB,cudaMemcpyHostToDevice),8010);
+
+
   // element indices
-  mp->d_num_phase_ispec_elastic = *num_phase_ispec_elastic;
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_elastic), 
-                                     mp->d_num_phase_ispec_elastic*2*sizeof(int)),8008);
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_elastic),mp->NSPEC_AB*sizeof(int)),8009);
-  
-  // transfer element data
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,rmass,
-                                     sizeof(float)*mp->NGLOB_AB,cudaMemcpyHostToDevice),8010);  
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic, 
-                                     mp->d_num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),8011);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_elastic,ispec_is_elastic,
                                      mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),8012);
-  
-  // daniel: not sure if rho_vp, rho_vs needs padding... they are needed for stacey boundary condition
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp, rho_vp,
-                                     size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),8013);
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs, rho_vs,
-                                     size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),8014);
-  
+
+  // daniel: check
+  //check_ispec_is(Mesh_pointer_f,1);
+
+  // phase elements
+  mp->num_phase_ispec_elastic = *num_phase_ispec_elastic;
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_elastic),
+                                     mp->num_phase_ispec_elastic*2*sizeof(int)),8008);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic,
+                                     mp->num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),8011);
+
+  //daniel: check
+  //check_phase_ispec(Mesh_pointer_f,1);
+
   // for seismograms
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),
+  if( mp->nrec_local > 0 ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),
                                      3*125*(mp->nrec_local)*sizeof(float)),8015);
-  mp->h_station_seismo_field = (float*)malloc(3*125*(mp->nrec_local)*sizeof(float));
-  
+    mp->h_station_seismo_field = (float*)malloc(3*125*(mp->nrec_local)*sizeof(float));
+  }
+
   // absorbing conditions
-  if( *ABSORBING_CONDITIONS == 1 && mp->d_num_abs_boundary_faces > 0){
-    mp->d_b_reclen_field = *h_b_reclen_field;
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_field), 
+  if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0){
+    // non-padded arrays
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vp),size_nonpadded*sizeof(float)),8006);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vs),size_nonpadded*sizeof(float)),8007);
+
+    // rho_vp, rho_vs non-padded; they are needed for stacey boundary condition
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp, rho_vp,
+                                       size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),8013);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs, rho_vs,
+                                       size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),8014);
+
+    // absorb_field array used for file i/o
+    if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD )){
+      mp->d_b_reclen_field = *h_b_reclen_field;
+      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_field),
                                        mp->d_b_reclen_field),8016);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field,
+      print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field,
                                        mp->d_b_reclen_field,cudaMemcpyHostToDevice),8017);
+    }
   }
 
-  // kernel simulations
-  if( *SIMULATION_TYPE == 3 ){
-    // allocates backward/reconstructed arrays on device (GPU)
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ),sizeof(float)*(*size)),8201);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc),sizeof(float)*(*size)),8202);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel),sizeof(float)*(*size)),8203);
-    
-    // allocates kernels  
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl),125*mp->NSPEC_AB*sizeof(float)),8204);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_mu_kl),125*mp->NSPEC_AB*sizeof(float)),8205);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_kl),125*mp->NSPEC_AB*sizeof(float)),8206);
-    
-    // copies over initial values
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_kl,rho_kl, 
-                                       125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8207);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_mu_kl,mu_kl, 
-                                       125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8208);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_kappa_kl,kappa_kl, 
-                                       125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8209);
-    
-  }
-  
   // strains used for attenuation and kernel simulations
-  if( *COMPUTE_AND_STORE_STRAIN == 1 ){
+  if( *COMPUTE_AND_STORE_STRAIN ){
     // strains
     int epsilondev_size = 125*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing
-    
+
     print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xx,
                                        epsilondev_size*sizeof(float)),8301);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size*sizeof(float),
@@ -770,45 +1000,11 @@
                                        epsilondev_size*sizeof(float)),8308);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size*sizeof(float),
                                        cudaMemcpyHostToDevice),8309);
-    
-    if( *SIMULATION_TYPE == 3 ){  
-      // solid pressure
-      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3),
-                                         125*mp->NSPEC_AB*sizeof(float)),8310);
-      print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3,
-                                         125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8311);
-      // backward solid pressure
-      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3),
-                                         125*mp->NSPEC_AB*sizeof(float)),8312);
-      print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3,
-                                         125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8313);
-      // prepares backward strains
-      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx),
-                                         epsilondev_size*sizeof(float)),8321);
-      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy),
-                                         epsilondev_size*sizeof(float)),8322);
-      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy),
-                                         epsilondev_size*sizeof(float)),8323);
-      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz),
-                                         epsilondev_size*sizeof(float)),8324);
-      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz),
-                                         epsilondev_size*sizeof(float)),8325);
 
-      print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,
-                                         epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8326);
-      print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,
-                                         epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8327);
-      print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,
-                                         epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8328);
-      print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,
-                                         epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8329);
-      print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,
-                                         epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8330);            
-    }
   }
-  
+
   // attenuation memory variables
-  if( *ATTENUATION == 1 ){
+  if( *ATTENUATION ){
     // memory arrays
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xx),
                                        (*R_size)*sizeof(float)),8401);
@@ -833,39 +1029,13 @@
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yz),
                                        (*R_size)*sizeof(float)),8409);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz,R_yz,(*R_size)*sizeof(float),
-                                       cudaMemcpyHostToDevice),8410);    
-    if( *SIMULATION_TYPE == 3 ){
-        print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xx),
-                                           (*R_size)*sizeof(float)),8421);
-        print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,(*R_size)*sizeof(float),
-                                           cudaMemcpyHostToDevice),8422);
-        
-        print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yy),
-                                           (*R_size)*sizeof(float)),8423);
-        print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,(*R_size)*sizeof(float),
-                                           cudaMemcpyHostToDevice),8424);
-        
-        print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xy),
-                                           (*R_size)*sizeof(float)),8425);
-        print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,(*R_size)*sizeof(float),
-                                           cudaMemcpyHostToDevice),8426);
-        
-        print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xz),
-                                           (*R_size)*sizeof(float)),8427);
-        print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,(*R_size)*sizeof(float),
-                                           cudaMemcpyHostToDevice),8428);
-        
-        print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yz),
-                                           (*R_size)*sizeof(float)),8429);
-        print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,(*R_size)*sizeof(float),
-                                           cudaMemcpyHostToDevice),8420);              
-    }
-    
+                                       cudaMemcpyHostToDevice),8410);
+
     // attenuation factors
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_one_minus_sum_beta),
                                        125*mp->NSPEC_AB*sizeof(float)),8430);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta ,one_minus_sum_beta,
-                                       125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8431);          
+                                       125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8431);
 
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_factor_common),
                                        N_SLS*125*mp->NSPEC_AB*sizeof(float)),8432);
@@ -877,52 +1047,196 @@
                                        N_SLS*sizeof(float)),8434);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_alphaval ,alphaval,
                                        N_SLS*sizeof(float),cudaMemcpyHostToDevice),8435);
-                                       
+
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_betaval),
                                        N_SLS*sizeof(float)),8436);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_betaval ,betaval,
                                        N_SLS*sizeof(float),cudaMemcpyHostToDevice),8437);
-                                       
+
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_gammaval),
                                        N_SLS*sizeof(float)),8438);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaval ,gammaval,
                                        N_SLS*sizeof(float),cudaMemcpyHostToDevice),8439);
-    
-    if( *SIMULATION_TYPE == 3 ){
-      // alpha,beta,gamma factors for backward fields
-      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_alphaval),
-                                         N_SLS*sizeof(float)),8434);
-      print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval,
-                                         N_SLS*sizeof(float),cudaMemcpyHostToDevice),8435);
-      
-      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_betaval),
-                                         N_SLS*sizeof(float)),8436);
-      print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval,
-                                         N_SLS*sizeof(float),cudaMemcpyHostToDevice),8437);
-      
-      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_gammaval),
-                                         N_SLS*sizeof(float)),8438);
-      print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval,
-                                         N_SLS*sizeof(float),cudaMemcpyHostToDevice),8439);
+
+  }
+
+  if( *OCEANS ){
+    // oceans needs a free surface
+    mp->num_free_surface_faces = *num_free_surface_faces;
+    if( mp->num_free_surface_faces > 0 ){
+      // mass matrix
+      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load),
+                                         sizeof(float)*mp->NGLOB_AB),8501);
+      print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,rmass_ocean_load,
+                                         sizeof(float)*mp->NGLOB_AB,cudaMemcpyHostToDevice),8502);
+      // surface normal
+      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_normal),
+                                         3*25*(mp->num_free_surface_faces)*sizeof(float)),8503);
+      print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_normal,free_surface_normal,
+                                         3*25*(mp->num_free_surface_faces)*sizeof(float),cudaMemcpyHostToDevice),8504);
+
+      // temporary global array: used to synchronize updates on global accel array
+      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_updated_dof_ocean_load),
+                                         sizeof(int)*mp->NGLOB_AB),8505);
+
+      if( *NOISE_TOMOGRAPHY == 0){
+        print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
+                                          mp->num_free_surface_faces*sizeof(int)),9201);
+        print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
+                                          mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9203);
+        print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
+                                          3*25*mp->num_free_surface_faces*sizeof(int)),9202);
+        print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+                                          3*25*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9204);
+      }
     }
   }
 
-  if( *OCEANS == 1 ){
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load),sizeof(float)*mp->NGLOB_AB),8501);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,rmass_ocean_load,
-                                       sizeof(float)*mp->NGLOB_AB,cudaMemcpyHostToDevice),8502);    
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("prepare_fields_elastic_device");
+#endif
+}
 
-    mp->num_free_surface_faces = *num_free_surface_faces;
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_normal),
-                                       3*25*(mp->num_free_surface_faces)*sizeof(float)),8503); 
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_normal,free_surface_normal,
-                                       3*25*(mp->num_free_surface_faces)*sizeof(float),cudaMemcpyHostToDevice),8504);    
-    
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_fields_elastic_adj_dev,
+              PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
+                                             int* size,
+                                             int* SIMULATION_TYPE,
+                                             float* rho_kl,
+                                             float* mu_kl,
+                                             float* kappa_kl,
+                                             int* COMPUTE_AND_STORE_STRAIN,
+                                             float* epsilon_trace_over_3,
+                                             float* b_epsilondev_xx,float* b_epsilondev_yy,float* b_epsilondev_xy,
+                                             float* b_epsilondev_xz,float* b_epsilondev_yz,
+                                             float* b_epsilon_trace_over_3,
+                                             int* ATTENUATION,
+                                             int* R_size,
+                                             float* b_R_xx,float* b_R_yy,float* b_R_xy,float* b_R_xz,float* b_R_yz,
+                                             float* b_alphaval,float* b_betaval,float* b_gammaval,
+                                             int* APPROXIMATE_HESS_KL){
+
+  TRACE("prepare_fields_elastic_adj_dev");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  // checks if kernel simulation
+  if( *SIMULATION_TYPE != 3 ) return;
+
+  // kernel simulations
+  // allocates backward/reconstructed arrays on device (GPU)
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ),sizeof(float)*(*size)),8201);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc),sizeof(float)*(*size)),8202);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel),sizeof(float)*(*size)),8203);
+
+  // allocates kernels
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl),125*mp->NSPEC_AB*sizeof(float)),8204);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_mu_kl),125*mp->NSPEC_AB*sizeof(float)),8205);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_kl),125*mp->NSPEC_AB*sizeof(float)),8206);
+
+  // copies over initial values
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_kl,rho_kl,
+                                     125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8207);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_mu_kl,mu_kl,
+                                     125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8208);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_kappa_kl,kappa_kl,
+                                     125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8209);
+
+  // strains used for attenuation and kernel simulations
+  if( *COMPUTE_AND_STORE_STRAIN ){
+    // strains
+    int epsilondev_size = 125*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing
+
+    // solid pressure
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3),
+                                       125*mp->NSPEC_AB*sizeof(float)),8310);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3,
+                                       125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8311);
+    // backward solid pressure
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3),
+                                       125*mp->NSPEC_AB*sizeof(float)),8312);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3,
+                                       125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8313);
+    // prepares backward strains
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx),
+                                       epsilondev_size*sizeof(float)),8321);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy),
+                                       epsilondev_size*sizeof(float)),8322);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy),
+                                       epsilondev_size*sizeof(float)),8323);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz),
+                                       epsilondev_size*sizeof(float)),8324);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz),
+                                       epsilondev_size*sizeof(float)),8325);
+
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,
+                                       epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8326);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,
+                                       epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8327);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,
+                                       epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8328);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,
+                                       epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8329);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,
+                                       epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8330);
   }
-  
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING       
-  exit_on_cuda_error("prepare_fields_elastic_device");  
-#endif    
+
+  // attenuation memory variables
+  if( *ATTENUATION ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xx),
+                                       (*R_size)*sizeof(float)),8421);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,(*R_size)*sizeof(float),
+                                       cudaMemcpyHostToDevice),8422);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yy),
+                                       (*R_size)*sizeof(float)),8423);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,(*R_size)*sizeof(float),
+                                       cudaMemcpyHostToDevice),8424);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xy),
+                                       (*R_size)*sizeof(float)),8425);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,(*R_size)*sizeof(float),
+                                       cudaMemcpyHostToDevice),8426);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xz),
+                                       (*R_size)*sizeof(float)),8427);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,(*R_size)*sizeof(float),
+                                       cudaMemcpyHostToDevice),8428);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yz),
+                                       (*R_size)*sizeof(float)),8429);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,(*R_size)*sizeof(float),
+                                       cudaMemcpyHostToDevice),8420);
+
+    // alpha,beta,gamma factors for backward fields
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_alphaval),
+                                       N_SLS*sizeof(float)),8434);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval,
+                                       N_SLS*sizeof(float),cudaMemcpyHostToDevice),8435);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_betaval),
+                                       N_SLS*sizeof(float)),8436);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval,
+                                       N_SLS*sizeof(float),cudaMemcpyHostToDevice),8437);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_gammaval),
+                                       N_SLS*sizeof(float)),8438);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval,
+                                       N_SLS*sizeof(float),cudaMemcpyHostToDevice),8439);
+  }
+
+  if( *APPROXIMATE_HESS_KL ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hess_el_kl),125*mp->NSPEC_AB*sizeof(float)),8450);
+    // initializes with zeros
+    print_CUDA_error_if_any(cudaMemset(mp->d_hess_el_kl,0,
+                                       125*mp->NSPEC_AB*sizeof(float)),8451);
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("prepare_fields_elastic_adj_dev");
+#endif
 }
 
 
@@ -935,11 +1249,11 @@
 
 extern "C"
 void FC_FUNC_(prepare_fields_noise_device,
-              PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f, 
+              PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
                                            int* NSPEC_AB, int* NGLOB_AB,
-                                           int* free_surface_ispec,int* free_surface_ijk,
+                                           int* free_surface_ispec,
+                                           int* free_surface_ijk,
                                            int* num_free_surface_faces,
-                                           int* size_free_surface_ijk, 
                                            int* SIMULATION_TYPE,
                                            int* NOISE_TOMOGRAPHY,
                                            int* NSTEP,
@@ -951,36 +1265,36 @@
                                            float* free_surface_jacobian2Dw,
                                            float* Sigma_kl
                                            ) {
-  
+
   TRACE("prepare_fields_noise_device");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-  
+
   // free surface
   mp->num_free_surface_faces = *num_free_surface_faces;
-  
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec, 
-                                     *num_free_surface_faces*sizeof(int)),4001);
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec, 
-                                     *num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4002);
-  
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk, 
-                                     (*size_free_surface_ijk)*sizeof(int)),4003);
+
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec,
+                                     mp->num_free_surface_faces*sizeof(int)),4001);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec,
+                                     mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4002);
+
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk,
+                                     3*25*mp->num_free_surface_faces*sizeof(int)),4003);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
-                                     (*size_free_surface_ijk)*sizeof(int),cudaMemcpyHostToDevice),4004);
-  
+                                     3*25*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4004);
+
   // alloc storage for the surface buffer to be copied
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie, 
-                                     3*25*(*num_free_surface_faces)*sizeof(float)),4005);
-  
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie,
+                                     3*25*mp->num_free_surface_faces*sizeof(float)),4005);
+
   // prepares noise source array
   if( *NOISE_TOMOGRAPHY == 1 ){
     print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
-                                       3*125*(*NSTEP)*sizeof(float)),4101);  
+                                       3*125*(*NSTEP)*sizeof(float)),4101);
     print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_sourcearray, noise_sourcearray,
-                                       3*125*(*NSTEP)*sizeof(float),cudaMemcpyHostToDevice),4102);    
+                                       3*125*(*NSTEP)*sizeof(float),cudaMemcpyHostToDevice),4102);
   }
-  
+
   // prepares noise directions
   if( *NOISE_TOMOGRAPHY > 1 ){
     int nface_size = 25*(*num_free_surface_faces);
@@ -991,35 +1305,35 @@
                                        nface_size*sizeof(float)),4302);
     print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise,
                                        nface_size*sizeof(float)),4303);
-    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise, 
-                                       nface_size*sizeof(float)),4304);    
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise,
+                                       nface_size*sizeof(float)),4304);
     print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_free_surface_jacobian2Dw,
                                        nface_size*sizeof(float)),4305);
     // transfers data onto GPU
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise, 
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise,
                                        nface_size*sizeof(float),cudaMemcpyHostToDevice),4306);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise, 
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise,
                                        nface_size*sizeof(float),cudaMemcpyHostToDevice),4307);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise, 
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise,
                                        nface_size*sizeof(float),cudaMemcpyHostToDevice),4308);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise, 
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise,
                                        nface_size*sizeof(float),cudaMemcpyHostToDevice),4309);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw, 
-                                       nface_size*sizeof(float),cudaMemcpyHostToDevice),4310);    
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw,
+                                       nface_size*sizeof(float),cudaMemcpyHostToDevice),4310);
   }
-  
+
   // prepares noise strength kernel
   if( *NOISE_TOMOGRAPHY == 3 ){
     print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_Sigma_kl),
                                        125*(mp->NSPEC_AB)*sizeof(float)),4401);
-    print_CUDA_error_if_any(cudaMemcpy(mp->d_Sigma_kl, Sigma_kl, 
-                                       125*(mp->NSPEC_AB)*sizeof(float),cudaMemcpyHostToDevice),4403);  
-  }  
-  
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_Sigma_kl, Sigma_kl,
+                                       125*(mp->NSPEC_AB)*sizeof(float),cudaMemcpyHostToDevice),4403);
+  }
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //printf("jacobian_size = %d\n",25*(*num_free_surface_faces));
-  exit_on_cuda_error("prepare_fields_noise_device");  
-#endif    
+  exit_on_cuda_error("prepare_fields_noise_device");
+#endif
 }
 
 
@@ -1033,19 +1347,22 @@
 void FC_FUNC_(prepare_cleanup_device,
               PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f,
                                       int* SIMULATION_TYPE,
+                                      int* SAVE_FORWARD,
                                       int* ACOUSTIC_SIMULATION,
                                       int* ELASTIC_SIMULATION,
                                       int* ABSORBING_CONDITIONS,
                                       int* NOISE_TOMOGRAPHY,
                                       int* COMPUTE_AND_STORE_STRAIN,
-                                      int* ATTENUATION) {
-  
+                                      int* ATTENUATION,
+                                      int* OCEANS,
+                                      int* APPROXIMATE_HESS_KL) {
+
 TRACE("prepare_cleanup_device");
 
-  // frees allocated memory arrays  
+  // frees allocated memory arrays
   Mesh* mp = (Mesh*)(*Mesh_pointer_f);
 
-  // frees memory on GPU  
+  // frees memory on GPU
   // mesh
   cudaFree(mp->d_xix);
   cudaFree(mp->d_xiy);
@@ -1057,15 +1374,15 @@
   cudaFree(mp->d_gammay);
   cudaFree(mp->d_gammaz);
   cudaFree(mp->d_muv);
-  
+
   // absorbing boundaries
-  if( *ABSORBING_CONDITIONS == 1 && mp->d_num_abs_boundary_faces > 0 ){ 
+  if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0 ){
     cudaFree(mp->d_abs_boundary_ispec);
     cudaFree(mp->d_abs_boundary_ijk);
     cudaFree(mp->d_abs_boundary_normal);
     cudaFree(mp->d_abs_boundary_jacobian2Dw);
   }
-  
+
   // interfaces
   cudaFree(mp->d_nibool_interfaces_ext_mesh);
   cudaFree(mp->d_ibool_interfaces_ext_mesh);
@@ -1075,20 +1392,20 @@
   cudaFree(mp->d_ibool);
 
   // sources
-  if (*SIMULATION_TYPE == 1  || *SIMULATION_TYPE == 3){ 
+  if (*SIMULATION_TYPE == 1  || *SIMULATION_TYPE == 3){
     cudaFree(mp->d_sourcearrays);
-    cudaFree(mp->d_stf_pre_compute);    
+    cudaFree(mp->d_stf_pre_compute);
   }
-  
+
   cudaFree(mp->d_islice_selected_source);
   cudaFree(mp->d_ispec_selected_source);
 
   // receivers
-  cudaFree(mp->d_number_receiver_global);
+  if( mp->nrec_local > 0 ) cudaFree(mp->d_number_receiver_global);
   cudaFree(mp->d_ispec_selected_rec);
 
   // ACOUSTIC arrays
-  if( *ACOUSTIC_SIMULATION == 1 ){ 
+  if( *ACOUSTIC_SIMULATION ){
     cudaFree(mp->d_potential_acoustic);
     cudaFree(mp->d_potential_dot_acoustic);
     cudaFree(mp->d_potential_dot_dot_acoustic);
@@ -1098,42 +1415,54 @@
     cudaFree(mp->d_kappastore);
     cudaFree(mp->d_phase_ispec_inner_acoustic);
     cudaFree(mp->d_ispec_is_acoustic);
-    
-    if( *NOISE_TOMOGRAPHY == 0 ){ 
+
+    if( *NOISE_TOMOGRAPHY == 0 ){
       cudaFree(mp->d_free_surface_ispec);
       cudaFree(mp->d_free_surface_ijk);
     }
-    
-    if( *ABSORBING_CONDITIONS == 1 ) cudaFree(mp->d_b_absorb_potential);
-    
+
+    if( *ABSORBING_CONDITIONS ) cudaFree(mp->d_b_absorb_potential);
+
     if( *SIMULATION_TYPE == 3 ) {
       cudaFree(mp->d_b_potential_acoustic);
       cudaFree(mp->d_b_potential_dot_acoustic);
       cudaFree(mp->d_b_potential_dot_dot_acoustic);
       cudaFree(mp->d_rho_ac_kl);
       cudaFree(mp->d_kappa_ac_kl);
+      if( *APPROXIMATE_HESS_KL) cudaFree(mp->d_hess_ac_kl);
     }
-    
-    cudaFree(mp->d_station_seismo_potential);
 
-    free(mp->h_station_seismo_potential);
-  }
 
+    if(mp->nrec_local > 0 ){
+      cudaFree(mp->d_station_seismo_potential);
+      free(mp->h_station_seismo_potential);
+    }
+  } // ACOUSTIC_SIMULATION
+
   // ELASTIC arrays
-  if( *ELASTIC_SIMULATION == 1 ){ 
+  if( *ELASTIC_SIMULATION ){
     cudaFree(mp->d_displ);
     cudaFree(mp->d_veloc);
     cudaFree(mp->d_accel);
     cudaFree(mp->d_send_accel_buffer);
     cudaFree(mp->d_rmass);
-    cudaFree(mp->d_rho_vp);
-    cudaFree(mp->d_rho_vs);
+
     cudaFree(mp->d_phase_ispec_inner_elastic);
     cudaFree(mp->d_ispec_is_elastic);
-    cudaFree(mp->d_station_seismo_field);
-    
-    if( *ABSORBING_CONDITIONS == 1 && mp->d_num_abs_boundary_faces > 0) cudaFree(mp->d_b_absorb_field);
 
+    if( mp->nrec_local > 0 ){
+      cudaFree(mp->d_station_seismo_field);
+      free(mp->h_station_seismo_field);
+    }
+
+    if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0){
+      cudaFree(mp->d_rho_vp);
+      cudaFree(mp->d_rho_vs);
+
+      if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD ))
+          cudaFree(mp->d_b_absorb_field);
+    }
+
     if( *SIMULATION_TYPE == 3 ) {
       cudaFree(mp->d_b_displ);
       cudaFree(mp->d_b_veloc);
@@ -1141,26 +1470,27 @@
       cudaFree(mp->d_rho_kl);
       cudaFree(mp->d_mu_kl);
       cudaFree(mp->d_kappa_kl);
+      if( *APPROXIMATE_HESS_KL ) cudaFree(mp->d_hess_el_kl);
     }
 
-    if( *COMPUTE_AND_STORE_STRAIN == 1 ){  
+    if( *COMPUTE_AND_STORE_STRAIN ){
       cudaFree(mp->d_epsilondev_xx);
       cudaFree(mp->d_epsilondev_yy);
       cudaFree(mp->d_epsilondev_xy);
       cudaFree(mp->d_epsilondev_xz);
       cudaFree(mp->d_epsilondev_yz);
-      if( *SIMULATION_TYPE == 3 ){ 
+      if( *SIMULATION_TYPE == 3 ){
         cudaFree(mp->d_epsilon_trace_over_3);
         cudaFree(mp->d_b_epsilon_trace_over_3);
         cudaFree(mp->d_b_epsilondev_xx);
         cudaFree(mp->d_b_epsilondev_yy);
         cudaFree(mp->d_b_epsilondev_xy);
         cudaFree(mp->d_b_epsilondev_xz);
-        cudaFree(mp->d_b_epsilondev_yz);        
-      }    
+        cudaFree(mp->d_b_epsilondev_yz);
+      }
     }
-    
-    if( *ATTENUATION == 1 ){
+
+    if( *ATTENUATION ){
       cudaFree(mp->d_factor_common);
       cudaFree(mp->d_one_minus_sum_beta);
       cudaFree(mp->d_alphaval);
@@ -1171,7 +1501,7 @@
       cudaFree(mp->d_R_xy);
       cudaFree(mp->d_R_xz);
       cudaFree(mp->d_R_yz);
-      if( *SIMULATION_TYPE == 3){ 
+      if( *SIMULATION_TYPE == 3){
         cudaFree(mp->d_b_R_xx);
         cudaFree(mp->d_b_R_yy);
         cudaFree(mp->d_b_R_xy);
@@ -1182,13 +1512,25 @@
         cudaFree(mp->d_b_gammaval);
       }
     }
-  }
-  
+
+    if( *OCEANS ){
+      if( mp->num_free_surface_faces > 0 ){
+        cudaFree(mp->d_rmass_ocean_load);
+        cudaFree(mp->d_free_surface_normal);
+        cudaFree(mp->d_updated_dof_ocean_load);
+        if( *NOISE_TOMOGRAPHY == 0){
+          cudaFree(mp->d_free_surface_ispec);
+          cudaFree(mp->d_free_surface_ijk);
+        }
+      }
+    }
+  } // ELASTIC_SIMULATION
+
   // purely adjoint & kernel array
   if( *SIMULATION_TYPE == 2 || *SIMULATION_TYPE == 3 ) cudaFree(mp->d_islice_selected_rec);
-        
-  // NOISE arrays      
-  if( *NOISE_TOMOGRAPHY > 0 ){ 
+
+  // NOISE arrays
+  if( *NOISE_TOMOGRAPHY > 0 ){
     cudaFree(mp->d_free_surface_ispec);
     cudaFree(mp->d_free_surface_ijk);
     cudaFree(mp->d_noise_surface_movie);
@@ -1199,10 +1541,11 @@
       cudaFree(mp->d_normal_z_noise);
       cudaFree(mp->d_mask_noise);
       cudaFree(mp->d_free_surface_jacobian2Dw);
-    }  
+    }
     if( *NOISE_TOMOGRAPHY == 3 ) cudaFree(mp->d_Sigma_kl);
   }
-  
+
   // mesh pointer - not needed anymore
   free(mp);
-}
\ No newline at end of file
+}
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,7 +1,7 @@
 #include "config.h"
 #include <stdio.h>
 
-typedef float real;
+typedef float realw;
 
 /* from check_fields_cuda.cu */
 void FC_FUNC_(check_max_norm_displ_gpu,
@@ -33,30 +33,38 @@
 void FC_FUNC_(get_max_accel,
               GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer){}
 
-void FC_FUNC_(get_norm_acoustic_from_device_cuda,
-              GET_NORM_ACOUSTIC_FROM_DEVICE_CUDA)(float* norm, 
+void FC_FUNC_(get_norm_acoustic_from_device,
+              GET_NORM_ACOUSTIC_FROM_DEVICE)(float* norm, 
                                                   long* Mesh_pointer_f,
                                                   int* SIMULATION_TYPE){}
 
-void FC_FUNC_(get_norm_elastic_from_device_cuda,
-              GET_NORM_ELASTIC_FROM_DEVICE_CUDA)(float* norm, 
+void FC_FUNC_(get_norm_elastic_from_device,
+              GET_NORM_ELASTIC_FROM_DEVICE)(float* norm, 
                                                  long* Mesh_pointer_f,
                                                  int* SIMULATION_TYPE){}
 
 						
 /* from file compute_add_sources_cuda.cu */
 
-void FC_FUNC_(add_sourcearrays_adjoint_cuda,
-              ADD_SOURCEARRAYS_ADJOINT_CUDA)(long* Mesh_pointer,
-					     int* USE_FORCE_POINT_SOURCE,
-					     double* h_stf_pre_compute,int* NSOURCES,
-					     int* phase_is_inner,int* myrank){}
+void FC_FUNC_(compute_add_sources_el_cuda,
+              COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
+                                           int* NSPEC_ABf, int* NGLOB_ABf,
+                                           int* phase_is_innerf,int* NSOURCESf,
+                                           int* itf, float* dtf, float* t0f,
+                                           int* SIMULATION_TYPEf,int* NSTEPf,
+                                           int* NOISE_TOMOGRAPHYf,
+                                           int* USE_FORCE_POINT_SOURCEf,
+                                           double* h_stf_pre_compute, int* myrankf){}
 
-void FC_FUNC_(compute_add_sources_elastic_cuda,
-              COMPUTE_ADD_SOURCES_ELASTIC_CUDA)(){}
+void FC_FUNC_(compute_add_sources_el_s3_cuda,
+              COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
+                                              int* USE_FORCE_POINT_SOURCE,
+                                              double* h_stf_pre_compute,
+                                              int* NSOURCES,
+                                              int* phase_is_inner,int* myrank){}
 
-void FC_FUNC_(add_source_master_rec_noise_cuda,
-              ADD_SOURCE_MASTER_REC_NOISE_CUDA)(long* Mesh_pointer_f, 
+void FC_FUNC_(add_source_master_rec_noise_cu,
+              ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f, 
                                                 int* myrank_f,  
                                                 int* it_f, 
                                                 int* irec_master_noise_f, 
@@ -72,8 +80,8 @@
                                            int* h_islice_selected_rec,int* nadj_rec_local,
                                            int* NTSTEP_BETWEEN_READ_ADJSRC){}
 
-void FC_FUNC_(compute_add_sources_acoustic_cuda,
-              COMPUTE_ADD_SOURCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f, 
+void FC_FUNC_(compute_add_sources_ac_cuda,
+              COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f, 
                                                  int* phase_is_innerf,
                                                  int* NSOURCESf, 
                                                  int* SIMULATION_TYPEf,
@@ -81,8 +89,8 @@
                                                  double* h_stf_pre_compute, 
                                                  int* myrankf){}
 
-void FC_FUNC_(compute_add_sources_acoustic_sim3_cuda,
-              COMPUTE_ADD_SOURCES_ACOUSTIC_SIM3_CUDA)(long* Mesh_pointer_f, 
+void FC_FUNC_(compute_add_sources_ac_s3_cuda,
+              COMPUTE_ADD_SOURCES_AC_S3_CUDA)(long* Mesh_pointer_f, 
                                                       int* phase_is_innerf,
                                                       int* NSOURCESf, 
                                                       int* SIMULATION_TYPEf,
@@ -90,8 +98,8 @@
                                                       double* h_stf_pre_compute, 
                                                       int* myrankf){}
 
-void FC_FUNC_(add_sources_acoustic_sim_type_2_or_3_cuda,
-              ADD_SOURCES_ACOUSTIC_SIM_TYPE_2_OR_3_CUDA)(long* Mesh_pointer, 
+void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda,
+              ADD_SOURCES_AC_SIM_2_OR_3_CUDA)(long* Mesh_pointer, 
                                                          float* h_adj_sourcearrays,
                                                          int* size_adj_sourcearrays, 
                                                          int* phase_is_inner,
@@ -104,15 +112,15 @@
 
 /* from compute_coupling_cuda.cu */
 							
-void FC_FUNC_(compute_coupling_acoustic_el_cuda,
-              COMPUTE_COUPLING_ACOUSTIC_EL_CUDA)(
+void FC_FUNC_(compute_coupling_ac_el_cuda,
+              COMPUTE_COUPLING_AC_EL_CUDA)(
                                             long* Mesh_pointer_f, 
                                             int* phase_is_innerf, 
                                             int* num_coupling_ac_el_facesf, 
                                             int* SIMULATION_TYPEf){}
 
-void FC_FUNC_(compute_coupling_elastic_ac_cuda,
-              COMPUTE_COUPLING_ELASTIC_AC_CUDA)(
+void FC_FUNC_(compute_coupling_el_ac_cuda,
+              COMPUTE_COUPLING_EL_AC_CUDA)(
                                                  long* Mesh_pointer_f, 
                                                  int* phase_is_innerf, 
                                                  int* num_coupling_ac_el_facesf, 
@@ -120,8 +128,8 @@
 
 /* from compute_forces_acoustic_cuda.cu */
 
-void FC_FUNC_(transfer_boundary_potential_from_device,
-              TRANSFER_BOUNDARY_POTENTIAL_FROM_DEVICE)(
+void FC_FUNC_(transfer_boun_pot_from_device,
+              TRANSFER_BOUN_POT_FROM_DEVICE)(
                                               int* size, 
                                               long* Mesh_pointer_f, 
                                               float* potential_dot_dot_acoustic, 
@@ -132,11 +140,11 @@
                                               int* ibool_interfaces_ext_mesh,
                                               int* FORWARD_OR_ADJOINT){}
 
-void FC_FUNC_(transfer_and_assemble_potential_to_device,
-              TRANSFER_AND_ASSEMBLE_POTENTIAL_TO_DEVICE)(
+void FC_FUNC_(transfer_asmbl_pot_to_device,
+              TRANSFER_ASMBL_POT_TO_DEVICE)(
                                                 long* Mesh_pointer, 
-                                                real* potential_dot_dot_acoustic, 
-                                                real* buffer_recv_scalar_ext_mesh,
+                                                realw* potential_dot_dot_acoustic, 
+                                                realw* buffer_recv_scalar_ext_mesh,
                                                 int* num_interfaces_ext_mesh, 
                                                 int* max_nibool_interfaces_ext_mesh,
                                                 int* nibool_interfaces_ext_mesh, 
@@ -162,15 +170,15 @@
                                                              int* SIMULATION_TYPE, 
                                                              float* b_deltatover2_F){}
 
-void FC_FUNC_(acoustic_enforce_free_surface_cuda,
-              ACOUSTIC_ENFORCE_FREE_SURFACE_CUDA)(long* Mesh_pointer_f, 
+void FC_FUNC_(acoustic_enforce_free_surf_cuda,
+              ACOUSTIC_ENFORCE_FREE_SURF_CUDA)(long* Mesh_pointer_f, 
                                                   int* SIMULATION_TYPE,
                                                   int* ABSORB_FREE_SURFACE){}
 
 
 /* from compute_forces_elastic_cuda.cu */
-void FC_FUNC_(transfer_boundary_accel_from_device,
-              TRANSFER_BOUNDARY_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, float* accel,
+void FC_FUNC_(transfer_boun_accel_from_device,
+              TRANSFER_BOUN_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, float* accel,
 						   float* send_accel_buffer,
 						   int* num_interfaces_ext_mesh,
 						   int* max_nibool_interfaces_ext_mesh,
@@ -179,12 +187,13 @@
 						   int* FORWARD_OR_ADJOINT){}
 
 						  
-void FC_FUNC_(transfer_and_assemble_accel_to_device,
-              TRANSFER_AND_ASSEMBLE_ACCEL_TO_DEVICE)(long* Mesh_pointer, real* accel,
-                                                    real* buffer_recv_vector_ext_mesh,
-                                                    int* num_interfaces_ext_mesh,
-                                                    int* max_nibool_interfaces_ext_mesh,
-                                                    int* nibool_interfaces_ext_mesh,
+void FC_FUNC_(transfer_asmbl_accel_to_device,
+              TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer, 
+                                              realw* accel,
+                                              realw* buffer_recv_vector_ext_mesh,
+                                              int* num_interfaces_ext_mesh,
+                                              int* max_nibool_interfaces_ext_mesh,
+                                              int* nibool_interfaces_ext_mesh,
 						     int* ibool_interfaces_ext_mesh,int* FORWARD_OR_ADJOINT){}
 
 void FC_FUNC_(compute_forces_elastic_cuda,
@@ -192,8 +201,8 @@
                                            int* iphase,
                                            int* nspec_outer_elastic,
                                            int* nspec_inner_elastic,
+                                           int* SIMULATION_TYPE,
                                            int* COMPUTE_AND_STORE_STRAIN,
-                                           int* SIMULATION_TYPE,
                                            int* ATTENUATION){}
 
 void FC_FUNC_(kernel_3_a_cuda,
@@ -219,10 +228,10 @@
 				      
 void FC_FUNC_(compute_kernels_elastic_cuda,
               COMPUTE_KERNELS_ELASTIC_CUDA)(long* Mesh_pointer,
-                                            float* deltat){}
+                                            float* deltat_f){}
 
-void FC_FUNC_(compute_kernels_strength_noise_cuda,
-              COMPUTE_KERNELS_STRENGTH_NOISE_CUDA)(long* Mesh_pointer, 
+void FC_FUNC_(compute_kernels_strgth_noise_cu,
+              COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer, 
                                                     float* h_noise_surface_movie,
                                                     int* num_free_surface_faces_f,
 						   float* deltat){}
@@ -230,9 +239,12 @@
 void FC_FUNC_(compute_kernels_acoustic_cuda,
               COMPUTE_KERNELS_ACOUSTIC_CUDA)(
                                              long* Mesh_pointer, 
-                                             float* deltat){}
+                                             float* deltat_f){}
 
-
+void FC_FUNC_(compute_kernels_hess_cuda,
+              COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
+                                         float* deltat_f) {}
+                                         
 /* from file compute_stacey_acoustic_cuda.cu */
 void FC_FUNC_(compute_stacey_acoustic_cuda,
               COMPUTE_STACEY_ACOUSTIC_CUDA)(
@@ -252,10 +264,10 @@
                                            int* SAVE_FORWARDf,
                                            float* h_b_absorb_field){}
 
-/* from file it_update_displacement_scheme_cuda.cu */
+/* from file it_update_displacement_cuda.cu */
 					  
-void FC_FUNC_(it_update_displacement_scheme_cuda,
-              IT_UPDATE_DISPLACMENT_SCHEME_CUDA)(long* Mesh_pointer_f,
+void FC_FUNC_(it_update_displacement_cuda,
+              it_update_displacement_cuda)(long* Mesh_pointer_f,
                                                  int* size_F, 
                                                  float* deltat_F, 
                                                  float* deltatsqover2_F, 
@@ -265,8 +277,8 @@
                                                  float* b_deltatsqover2_F, 
                                                  float* b_deltatover2_F){}
 
-void FC_FUNC_(it_update_displacement_scheme_acoustic_cuda,
-              IT_UPDATE_DISPLACEMENT_SCHEME_ACOUSTIC_CUDA)(long* Mesh_pointer_f, 
+void FC_FUNC_(it_update_displacement_ac_cuda,
+              IT_UPDATE_DISPLACEMENT_AC_CUDA)(long* Mesh_pointer_f, 
                                                            int* size_F,
                                                            float* deltat_F, 
                                                            float* deltatsqover2_F, 
@@ -288,10 +300,15 @@
 void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,float* h_displ){}
 
 void FC_FUNC_(transfer_surface_to_host,
-              TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,real* h_noise_surface_movie,int* num_free_surface_faces){}
+              TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,
+                                        realw* h_noise_surface_movie,
+                                        int* num_free_surface_faces){}
 
-void FC_FUNC_(noise_read_add_surface_movie_cuda,
-              NOISE_READ_ADD_SURFACE_MOVIE_CUDA)(long* Mesh_pointer_f, real* h_noise_surface_movie, int* num_free_surface_faces_f,int* NOISE_TOMOGRAPHYf){}
+void FC_FUNC_(noise_read_add_surface_movie_cu,
+              NOISE_READ_ADD_SURFACE_MOVIE_CU)(long* Mesh_pointer_f, 
+                                               realw* h_noise_surface_movie, 
+                                               int* num_free_surface_faces_f,
+                                               int* NOISE_TOMOGRAPHYf){}
 
 						
 /* from file prepare_mesh_constants_cuda.cu						 */
@@ -327,6 +344,7 @@
                                         int* h_num_abs_boundary_faces,
                                         int* h_ispec_is_inner, 
                                         int* NSOURCES,
+                                        int* nsources_local,
                                         float* h_sourcearrays,
                                         int* h_islice_selected_source,
                                         int* h_ispec_selected_source,
@@ -334,17 +352,18 @@
                                         int* h_ispec_selected_rec,
                                         int* nrec_f,
                                         int* nrec_local_f,
-                                        int* SIMULATION_TYPE)
+                                        int* SIMULATION_TYPE,
+                                        int* ncuda_devices)
 {
   fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
   exit(1);
 }
 
-void FC_FUNC_(prepare_adjoint_sim2_or_3_constants_device,
-              PREPARE_ADJOINT_SIM2_OR_3_CONSTANTS_DEVICE)(
-                                                          long* Mesh_pointer_f,
-                                                          int* islice_selected_rec,
-                                                          int* islice_selected_rec_size){}
+void FC_FUNC_(prepare_sim2_or_3_const_device,
+              PREPARE_SIM2_OR_3_CONST_DEVICE)(
+                                              long* Mesh_pointer_f,
+                                              int* islice_selected_rec,
+                                              int* islice_selected_rec_size){}
 
 void FC_FUNC_(prepare_fields_acoustic_device,
               PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f, 
@@ -361,9 +380,6 @@
                                               int* ABSORBING_CONDITIONS,
                                               int* b_reclen_potential,
                                               float* b_absorb_potential,
-                                              int* SIMULATION_TYPE,
-                                              float* rho_ac_kl,
-                                              float* kappa_ac_kl,
                                               int* ELASTIC_SIMULATION,
                                               int* num_coupling_ac_el_faces,
                                               int* coupling_ac_el_ispec,
@@ -371,44 +387,67 @@
                                               float* coupling_ac_el_normal,
                                               float* coupling_ac_el_jacobian2Dw
                                               ){}							 
+
+void FC_FUNC_(prepare_fields_acoustic_adj_dev,
+              PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
+                                               int* SIMULATION_TYPE,
+                                               float* rho_ac_kl,
+                                               float* kappa_ac_kl,
+                                               int* APPROXIMATE_HESS_KL) {}                                              
 void FC_FUNC_(prepare_fields_elastic_device,
-              PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f, 
+              PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
                                              int* size,
                                              float* rmass,
-                                             float* rho_vp,  
+                                             float* rho_vp,
                                              float* rho_vs,
-                                             int* num_phase_ispec_elastic, 
+                                             int* num_phase_ispec_elastic,
                                              int* phase_ispec_inner_elastic,
                                              int* ispec_is_elastic,
                                              int* ABSORBING_CONDITIONS,
                                              float* h_b_absorb_field,
                                              int* h_b_reclen_field,
-                                             int* SIMULATION_TYPE,
-                                             float* rho_kl,
-                                             float* mu_kl,
-                                             float* kappa_kl,
+                                             int* SIMULATION_TYPE,int* SAVE_FORWARD,
                                              int* COMPUTE_AND_STORE_STRAIN,
                                              float* epsilondev_xx,float* epsilondev_yy,float* epsilondev_xy,
                                              float* epsilondev_xz,float* epsilondev_yz,
-                                             float* epsilon_trace_over_3,
-                                             float* b_epsilondev_xx,float* b_epsilondev_yy,float* b_epsilondev_xy,
-                                             float* b_epsilondev_xz,float* b_epsilondev_yz,
-                                             float* b_epsilon_trace_over_3,
-                                             int* ATTENUATION, int* R_size,
+                                             int* ATTENUATION, 
+                                             int* R_size,
                                              float* R_xx,float* R_yy,float* R_xy,float* R_xz,float* R_yz,
-                                             float* b_R_xx,float* b_R_yy,float* b_R_xy,float* b_R_xz,float* b_R_yz,
                                              float* one_minus_sum_beta,float* factor_common,
                                              float* alphaval,float* betaval,float* gammaval,
-                                             float* b_alphaval,float* b_betaval,float* b_gammaval,
-                                             int* OCEANS,float* rmass_ocean_load,
-                                             float* free_surface_normal,int* num_free_surface_faces){}
+                                             int* OCEANS,
+                                             float* rmass_ocean_load,
+                                             int* NOISE_TOMOGRAPHY,
+                                             float* free_surface_normal,
+                                             int* free_surface_ispec,
+                                             int* free_surface_ijk,                                             
+                                             int* num_free_surface_faces){}
+  
+void FC_FUNC_(prepare_fields_elastic_adj_dev,
+              PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
+                                              int* size,
+                                              int* SIMULATION_TYPE,
+                                              float* rho_kl,
+                                              float* mu_kl,
+                                              float* kappa_kl,
+                                              int* COMPUTE_AND_STORE_STRAIN,
+                                              float* epsilon_trace_over_3,                                             
+                                              float* b_epsilondev_xx,float* b_epsilondev_yy,float* b_epsilondev_xy,
+                                              float* b_epsilondev_xz,float* b_epsilondev_yz,
+                                              float* b_epsilon_trace_over_3,
+                                              int* ATTENUATION, 
+                                              int* R_size,
+                                              float* b_R_xx,float* b_R_yy,float* b_R_xy,float* b_R_xz,float* b_R_yz,
+                                              float* b_alphaval,float* b_betaval,float* b_gammaval,
+                                              int* APPROXIMATE_HESS_KL){}
+  
 
 void FC_FUNC_(prepare_fields_noise_device,
               PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f, 
                                            int* NSPEC_AB, int* NGLOB_AB,
-                                           int* free_surface_ispec,int* free_surface_ijk,
+                                           int* free_surface_ispec,
+                                           int* free_surface_ijk,
                                            int* num_free_surface_faces,
-                                           int* size_free_surface_ijk, 
                                            int* SIMULATION_TYPE,
                                            int* NOISE_TOMOGRAPHY,
                                            int* NSTEP,
@@ -424,28 +463,31 @@
 void FC_FUNC_(prepare_cleanup_device,
               PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f,
                                       int* SIMULATION_TYPE,
+                                      int* SAVE_FORWARD,
                                       int* ACOUSTIC_SIMULATION,
                                       int* ELASTIC_SIMULATION,
                                       int* ABSORBING_CONDITIONS,
                                       int* NOISE_TOMOGRAPHY,
                                       int* COMPUTE_AND_STORE_STRAIN,
-                                      int* ATTENUATION){}
+                                      int* ATTENUATION,
+                                      int* OCEANS,
+                                      int* APPROXIMATE_HESS_KL){}
 
 /* from file transfer_fields_cuda.cu				      */
 
+void FC_FUNC_(transfer_fields_el_to_device,
+              TRANSFER_FIELDS_EL_TO_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f){}
+
+void FC_FUNC_(transfer_fields_el_from_device,
+              TRANSFER_FIELDS_EL_FROM_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f){}
+
 void FC_FUNC_(transfer_b_fields_to_device,
               TRANSFER_B_FIELDS_TO_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,
                                            long* Mesh_pointer_f){}
 
-void FC_FUNC_(transfer_fields_to_device,
-              TRANSFER_FIELDS_TO_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f){}
-
 void FC_FUNC_(transfer_b_fields_from_device,
               TRANSFER_B_FIELDS_FROM_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,long* Mesh_pointer_f){}
 
-void FC_FUNC_(transfer_fields_from_device,
-              TRANSFER_FIELDS_FROM_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f){}
-
 void FC_FUNC_(transfer_accel_to_device,
               TRNASFER_ACCEL_TO_DEVICE)(int* size, float* accel,long* Mesh_pointer_f){}
 
@@ -463,12 +505,39 @@
 void FC_FUNC_(transfer_displ_from_device,
               TRANSFER_DISPL_FROM_DEVICE)(int* size, float* displ,long* Mesh_pointer_f){}
 
+/*
 void FC_FUNC_(transfer_compute_kernel_answers_from_device,
               TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
                                                            float* rho_kl,int* size_rho,
                                                            float* mu_kl, int* size_mu,
                                                            float* kappa_kl, int* size_kappa){}
+*/
 
+/*
+void FC_FUNC_(transfer_compute_kernel_fields_from_device,
+              TRANSFER_COMPUTE_KERNEL_FIELDS_FROM_DEVICE)(long* Mesh_pointer,
+                                                          float* accel, int* size_accel,
+                                                          float* b_displ, int* size_b_displ,
+                                                          float* epsilondev_xx,
+                                                          float* epsilondev_yy,
+                                                          float* epsilondev_xy,
+                                                          float* epsilondev_xz,
+                                                          float* epsilondev_yz,
+                                                          int* size_epsilondev,
+                                                          float* b_epsilondev_xx,
+                                                          float* b_epsilondev_yy,
+                                                          float* b_epsilondev_xy,
+                                                          float* b_epsilondev_xz,
+                                                          float* b_epsilondev_yz,
+                                                          int* size_b_epsilondev,
+                                                          float* rho_kl,int* size_rho,
+                                                          float* mu_kl, int* size_mu,
+                                                          float* kappa_kl, int* size_kappa,
+                                                          float* epsilon_trace_over_3,
+                                                          float* b_epsilon_trace_over_3,
+                                                          int* size_epsilon_trace_over_3) {}
+*/
+                                                          
 void FC_FUNC_(transfer_b_fields_att_to_device,
               TRANSFER_B_FIELDS_ATT_TO_DEVICE)(long* Mesh_pointer,
                                              float* b_R_xx,float* b_R_yy,float* b_R_xy,float* b_R_xz,float* b_R_yz,
@@ -491,85 +560,94 @@
                                                float* epsilondev_yz,
                                                int* size_epsilondev){}
 
-void FC_FUNC_(transfer_sensitivity_kernels_to_host,
-              TRANSFER_SENSITIVITY_KERNELS_TO_HOST)(long* Mesh_pointer, 
+void FC_FUNC_(transfer_kernels_el_to_host,
+              TRANSFER_KERNELS_EL_TO_HOST)(long* Mesh_pointer, 
                                                     float* h_rho_kl,
                                                     float* h_mu_kl, 
                                                     float* h_kappa_kl,
                                                     int* NSPEC_AB){}
 
-void FC_FUNC_(transfer_sensitivity_kernels_noise_to_host,
-              TRANSFER_SENSITIVITY_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer, 
-                                                          float* h_Sigma_kl,
-                                                          int* NSPEC_AB){}
+void FC_FUNC_(transfer_kernels_noise_to_host,
+              TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer, 
+                                              float* h_Sigma_kl,
+                                              int* NSPEC_AB){}
 
 							 
-void FC_FUNC_(transfer_fields_acoustic_to_device,
-              TRANSFER_FIELDS_ACOUSTIC_TO_DEVICE)(
+void FC_FUNC_(transfer_fields_ac_to_device,
+              TRANSFER_FIELDS_AC_TO_DEVICE)(
                                                   int* size, 
                                                   float* potential_acoustic, 
                                                   float* potential_dot_acoustic, 
                                                   float* potential_dot_dot_acoustic,
                                                   long* Mesh_pointer_f){}
 
-void FC_FUNC_(transfer_b_fields_acoustic_to_device,
-              TRANSFER_B_FIELDS_ACOUSTIC_TO_DEVICE)(
+void FC_FUNC_(transfer_b_fields_ac_to_device,
+              TRANSFER_B_FIELDS_AC_TO_DEVICE)(
                                                     int* size, 
                                                     float* b_potential_acoustic, 
                                                     float* b_potential_dot_acoustic, 
                                                     float* b_potential_dot_dot_acoustic,
                                                     long* Mesh_pointer_f){}
 
-void FC_FUNC_(transfer_fields_acoustic_from_device,TRANSFER_FIELDS_ACOUSTIC_FROM_DEVICE)(
-                                                                                         int* size, 
-                                                                                         float* potential_acoustic, 
-                                                                                         float* potential_dot_acoustic, 
-                                                                                         float* potential_dot_dot_acoustic,
-                                                                                         long* Mesh_pointer_f){}
+void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
+                                                                             int* size, 
+                                                                             float* potential_acoustic, 
+                                                                             float* potential_dot_acoustic, 
+                                                                             float* potential_dot_dot_acoustic,
+                                                                             long* Mesh_pointer_f){}
 
-void FC_FUNC_(transfer_b_fields_acoustic_from_device,
-              TRANSFER_B_FIELDS_ACOUSTIC_FROM_DEVICE)(
+void FC_FUNC_(transfer_b_fields_ac_from_device,
+              TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
                                                       int* size, 
                                                       float* b_potential_acoustic, 
                                                       float* b_potential_dot_acoustic, 
                                                       float* b_potential_dot_dot_acoustic,
                                                       long* Mesh_pointer_f){}
 
-void FC_FUNC_(transfer_potential_dot_dot_from_device,
-              TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* potential_dot_dot_acoustic,long* Mesh_pointer_f){}
+void FC_FUNC_(transfer_dot_dot_from_device,
+              TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, float* potential_dot_dot_acoustic,long* Mesh_pointer_f){}
 
-void FC_FUNC_(transfer_b_potential_dot_dot_from_device,
-              TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* b_potential_dot_dot_acoustic,long* Mesh_pointer_f){}
+void FC_FUNC_(transfer_b_dot_dot_from_device,
+              TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, float* b_potential_dot_dot_acoustic,long* Mesh_pointer_f){}
 
-void FC_FUNC_(transfer_sensitivity_kernels_acoustic_to_host,
-              TRANSFER_SENSITIVITY_KERNELS_ACOUSTIC_TO_HOST)(long* Mesh_pointer, 
+void FC_FUNC_(transfer_kernels_ac_to_host,
+              TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer, 
                                                              float* h_rho_ac_kl,
                                                              float* h_kappa_ac_kl,
                                                              int* NSPEC_AB){}
 
+void FC_FUNC_(transfer_kernels_hess_el_tohost,
+              TRANSFER_KERNELS_HESS_TO_HOST)(long* Mesh_pointer,
+                                             float* h_hess_kl,
+                                             int* NSPEC_AB) {}
+void FC_FUNC_(transfer_kernels_hess_ac_tohost,
+              TRANSFER_KERNELS_HESS_TO_HOST)(long* Mesh_pointer,
+                                             float* h_hess_ac_kl,
+                                             int* NSPEC_AB) {}
+
 /* from file write_seismograms_cuda.cu */
 
-void FC_FUNC_(transfer_station_fields_from_device,
-              TRANSFER_STATION_FIELDS_FROM_DEVICE)(float* displ,float* veloc,float* accel,
+void FC_FUNC_(transfer_station_el_from_device,
+              TRANSFER_STATION_EL_FROM_DEVICE)(float* displ,float* veloc,float* accel,
                                                    float* b_displ, float* b_veloc, float* b_accel,
                                                    long* Mesh_pointer_f,int* number_receiver_global,
                                                    int* ispec_selected_rec,int* ispec_selected_source,
                                                    int* ibool,int* SIMULATION_TYPEf){}
 
-void FC_FUNC_(transfer_station_fields_acoustic_from_device,
-              TRANSFER_STATION_FIELDS_ACOUSTIC_FROM_DEVICE)(
-                                                            float* potential_acoustic,
-                                                            float* potential_dot_acoustic,
-                                                            float* potential_dot_dot_acoustic,
-                                                            float* b_potential_acoustic, 
-                                                            float* b_potential_dot_acoustic, 
-                                                            float* b_potential_dot_dot_acoustic,
-                                                            long* Mesh_pointer_f,
-                                                            int* number_receiver_global,
-                                                            int* ispec_selected_rec,
-                                                            int* ispec_selected_source,
-                                                            int* ibool,
-                                                            int* SIMULATION_TYPEf){}
+void FC_FUNC_(transfer_station_ac_from_device,
+              TRANSFER_STATION_AC_FROM_DEVICE)(
+                                                float* potential_acoustic,
+                                                float* potential_dot_acoustic,
+                                                float* potential_dot_dot_acoustic,
+                                                float* b_potential_acoustic, 
+                                                float* b_potential_dot_acoustic, 
+                                                float* b_potential_dot_dot_acoustic,
+                                                long* Mesh_pointer_f,
+                                                int* number_receiver_global,
+                                                int* ispec_selected_rec,
+                                                int* ispec_selected_source,
+                                                int* ibool,
+                                                int* SIMULATION_TYPEf){}
 
 							   
 							    

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -53,75 +53,75 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(transfer_b_fields_to_device,
-              TRANSFER_B_FIELDS_TO_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,
-                                           long* Mesh_pointer_f) {
+void FC_FUNC_(transfer_fields_el_to_device,
+              TRANSFER_FIELDS_EL_TO_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
 
-TRACE("transfer_b_fields_to_device_");               
+TRACE("transfer_fields_el_to_device_");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  cudaMemcpy(mp->d_b_displ,b_displ,sizeof(float)*(*size),cudaMemcpyHostToDevice);
-  cudaMemcpy(mp->d_b_veloc,b_veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice);
-  cudaMemcpy(mp->d_b_accel,b_accel,sizeof(float)*(*size),cudaMemcpyHostToDevice);
-  
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_displ,displ,sizeof(float)*(*size),cudaMemcpyHostToDevice),40003);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_veloc,veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice),40004);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_accel,accel,sizeof(float)*(*size),cudaMemcpyHostToDevice),40005);
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(transfer_fields_to_device,
-              TRANSFER_FIELDS_TO_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
+void FC_FUNC_(transfer_fields_el_from_device,
+              TRANSFER_FIELDS_EL_FROM_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
 
-TRACE("transfer_fields_to_device_");
-    
+  TRACE("transfer_fields_el_from_device_");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_displ,displ,sizeof(float)*(*size),cudaMemcpyHostToDevice),40003);
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_veloc,veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice),40004);
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_accel,accel,sizeof(float)*(*size),cudaMemcpyHostToDevice),40005);
-  
+  print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40006);
+  print_CUDA_error_if_any(cudaMemcpy(veloc,mp->d_veloc,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40007);
+  print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40008);
+
+  // printf("Transfered Fields From Device\n");
+  // int procid;
+  // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+  // printf("Quick check of answer for p:%d in transfer_fields_el_from_device\n",procid);
+  // for(int i=0;i<5;i++) {
+  // printf("accel[%d]=%2.20e\n",i,accel[i]);
+  // }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
+void FC_FUNC_(transfer_b_fields_to_device,
+              TRANSFER_B_FIELDS_TO_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,
+                                           long* Mesh_pointer_f) {
+
+  TRACE("transfer_b_fields_to_device_");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+  cudaMemcpy(mp->d_b_displ,b_displ,sizeof(float)*(*size),cudaMemcpyHostToDevice);
+  cudaMemcpy(mp->d_b_veloc,b_veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice);
+  cudaMemcpy(mp->d_b_accel,b_accel,sizeof(float)*(*size),cudaMemcpyHostToDevice);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
 void FC_FUNC_(transfer_b_fields_from_device,
               TRANSFER_B_FIELDS_FROM_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,long* Mesh_pointer_f) {
 
 TRACE("transfer_b_fields_from_device_");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   cudaMemcpy(b_displ,mp->d_b_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
   cudaMemcpy(b_veloc,mp->d_b_veloc,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
   cudaMemcpy(b_accel,mp->d_b_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
-  
+
 }
 
-/* ----------------------------------------------------------------------------------------------- */
 
-extern "C"
-void FC_FUNC_(transfer_fields_from_device,
-              TRANSFER_FIELDS_FROM_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
-  
-TRACE("transfer_fields_from_device_");
-  
-  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
-  print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40006);
-  print_CUDA_error_if_any(cudaMemcpy(veloc,mp->d_veloc,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40007);
-  print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40008);
-  
-  // printf("Transfered Fields From Device\n");
-  // int procid;
-  // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
-  // printf("Quick check of answer for p:%d in transfer_fields_from_device\n",procid);
-  // for(int i=0;i<5;i++) {
-  // printf("accel[%d]=%2.20e\n",i,accel[i]);
-  // }
-  
-}
-
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
@@ -129,9 +129,9 @@
               TRNASFER_ACCEL_TO_DEVICE)(int* size, float* accel,long* Mesh_pointer_f) {
 
 TRACE("transfer_accel_to_device");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(mp->d_accel,accel,sizeof(float)*(*size),cudaMemcpyHostToDevice),40016);
 
 }
@@ -143,9 +143,9 @@
               TRANSFER_ACCEL_FROM_DEVICE)(int* size, float* accel,long* Mesh_pointer_f) {
 
 TRACE("transfer_accel_from_device");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40026);
 
 }
@@ -157,9 +157,9 @@
               TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* b_accel,long* Mesh_pointer_f) {
 
 TRACE("transfer_b_accel_from_device");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40036);
 
 }
@@ -171,9 +171,9 @@
               TRANSFER_SIGMA_FROM_DEVICE)(int* size, float* sigma_kl,long* Mesh_pointer_f) {
 
 TRACE("transfer_sigma_from_device");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(sigma_kl,mp->d_Sigma_kl,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40046);
 
 }
@@ -185,9 +185,9 @@
               TRANSFER_B_DISPL_FROM_DEVICE)(int* size, float* displ,long* Mesh_pointer_f) {
 
 TRACE("transfer_b_displ_from_device");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40056);
 
 }
@@ -199,15 +199,15 @@
               TRANSFER_DISPL_FROM_DEVICE)(int* size, float* displ,long* Mesh_pointer_f) {
 
 TRACE("transfer_displ_from_device");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40066);
 
 }
 
 /* ----------------------------------------------------------------------------------------------- */
-
+/*
 extern "C"
 void FC_FUNC_(transfer_compute_kernel_answers_from_device,
               TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
@@ -219,9 +219,10 @@
   Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
   cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(float),cudaMemcpyDeviceToHost);
-  cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);  
-  
+  cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);
+
 }
+*/
 
 /* ----------------------------------------------------------------------------------------------- */
 /*
@@ -265,12 +266,12 @@
   cudaMemcpy(b_epsilondev_yz,mp->d_b_epsilondev_yz,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(float),cudaMemcpyDeviceToHost);
-  cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);  
+  cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(epsilon_trace_over_3,mp->d_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(float),
-	     cudaMemcpyDeviceToHost);
+       cudaMemcpyDeviceToHost);
   cudaMemcpy(b_epsilon_trace_over_3,mp->d_b_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(float),
-	     cudaMemcpyDeviceToHost);
-       
+       cudaMemcpyDeviceToHost);
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("after transfer_compute_kernel_fields_from_device");
 #endif
@@ -294,21 +295,21 @@
                                              int* size_epsilondev) {
   TRACE("transfer_b_fields_att_to_device");
   //get mesh pointer out of fortran integer container
-  Mesh* mp = (Mesh*)(*Mesh_pointer); 
-  
+  Mesh* mp = (Mesh*)(*Mesh_pointer);
+
   cudaMemcpy(mp->d_b_R_xx,b_R_xx,*size_R*sizeof(float),cudaMemcpyHostToDevice);
   cudaMemcpy(mp->d_b_R_yy,b_R_yy,*size_R*sizeof(float),cudaMemcpyHostToDevice);
   cudaMemcpy(mp->d_b_R_xy,b_R_xy,*size_R*sizeof(float),cudaMemcpyHostToDevice);
   cudaMemcpy(mp->d_b_R_xz,b_R_xz,*size_R*sizeof(float),cudaMemcpyHostToDevice);
   cudaMemcpy(mp->d_b_R_yz,b_R_yz,*size_R*sizeof(float),cudaMemcpyHostToDevice);
-  
+
   cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
-  cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);  
+  cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
   cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
   cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
   cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
-  
-  
+
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("after transfer_b_fields_att_to_device");
 #endif
@@ -331,21 +332,21 @@
                                                int* size_epsilondev) {
   TRACE("transfer_fields_att_from_device");
   //get mesh pointer out of fortran integer container
-  Mesh* mp = (Mesh*)(*Mesh_pointer); 
+  Mesh* mp = (Mesh*)(*Mesh_pointer);
 
   cudaMemcpy(R_xx,mp->d_R_xx,*size_R*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(R_yy,mp->d_R_yy,*size_R*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(R_xy,mp->d_R_xy,*size_R*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(R_xz,mp->d_R_xz,*size_R*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(R_yz,mp->d_R_yz,*size_R*sizeof(float),cudaMemcpyDeviceToHost);
-  
+
   cudaMemcpy(epsilondev_xx,mp->d_epsilondev_xx,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(epsilondev_yy,mp->d_epsilondev_yy,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(epsilondev_xy,mp->d_epsilondev_xy,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(epsilondev_xz,mp->d_epsilondev_xz,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
   cudaMemcpy(epsilondev_yz,mp->d_epsilondev_yz,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
-  
-  
+
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("after transfer_fields_att_from_device");
 #endif
@@ -354,24 +355,24 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(transfer_sensitivity_kernels_to_host,
-              TRANSFER_SENSITIVITY_KERNELS_TO_HOST)(long* Mesh_pointer, 
+extern "C"
+void FC_FUNC_(transfer_kernels_el_to_host,
+              TRANSFER_KERNELS_EL_TO_HOST)(long* Mesh_pointer,
                                                     float* h_rho_kl,
-                                                    float* h_mu_kl, 
+                                                    float* h_mu_kl,
                                                     float* h_kappa_kl,
                                                     int* NSPEC_AB) {
-TRACE("transfer_sensitivity_kernels_to_host");
+TRACE("transfer_kernels_el_to_host");
   //get mesh pointer out of fortran integer container
-  Mesh* mp = (Mesh*)(*Mesh_pointer); 
-  
+  Mesh* mp = (Mesh*)(*Mesh_pointer);
+
   print_CUDA_error_if_any(cudaMemcpy(h_rho_kl,mp->d_rho_kl,*NSPEC_AB*125*sizeof(float),
                                      cudaMemcpyDeviceToHost),40101);
   print_CUDA_error_if_any(cudaMemcpy(h_mu_kl,mp->d_mu_kl,*NSPEC_AB*125*sizeof(float),
                                      cudaMemcpyDeviceToHost),40102);
   print_CUDA_error_if_any(cudaMemcpy(h_kappa_kl,mp->d_kappa_kl,*NSPEC_AB*125*sizeof(float),
                                      cudaMemcpyDeviceToHost),40103);
-  
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -380,18 +381,18 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(transfer_sensitivity_kernels_noise_to_host,
-              TRANSFER_SENSITIVITY_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer, 
+extern "C"
+void FC_FUNC_(transfer_kernels_noise_to_host,
+              TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
                                                           float* h_Sigma_kl,
                                                           int* NSPEC_AB) {
-TRACE("transfer_sensitivity_kernels_noise_to_host");
-  
+TRACE("transfer_kernels_noise_to_host");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(h_Sigma_kl,mp->d_Sigma_kl,125*(*NSPEC_AB)*sizeof(float),
-                                     cudaMemcpyHostToDevice),40201);
-  
+                                     cudaMemcpyDeviceToHost),40201);
+
 }
 
 
@@ -402,52 +403,52 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(transfer_fields_acoustic_to_device,
-              TRANSFER_FIELDS_ACOUSTIC_TO_DEVICE)(
-                                                  int* size, 
-                                                  float* potential_acoustic, 
-                                                  float* potential_dot_acoustic, 
+void FC_FUNC_(transfer_fields_ac_to_device,
+              TRANSFER_FIELDS_AC_TO_DEVICE)(
+                                                  int* size,
+                                                  float* potential_acoustic,
+                                                  float* potential_dot_acoustic,
                                                   float* potential_dot_dot_acoustic,
                                                   long* Mesh_pointer_f) {
-TRACE("transfer_fields_acoustic_to_device");
-  
+TRACE("transfer_fields_ac_to_device");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_acoustic,potential_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyHostToDevice),50110);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_dot_acoustic,potential_dot_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyHostToDevice),50120);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyHostToDevice),50130);
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("after transfer_fields_acoustic_to_device");
-#endif  
+  exit_on_cuda_error("after transfer_fields_ac_to_device");
+#endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(transfer_b_fields_acoustic_to_device,
-              TRANSFER_B_FIELDS_ACOUSTIC_TO_DEVICE)(
-                                                    int* size, 
-                                                    float* b_potential_acoustic, 
-                                                    float* b_potential_dot_acoustic, 
+void FC_FUNC_(transfer_b_fields_ac_to_device,
+              TRANSFER_B_FIELDS_AC_TO_DEVICE)(
+                                                    int* size,
+                                                    float* b_potential_acoustic,
+                                                    float* b_potential_dot_acoustic,
                                                     float* b_potential_dot_dot_acoustic,
                                                     long* Mesh_pointer_f) {
-TRACE("transfer_b_fields_acoustic_to_device");
-  
+TRACE("transfer_b_fields_ac_to_device");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_acoustic,b_potential_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyHostToDevice),51110);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyHostToDevice),51120);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyHostToDevice),51130);
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("after transfer_b_fields_acoustic_to_device");
+  exit_on_cuda_error("after transfer_b_fields_ac_to_device");
 #endif
 }
 
@@ -455,104 +456,139 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(transfer_fields_acoustic_from_device,TRANSFER_FIELDS_ACOUSTIC_FROM_DEVICE)(
-                                                                                         int* size, 
-                                                                                         float* potential_acoustic, 
-                                                                                         float* potential_dot_acoustic, 
+void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
+                                                                                         int* size,
+                                                                                         float* potential_acoustic,
+                                                                                         float* potential_dot_acoustic,
                                                                                          float* potential_dot_dot_acoustic,
                                                                                          long* Mesh_pointer_f) {
-TRACE("transfer_fields_acoustic_from_device");
-  
+TRACE("transfer_fields_ac_from_device");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(potential_acoustic,mp->d_potential_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyDeviceToHost),52111);
   print_CUDA_error_if_any(cudaMemcpy(potential_dot_acoustic,mp->d_potential_dot_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyDeviceToHost),52121);
   print_CUDA_error_if_any(cudaMemcpy(potential_dot_dot_acoustic,mp->d_potential_dot_dot_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyDeviceToHost),52131);
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("after transfer_fields_acoustic_from_device");
+  exit_on_cuda_error("after transfer_fields_ac_from_device");
 #endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(transfer_b_fields_acoustic_from_device,
-              TRANSFER_B_FIELDS_ACOUSTIC_FROM_DEVICE)(
-                                                      int* size, 
-                                                      float* b_potential_acoustic, 
-                                                      float* b_potential_dot_acoustic, 
+void FC_FUNC_(transfer_b_fields_ac_from_device,
+              TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
+                                                      int* size,
+                                                      float* b_potential_acoustic,
+                                                      float* b_potential_dot_acoustic,
                                                       float* b_potential_dot_dot_acoustic,
                                                       long* Mesh_pointer_f) {
-TRACE("transfer_b_fields_acoustic_from_device");
-  
+TRACE("transfer_b_fields_ac_from_device");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(b_potential_acoustic,mp->d_b_potential_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyDeviceToHost),53111);
   print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_acoustic,mp->d_b_potential_dot_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyDeviceToHost),53121);
   print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_dot_acoustic,mp->d_b_potential_dot_dot_acoustic,
-                                     sizeof(float)*(*size),cudaMemcpyDeviceToHost),53131);  
-  
+                                     sizeof(float)*(*size),cudaMemcpyDeviceToHost),53131);
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-  exit_on_cuda_error("after transfer_b_fields_acoustic_from_device");
+  exit_on_cuda_error("after transfer_b_fields_ac_from_device");
 #endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(transfer_potential_dot_dot_from_device,
-              TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* potential_dot_dot_acoustic,long* Mesh_pointer_f) {
-  
-  TRACE("transfer_potential_dot_dot_from_device");
-  
+void FC_FUNC_(transfer_dot_dot_from_device,
+              TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, float* potential_dot_dot_acoustic,long* Mesh_pointer_f) {
+
+  TRACE("transfer_dot_dot_from_device");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(potential_dot_dot_acoustic,mp->d_potential_dot_dot_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyDeviceToHost),50041);
-  
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(transfer_b_potential_dot_dot_from_device,
-              TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {
-  
-  TRACE("transfer_b_potential_dot_dot_from_device");
-  
+void FC_FUNC_(transfer_b_dot_dot_from_device,
+              TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, float* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {
+
+  TRACE("transfer_b_dot_dot_from_device");
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_dot_acoustic,mp->d_b_potential_dot_dot_acoustic,
                                      sizeof(float)*(*size),cudaMemcpyDeviceToHost),50042);
-  
+
 }
 
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(transfer_sensitivity_kernels_acoustic_to_host,
-              TRANSFER_SENSITIVITY_KERNELS_ACOUSTIC_TO_HOST)(long* Mesh_pointer, 
+extern "C"
+void FC_FUNC_(transfer_kernels_ac_to_host,
+              TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer,
                                                              float* h_rho_ac_kl,
                                                              float* h_kappa_ac_kl,
                                                              int* NSPEC_AB) {
-  
-  TRACE("transfer_sensitivity_kernels_acoustic_to_host");  
-  
-  //get mesh pointer out of fortran integer container  
-  Mesh* mp = (Mesh*)(*Mesh_pointer); 
+
+  TRACE("transfer_kernels_ac_to_host");
+
+  //get mesh pointer out of fortran integer container
+  Mesh* mp = (Mesh*)(*Mesh_pointer);
   int size = *NSPEC_AB*125;
-  
+
   // copies kernel values over to CPU host
   print_CUDA_error_if_any(cudaMemcpy(h_rho_ac_kl,mp->d_rho_ac_kl,size*sizeof(float),
                                      cudaMemcpyDeviceToHost),54101);
   print_CUDA_error_if_any(cudaMemcpy(h_kappa_ac_kl,mp->d_kappa_ac_kl,size*sizeof(float),
-                                     cudaMemcpyDeviceToHost),54102);  
+                                     cudaMemcpyDeviceToHost),54102);
 }
 
+/* ----------------------------------------------------------------------------------------------- */
+
+// for Hess kernel calculations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_kernels_hess_el_tohost,
+              TRANSFER_KERNELS_HESS_EL_TOHOST)(long* Mesh_pointer,
+                                              float* h_hess_kl,
+                                              int* NSPEC_AB) {
+TRACE("transfer_kernels_hess_el_tohost");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(h_hess_kl,mp->d_hess_el_kl,125*(*NSPEC_AB)*sizeof(float),
+                                     cudaMemcpyDeviceToHost),70201);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_kernels_hess_ac_tohost,
+              TRANSFER_KERNELS_HESS_AC_TOHOST)(long* Mesh_pointer,
+                                             float* h_hess_ac_kl,
+                                             int* NSPEC_AB) {
+  TRACE("transfer_kernels_hess_ac_tohost");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(h_hess_ac_kl,mp->d_hess_ac_kl,125*(*NSPEC_AB)*sizeof(float),
+                                     cudaMemcpyDeviceToHost),70202);
+}
+
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu	2011-10-30 02:25:28 UTC (rev 19129)
@@ -52,7 +52,7 @@
   int blockID = blockIdx.x + blockIdx.y*gridDim.x;
   if(blockID<nrec_local) {
     //int nodeID = threadIdx.x + blockID*blockDim.x;
-    int irec = number_receiver_global[blockID]-1;  
+    int irec = number_receiver_global[blockID]-1;
     int ispec = ispec_selected_rec[irec]-1; // ispec==0 before -1???
     // if(threadIdx.x==1 && blockID < 125) {
     //   // debug_index[threadIdx.x] = threadIdx.x + 125*ispec;
@@ -79,7 +79,12 @@
                                           int* d_ispec_selected,
                                           int* h_ispec_selected,
                                           int* ibool) {
-  
+
+TRACE("transfer_field_from_device");
+
+  // checks if anything to do
+  if( mp->nrec_local == 0 ) return;
+
   int blocksize = 125;
   int num_blocks_x = mp->nrec_local;
   int num_blocks_y = 1;
@@ -89,7 +94,7 @@
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
 
@@ -98,42 +103,42 @@
   //cudaMalloc((void**)&d_debug_index,125*sizeof(int));
   //h_debug_index = (int*)calloc(125,sizeof(int));
   //cudaMemcpy(d_debug_index,h_debug_index,125*sizeof(int),cudaMemcpyHostToDevice);
-  
-  
+
+
   // prepare field transfer array on device
   transfer_stations_fields_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
-								d_ispec_selected,
-								mp->d_ibool,
-								mp->d_station_seismo_field,
-								d_field,
-								mp->nrec_local,d_debug_index);
+                d_ispec_selected,
+                mp->d_ibool,
+                mp->d_station_seismo_field,
+                d_field,
+                mp->nrec_local,d_debug_index);
 
   //cudaMemcpy(h_debug_index,d_debug_index,125*sizeof(int),cudaMemcpyDeviceToHost);
-  
+
   // pause_for_debug(1);
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("transfer_stations_fields_from_device_kernel");
 #endif
-  
+
   cudaMemcpy(mp->h_station_seismo_field,mp->d_station_seismo_field,
-	     (3*125)*(mp->nrec_local)*sizeof(float),cudaMemcpyDeviceToHost);
-    
+       (3*125)*(mp->nrec_local)*sizeof(float),cudaMemcpyDeviceToHost);
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("transfer_stations_fields_from_device_kernel_memcpy");
 #endif
-  
+
   // pause_for_debug(1);
   int irec_local;
-  
+
   for(irec_local=0;irec_local<mp->nrec_local;irec_local++) {
-    int irec = number_receiver_global[irec_local]-1;
-    int ispec = h_ispec_selected[irec]-1;
-       
+    int irec = number_receiver_global[irec_local] - 1;
+    int ispec = h_ispec_selected[irec] - 1;
+
     for(int i=0;i<125;i++) {
-      int iglob = ibool[i+125*ispec]-1;
+      int iglob = ibool[i+125*ispec] - 1;
       h_field[0+3*iglob] = mp->h_station_seismo_field[0+3*i+irec_local*125*3];
       h_field[1+3*iglob] = mp->h_station_seismo_field[1+3*i+irec_local*125*3];
-      h_field[2+3*iglob] = mp->h_station_seismo_field[2+3*i+irec_local*125*3];      
+      h_field[2+3*iglob] = mp->h_station_seismo_field[2+3*i+irec_local*125*3];
     }
 
   }
@@ -142,44 +147,45 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(transfer_station_fields_from_device,
-              TRANSFER_STATION_FIELDS_FROM_DEVICE)(float* displ,float* veloc,float* accel,
+void FC_FUNC_(transfer_station_el_from_device,
+              TRANSFER_STATION_EL_FROM_DEVICE)(float* displ,float* veloc,float* accel,
                                                    float* b_displ, float* b_veloc, float* b_accel,
                                                    long* Mesh_pointer_f,int* number_receiver_global,
                                                    int* ispec_selected_rec,int* ispec_selected_source,
                                                    int* ibool,int* SIMULATION_TYPEf) {
-TRACE("transfer_station_fields_from_device");  
+TRACE("transfer_station_el_from_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-   
+  // checks if anything to do
+  if( mp->nrec_local == 0 ) return;
 
   int SIMULATION_TYPE = *SIMULATION_TYPEf;
-  
+
   if(SIMULATION_TYPE == 1) {
     transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
-			       mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+             mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
     transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
-			       mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+             mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
     transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
-			       mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+             mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
   }
   else if(SIMULATION_TYPE == 2) {
     transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
-			       mp->d_ispec_selected_source, ispec_selected_source, ibool);
+             mp->d_ispec_selected_source, ispec_selected_source, ibool);
     transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
-			       mp->d_ispec_selected_source, ispec_selected_source, ibool);
+             mp->d_ispec_selected_source, ispec_selected_source, ibool);
     transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
-			       mp->d_ispec_selected_source, ispec_selected_source, ibool);
+             mp->d_ispec_selected_source, ispec_selected_source, ibool);
   }
   else if(SIMULATION_TYPE == 3) {
     transfer_field_from_device(mp,mp->d_b_displ,b_displ, number_receiver_global,
-			       mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+             mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
     transfer_field_from_device(mp,mp->d_b_veloc,b_veloc, number_receiver_global,
-			       mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+             mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
     transfer_field_from_device(mp,mp->d_b_accel,b_accel, number_receiver_global,
-			       mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+             mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
   }
-  
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -193,36 +199,36 @@
                                                                      int* ibool,
                                                                      float* station_seismo_potential,
                                                                      float* desired_potential) {
-  
+
   int blockID = blockIdx.x + blockIdx.y*gridDim.x;
   int nodeID = threadIdx.x + blockID*blockDim.x;
-  
-  int irec = number_receiver_global[blockID]-1;  
+
+  int irec = number_receiver_global[blockID]-1;
   int ispec = ispec_selected_rec[irec]-1;
   int iglob = ibool[threadIdx.x + 125*ispec]-1;
-  
+
   //if(threadIdx.x == 0 ) printf("node acoustic: %i %i %i %i %i %e \n",blockID,nodeID,irec,ispec,iglob,desired_potential[iglob]);
-  
+
   station_seismo_potential[nodeID] = desired_potential[iglob];
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-void transfer_field_acoustic_from_device(Mesh* mp, 
+void transfer_field_acoustic_from_device(Mesh* mp,
                                          float* d_potential,
                                          float* h_potential,
                                          int* number_receiver_global,
                                          int* d_ispec_selected,
                                          int* h_ispec_selected,
                                          int* ibool) {
-  
+
 TRACE("transfer_field_acoustic_from_device");
-  
+
   int irec_local,irec,ispec,iglob,j;
-  
+
   // checks if anything to do
   if( mp->nrec_local == 0 ) return;
-  
+
   // sets up kernel dimensions
   int blocksize = 125;
   int num_blocks_x = mp->nrec_local;
@@ -231,107 +237,109 @@
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-  
+
   // prepare field transfer array on device
   transfer_stations_fields_acoustic_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
                                                                          d_ispec_selected,
                                                                          mp->d_ibool,
                                                                          mp->d_station_seismo_potential,
                                                                          d_potential);
-  
-    
+
+
   print_CUDA_error_if_any(cudaMemcpy(mp->h_station_seismo_potential,mp->d_station_seismo_potential,
                                      mp->nrec_local*125*sizeof(float),cudaMemcpyDeviceToHost),500);
-  
-  //printf("copy local receivers: %i \n",mp->nrec_local); 
-  
+
+  //printf("copy local receivers: %i \n",mp->nrec_local);
+
   for(irec_local=0; irec_local < mp->nrec_local; irec_local++) {
     irec = number_receiver_global[irec_local]-1;
     ispec = h_ispec_selected[irec]-1;
-    
+
     // copy element values
     // note: iglob may vary and can be irregularly accessing the h_potential array
     for(j=0; j < 125; j++){
       iglob = ibool[j+125*ispec]-1;
       h_potential[iglob] = mp->h_station_seismo_potential[j+irec_local*125];
     }
-    
+
     // copy each station element's points to working array
     // note: this works if iglob values would be all aligned...
     //memcpy(&(h_potential[iglob]),&(mp->h_station_seismo_potential[irec_local*125]),125*sizeof(float));
-    
+
   }
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("transfer_field_acoustic_from_device");
-#endif  
+#endif
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
-extern "C" 
-void FC_FUNC_(transfer_station_fields_acoustic_from_device,
-              TRANSFER_STATION_FIELDS_ACOUSTIC_FROM_DEVICE)(
-                                                            float* potential_acoustic,
-                                                            float* potential_dot_acoustic,
-                                                            float* potential_dot_dot_acoustic,
-                                                            float* b_potential_acoustic, 
-                                                            float* b_potential_dot_acoustic, 
-                                                            float* b_potential_dot_dot_acoustic,
-                                                            long* Mesh_pointer_f,
-                                                            int* number_receiver_global,
-                                                            int* ispec_selected_rec,
-                                                            int* ispec_selected_source,
-                                                            int* ibool,
-                                                            int* SIMULATION_TYPEf) {
-  
-TRACE("transfer_station_fields_acoustic_from_device");  
+extern "C"
+void FC_FUNC_(transfer_station_ac_from_device,
+              TRANSFER_STATION_AC_FROM_DEVICE)(
+                                                float* potential_acoustic,
+                                                float* potential_dot_acoustic,
+                                                float* potential_dot_dot_acoustic,
+                                                float* b_potential_acoustic,
+                                                float* b_potential_dot_acoustic,
+                                                float* b_potential_dot_dot_acoustic,
+                                                long* Mesh_pointer_f,
+                                                int* number_receiver_global,
+                                                int* ispec_selected_rec,
+                                                int* ispec_selected_source,
+                                                int* ibool,
+                                                int* SIMULATION_TYPEf) {
+
+TRACE("transfer_station_ac_from_device");
   //double start_time = get_time();
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-  
+  // checks if anything to do
+  if( mp->nrec_local == 0 ) return;
+
   int SIMULATION_TYPE = *SIMULATION_TYPEf;
-  
+
   if(SIMULATION_TYPE == 1) {
     transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
                                         number_receiver_global,
                                         mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
-    transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic, 
+    transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
                                         number_receiver_global,
                                         mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
-    transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic, 
+    transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
                                         number_receiver_global,
                                         mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
   }
   else if(SIMULATION_TYPE == 2) {
-    transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic, 
+    transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
                                         number_receiver_global,
                                         mp->d_ispec_selected_source, ispec_selected_source, ibool);
-    transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic, 
+    transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
                                         number_receiver_global,
                                         mp->d_ispec_selected_source, ispec_selected_source, ibool);
-    transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic, 
+    transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
                                         number_receiver_global,
                                         mp->d_ispec_selected_source, ispec_selected_source, ibool);
   }
   else if(SIMULATION_TYPE == 3) {
-    transfer_field_acoustic_from_device(mp,mp->d_b_potential_acoustic,b_potential_acoustic, 
+    transfer_field_acoustic_from_device(mp,mp->d_b_potential_acoustic,b_potential_acoustic,
                                         number_receiver_global,
                                         mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
-    transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic, 
+    transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
                                         number_receiver_global,
                                         mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
-    transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic, 
+    transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
                                         number_receiver_global,
                                         mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
   }
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
-  exit_on_cuda_error("transfer_station_fields_acoustic_from_device");  
+  exit_on_cuda_error("transfer_station_ac_from_device");
 #endif
 }
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/decompose_mesh_SCOTCH.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/decompose_mesh_SCOTCH.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/decompose_mesh_SCOTCH.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -107,6 +107,8 @@
   integer :: aniso_flag,idomain_id
   double precision :: vp,vs,rho,qmu
 
+  integer, parameter :: IIN_database = 15
+
   contains
 
   !----------------------------------------------------------------------------------------------
@@ -783,8 +785,8 @@
 
        ! opens output file
        write(prname, "(i6.6,'_Database')") ipart
-       open(unit=15,file=outputpath_name(1:len_trim(outputpath_name))//'/proc'//prname,&
-            status='unknown', action='write', form='formatted', iostat = ier)
+       open(unit=IIN_database,file=outputpath_name(1:len_trim(outputpath_name))//'/proc'//prname,&
+            status='unknown', action='write', form='unformatted', iostat = ier)
        if( ier /= 0 ) then
         print*,'error file open:',outputpath_name(1:len_trim(outputpath_name))//'/proc'//prname
         print*
@@ -793,34 +795,36 @@
        endif
 
        ! gets number of nodes
-       call write_glob2loc_nodes_database(15, ipart, nnodes_loc, nodes_coords, &
+       call write_glob2loc_nodes_database(IIN_database, ipart, nnodes_loc, nodes_coords, &
                                   glob2loc_nodes_nparts, glob2loc_nodes_parts, &
                                   glob2loc_nodes, nnodes, 1)
 
        ! gets number of spectral elements
-       call write_partition_database(15, ipart, nspec_loc, nspec, elmnts, &
+       call write_partition_database(IIN_database, ipart, nspec_loc, nspec, elmnts, &
                                   glob2loc_elmnts, glob2loc_nodes_nparts, &
                                   glob2loc_nodes_parts, glob2loc_nodes, part, mat, ngnod, 1)
 
        ! writes out node coordinate locations
-       write(15,*) nnodes_loc
+       !write(IIN_database,*) nnodes_loc
+       write(IIN_database) nnodes_loc
 
-       call write_glob2loc_nodes_database(15, ipart, nnodes_loc, nodes_coords,&
+       call write_glob2loc_nodes_database(IIN_database, ipart, nnodes_loc, nodes_coords,&
                                   glob2loc_nodes_nparts, glob2loc_nodes_parts, &
                                   glob2loc_nodes, nnodes, 2)
 
-       call write_material_props_database(15,count_def_mat,count_undef_mat, &
+       call write_material_props_database(IIN_database,count_def_mat,count_undef_mat, &
                                   mat_prop, undef_mat_prop)
 
        ! writes out spectral element indices
-       write(15,*) nspec_loc
+       !write(IIN_database,*) nspec_loc
+       write(IIN_database) nspec_loc
 
-       call write_partition_database(15, ipart, nspec_loc, nspec, elmnts, &
+       call write_partition_database(IIN_database, ipart, nspec_loc, nspec, elmnts, &
                                   glob2loc_elmnts, glob2loc_nodes_nparts, &
                                   glob2loc_nodes_parts, glob2loc_nodes, part, mat, ngnod, 2)
 
        ! writes out absorbing/free-surface boundaries
-       call write_boundaries_database(15, ipart, nspec, nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, &
+       call write_boundaries_database(IIN_database, ipart, nspec, nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, &
                                   nspec2D_ymax, nspec2D_bottom, nspec2D_top, &
                                   ibelm_xmin, ibelm_xmax, ibelm_ymin, &
                                   ibelm_ymax, ibelm_bottom, ibelm_top, &
@@ -830,26 +834,33 @@
                                   glob2loc_nodes_parts, glob2loc_nodes, part)
 
        ! gets number of MPI interfaces
-       call Write_interfaces_database(15, tab_interfaces, tab_size_interfaces, ipart, ninterfaces, &
+       call Write_interfaces_database(IIN_database, tab_interfaces, tab_size_interfaces, ipart, ninterfaces, &
                                   my_ninterface, my_interfaces, my_nb_interfaces, &
                                   glob2loc_elmnts, glob2loc_nodes_nparts, glob2loc_nodes_parts, &
                                   glob2loc_nodes, 1, nparts)
 
        ! writes out MPI interfaces elements
-       write(15,*) my_ninterface, maxval(my_nb_interfaces)
+       !print*,' my interfaces:',my_ninterface,maxval(my_nb_interfaces)
+       if( my_ninterface == 0 ) then
+        !write(IIN_database,*) my_ninterface, 0       ! avoids problem with maxval for empty array my_nb_interfaces
+        write(IIN_database) my_ninterface, 0       ! avoids problem with maxval for empty array my_nb_interfaces
+       else
+        !write(IIN_database,*) my_ninterface, maxval(my_nb_interfaces)
+        write(IIN_database) my_ninterface, maxval(my_nb_interfaces)
+       endif
 
-       call Write_interfaces_database(15, tab_interfaces, tab_size_interfaces, ipart, ninterfaces, &
+       call Write_interfaces_database(IIN_database, tab_interfaces, tab_size_interfaces, ipart, ninterfaces, &
                                   my_ninterface, my_interfaces, my_nb_interfaces, &
                                   glob2loc_elmnts, glob2loc_nodes_nparts, glob2loc_nodes_parts, &
                                   glob2loc_nodes, 2, nparts)
 
        ! writes out moho surface (optional)
-       call write_moho_surface_database(15, ipart, nspec, &
+       call write_moho_surface_database(IIN_database, ipart, nspec, &
                                   glob2loc_elmnts, glob2loc_nodes_nparts, &
                                   glob2loc_nodes_parts, glob2loc_nodes, part, &
                                   nspec2D_moho,ibelm_moho,nodes_ibelm_moho)
 
-       close(15)
+       close(IIN_database)
 
     end do
     print*, 'partitions: '

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/part_decompose_mesh_SCOTCH.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/part_decompose_mesh_SCOTCH.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/part_decompose_mesh_SCOTCH.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -589,7 +589,10 @@
        do i = 0, nnodes-1
           do j = glob2loc_nodes_nparts(i), glob2loc_nodes_nparts(i+1)-1
              if ( glob2loc_nodes_parts(j) == iproc ) then
-                write(IIN_database,*) glob2loc_nodes(j)+1, nodes_coords(1,i+1), &
+                !write(IIN_database,*) glob2loc_nodes(j)+1, nodes_coords(1,i+1), &
+                !                      nodes_coords(2,i+1), nodes_coords(3,i+1)
+
+                write(IIN_database) glob2loc_nodes(j)+1, nodes_coords(1,i+1), &
                                       nodes_coords(2,i+1), nodes_coords(3,i+1)
              end if
           end do
@@ -611,7 +614,8 @@
     character (len=30), dimension(6,count_undef_mat) :: undef_mat_prop
     integer  :: i
 
-    write(IIN_database,*)  count_def_mat,count_undef_mat
+    !write(IIN_database,*)  count_def_mat,count_undef_mat
+    write(IIN_database)  count_def_mat,count_undef_mat
     do i = 1, count_def_mat
       ! database material definition
       !
@@ -619,13 +623,19 @@
       !
       ! (note that this order of the properties is different than the input in nummaterial_velocity_file)
       !
-       write(IIN_database,*) mat_prop(1,i), mat_prop(2,i), mat_prop(3,i), &
-                            mat_prop(4,i), mat_prop(5,i), mat_prop(6,i)
+      !write(IIN_database,*) mat_prop(1,i), mat_prop(2,i), mat_prop(3,i), &
+      !                     mat_prop(4,i), mat_prop(5,i), mat_prop(6,i)
+      write(IIN_database) mat_prop(1,i), mat_prop(2,i), mat_prop(3,i), &
+                         mat_prop(4,i), mat_prop(5,i), mat_prop(6,i)
     end do
     do i = 1, count_undef_mat
-       write(IIN_database,*) trim(undef_mat_prop(1,i)),' ',trim(undef_mat_prop(2,i)),' ', &
-                            trim(undef_mat_prop(3,i)),' ',trim(undef_mat_prop(4,i)),' ', &
-                            trim(undef_mat_prop(5,i)),' ',trim(undef_mat_prop(6,i))
+       !write(IIN_database,*) trim(undef_mat_prop(1,i)),' ',trim(undef_mat_prop(2,i)),' ', &
+       !                     trim(undef_mat_prop(3,i)),' ',trim(undef_mat_prop(4,i)),' ', &
+       !                     trim(undef_mat_prop(5,i)),' ',trim(undef_mat_prop(6,i))
+
+       write(IIN_database) undef_mat_prop(1,i),undef_mat_prop(2,i), &
+                          undef_mat_prop(3,i),undef_mat_prop(4,i), &
+                          undef_mat_prop(5,i),undef_mat_prop(6,i)
     end do
 
   end subroutine  write_material_props_database
@@ -681,42 +691,53 @@
           loc_nspec2D_xmin = loc_nspec2D_xmin + 1
        end if
     end do
-    write(IIN_database,*) 1, loc_nspec2D_xmin
+    !write(IIN_database,*) 1, loc_nspec2D_xmin
+    write(IIN_database) 1, loc_nspec2D_xmin
+
     loc_nspec2D_xmax = 0
     do i=1,nspec2D_xmax
        if(part(ibelm_xmax(i)) == iproc) then
           loc_nspec2D_xmax = loc_nspec2D_xmax + 1
        end if
     end do
-    write(IIN_database,*) 2, loc_nspec2D_xmax
+    !write(IIN_database,*) 2, loc_nspec2D_xmax
+    write(IIN_database) 2, loc_nspec2D_xmax
+
     loc_nspec2D_ymin = 0
     do i=1,nspec2D_ymin
        if(part(ibelm_ymin(i)) == iproc) then
           loc_nspec2D_ymin = loc_nspec2D_ymin + 1
        end if
     end do
-    write(IIN_database,*) 3, loc_nspec2D_ymin
+    !write(IIN_database,*) 3, loc_nspec2D_ymin
+    write(IIN_database) 3, loc_nspec2D_ymin
+
     loc_nspec2D_ymax = 0
     do i=1,nspec2D_ymax
        if(part(ibelm_ymax(i)) == iproc) then
           loc_nspec2D_ymax = loc_nspec2D_ymax + 1
        end if
     end do
-    write(IIN_database,*) 4, loc_nspec2D_ymax
+    !write(IIN_database,*) 4, loc_nspec2D_ymax
+    write(IIN_database) 4, loc_nspec2D_ymax
+
     loc_nspec2D_bottom = 0
     do i=1,nspec2D_bottom
        if(part(ibelm_bottom(i)) == iproc) then
           loc_nspec2D_bottom = loc_nspec2D_bottom + 1
        end if
     end do
-    write(IIN_database,*) 5, loc_nspec2D_bottom
+    !write(IIN_database,*) 5, loc_nspec2D_bottom
+    write(IIN_database) 5, loc_nspec2D_bottom
+
     loc_nspec2D_top = 0
     do i=1,nspec2D_top
        if(part(ibelm_top(i)) == iproc) then
           loc_nspec2D_top = loc_nspec2D_top + 1
        end if
     end do
-    write(IIN_database,*) 6, loc_nspec2D_top
+    !write(IIN_database,*) 6, loc_nspec2D_top
+    write(IIN_database) 6, loc_nspec2D_top
 
     ! outputs element index and element node indices
     ! note: assumes that element indices in ibelm_* arrays are in the range from 1 to nspec
@@ -749,7 +770,10 @@
                 loc_node4 = glob2loc_nodes(j)+1
              end if
           end do
-          write(IIN_database,*) glob2loc_elmnts(ibelm_xmin(i)-1)+1, &
+          !write(IIN_database,*) glob2loc_elmnts(ibelm_xmin(i)-1)+1, &
+          !                      loc_node1, loc_node2, loc_node3, loc_node4
+
+          write(IIN_database) glob2loc_elmnts(ibelm_xmin(i)-1)+1, &
                                 loc_node1, loc_node2, loc_node3, loc_node4
        end if
     end do
@@ -780,7 +804,10 @@
                 loc_node4 = glob2loc_nodes(j)+1
              end if
           end do
-          write(IIN_database,*) glob2loc_elmnts(ibelm_xmax(i)-1)+1, &
+          !write(IIN_database,*) glob2loc_elmnts(ibelm_xmax(i)-1)+1, &
+          !                      loc_node1, loc_node2, loc_node3, loc_node4
+
+          write(IIN_database) glob2loc_elmnts(ibelm_xmax(i)-1)+1, &
                                 loc_node1, loc_node2, loc_node3, loc_node4
        end if
     end do
@@ -811,7 +838,10 @@
                 loc_node4 = glob2loc_nodes(j)+1
              end if
           end do
-          write(IIN_database,*) glob2loc_elmnts(ibelm_ymin(i)-1)+1, &
+          !write(IIN_database,*) glob2loc_elmnts(ibelm_ymin(i)-1)+1, &
+          !                      loc_node1, loc_node2, loc_node3, loc_node4
+
+          write(IIN_database) glob2loc_elmnts(ibelm_ymin(i)-1)+1, &
                                 loc_node1, loc_node2, loc_node3, loc_node4
        end if
     end do
@@ -842,7 +872,10 @@
                 loc_node4 = glob2loc_nodes(j)+1
              end if
           end do
-          write(IIN_database,*) glob2loc_elmnts(ibelm_ymax(i)-1)+1, &
+          !write(IIN_database,*) glob2loc_elmnts(ibelm_ymax(i)-1)+1, &
+          !                     loc_node1, loc_node2, loc_node3, loc_node4
+
+          write(IIN_database) glob2loc_elmnts(ibelm_ymax(i)-1)+1, &
                                loc_node1, loc_node2, loc_node3, loc_node4
        end if
     end do
@@ -873,7 +906,8 @@
                 loc_node4 = glob2loc_nodes(j)+1
              end if
           end do
-          write(IIN_database,*) glob2loc_elmnts(ibelm_bottom(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
+          !write(IIN_database,*) glob2loc_elmnts(ibelm_bottom(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
+          write(IIN_database) glob2loc_elmnts(ibelm_bottom(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
        end if
     end do
 
@@ -899,7 +933,8 @@
                 loc_node4 = glob2loc_nodes(j)+1
              end if
           end do
-          write(IIN_database,*) glob2loc_elmnts(ibelm_top(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
+          !write(IIN_database,*) glob2loc_elmnts(ibelm_top(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
+          write(IIN_database) glob2loc_elmnts(ibelm_top(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
        end if
 
     end do
@@ -959,7 +994,10 @@
 
              ! format:
              ! # ispec_local # material_index_1 # material_index_2 # corner_id1 # corner_id2 # ... # corner_id8
-             write(IIN_database,*) glob2loc_elmnts(i)+1, num_modele(1,i+1), &
+             !write(IIN_database,*) glob2loc_elmnts(i)+1, num_modele(1,i+1), &
+             !                     num_modele(2,i+1),(loc_nodes(k)+1, k=0,ngnod-1)
+
+             write(IIN_database) glob2loc_elmnts(i)+1, num_modele(1,i+1), &
                                   num_modele(2,i+1),(loc_nodes(k)+1, k=0,ngnod-1)
           end if
        end do
@@ -1033,9 +1071,13 @@
          do j = i+1, nparts-1
             if ( my_interfaces(num_interface) == 1 ) then
                if ( i == iproc ) then
-                  write(IIN_database,*) j, my_nb_interfaces(num_interface)
+                  !write(IIN_database,*) j, my_nb_interfaces(num_interface)
+                  write(IIN_database) j, my_nb_interfaces(num_interface)
+
                else
-                  write(IIN_database,*) i, my_nb_interfaces(num_interface)
+                  !write(IIN_database,*) i, my_nb_interfaces(num_interface)
+                  write(IIN_database) i, my_nb_interfaces(num_interface)
+
                end if
 
                count_faces = 0
@@ -1083,8 +1125,11 @@
                            local_nodes(1) = glob2loc_nodes(l)+1
                         end if
                      end do
-                     write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+                     !write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+                     !                   local_nodes(1), -1, -1, -1
+                     write(IIN_database) local_elmnt, tab_interfaces(k*7+2), &
                                         local_nodes(1), -1, -1, -1
+
                   case (2)
                      ! edge element
                      do l = glob2loc_nodes_nparts(tab_interfaces(k*7+3)), &
@@ -1099,8 +1144,11 @@
                            local_nodes(2) = glob2loc_nodes(l)+1
                         end if
                      end do
-                     write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+                     !write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+                     !                   local_nodes(1), local_nodes(2), -1, -1
+                     write(IIN_database) local_elmnt, tab_interfaces(k*7+2), &
                                         local_nodes(1), local_nodes(2), -1, -1
+
                   case (4)
                      ! face element
                      count_faces = count_faces + 1
@@ -1128,8 +1176,11 @@
                            local_nodes(4) = glob2loc_nodes(l)+1
                         end if
                      end do
-                     write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+                     !write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+                     !     local_nodes(1), local_nodes(2),local_nodes(3), local_nodes(4)
+                     write(IIN_database) local_elmnt, tab_interfaces(k*7+2), &
                           local_nodes(1), local_nodes(2),local_nodes(3), local_nodes(4)
+
                   case default
                      print *, "error in write_interfaces_database!", tab_interfaces(k*7+2), iproc
                   end select
@@ -1188,7 +1239,8 @@
     if( loc_nspec2D_moho == 0 ) return
 
     ! format: #surface_id, #number of elements
-    write(IIN_database,*) 7, loc_nspec2D_moho
+    !write(IIN_database,*) 7, loc_nspec2D_moho
+    write(IIN_database) 7, loc_nspec2D_moho
 
     ! outputs element index and element node indices
     ! note: assumes that element indices in ibelm_* arrays are in the range from 1 to nspec
@@ -1219,7 +1271,9 @@
                 loc_node4 = glob2loc_nodes(j)+1
              end if
           end do
-          write(IIN_database,*) glob2loc_elmnts(ibelm_moho(i)-1)+1, &
+          !write(IIN_database,*) glob2loc_elmnts(ibelm_moho(i)-1)+1, &
+          !                    loc_node1, loc_node2, loc_node3, loc_node4
+          write(IIN_database) glob2loc_elmnts(ibelm_moho(i)-1)+1, &
                               loc_node1, loc_node2, loc_node3, loc_node4
        end if
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/program_decompose_mesh_SCOTCH.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/program_decompose_mesh_SCOTCH.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/program_decompose_mesh_SCOTCH.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -31,6 +31,9 @@
                                   check_valence, &
                                   scotch_partitioning, &
                                   write_mesh_databases
+
+! daniel: ifort
+!  USE IFPORT,only: getarg
   implicit none
   integer :: i
   character(len=256) :: arg(3)

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/calc_jacobian.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/calc_jacobian.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/calc_jacobian.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,364 +1,364 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine calc_jacobian(myrank,xixstore,xiystore,xizstore, &
-                          etaxstore,etaystore,etazstore, &
-                          gammaxstore,gammaystore,gammazstore,jacobianstore, &
-                          xstore,ystore,zstore, &
-                          xelm,yelm,zelm,shape3D,dershape3D,ispec,nspec)
-
-  implicit none
-
-  include "constants.h"
-
-  integer ispec,nspec,myrank
-
-  double precision shape3D(NGNOD,NGLLX,NGLLY,NGLLZ)
-  double precision dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ)
-
-  double precision, dimension(NGNOD) :: xelm,yelm,zelm
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
-    xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, &
-    gammaxstore,gammaystore,gammazstore,jacobianstore
-
-  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-
-  integer i,j,k,ia
-  double precision xxi,xeta,xgamma,yxi,yeta,ygamma,zxi,zeta,zgamma
-  double precision xmesh,ymesh,zmesh
-  double precision xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  double precision jacobian
-
-  do k=1,NGLLZ
-    do j=1,NGLLY
-      do i=1,NGLLX
-
-      xxi = ZERO
-      xeta = ZERO
-      xgamma = ZERO
-      yxi = ZERO
-      yeta = ZERO
-      ygamma = ZERO
-      zxi = ZERO
-      zeta = ZERO
-      zgamma = ZERO
-      xmesh = ZERO
-      ymesh = ZERO
-      zmesh = ZERO
-
-      do ia=1,NGNOD
-        xxi = xxi + dershape3D(1,ia,i,j,k)*xelm(ia)
-        xeta = xeta + dershape3D(2,ia,i,j,k)*xelm(ia)
-        xgamma = xgamma + dershape3D(3,ia,i,j,k)*xelm(ia)
-        yxi = yxi + dershape3D(1,ia,i,j,k)*yelm(ia)
-        yeta = yeta + dershape3D(2,ia,i,j,k)*yelm(ia)
-        ygamma = ygamma + dershape3D(3,ia,i,j,k)*yelm(ia)
-        zxi = zxi + dershape3D(1,ia,i,j,k)*zelm(ia)
-        zeta = zeta + dershape3D(2,ia,i,j,k)*zelm(ia)
-        zgamma = zgamma + dershape3D(3,ia,i,j,k)*zelm(ia)
-        xmesh = xmesh + shape3D(ia,i,j,k)*xelm(ia)
-        ymesh = ymesh + shape3D(ia,i,j,k)*yelm(ia)
-        zmesh = zmesh + shape3D(ia,i,j,k)*zelm(ia)
-      enddo
-
-      jacobian = xxi*(yeta*zgamma-ygamma*zeta) - &
-             xeta*(yxi*zgamma-ygamma*zxi) + &
-             xgamma*(yxi*zeta-yeta*zxi)
-
-! can ignore negative jacobian in mesher if needed when debugging code
-      if(jacobian <= ZERO) call exit_MPI(myrank,'3D Jacobian undefined')
-
-!     invert the relation (Fletcher p. 50 vol. 2)
-      xix = (yeta*zgamma-ygamma*zeta) / jacobian
-      xiy = (xgamma*zeta-xeta*zgamma) / jacobian
-      xiz = (xeta*ygamma-xgamma*yeta) / jacobian
-      etax = (ygamma*zxi-yxi*zgamma) / jacobian
-      etay = (xxi*zgamma-xgamma*zxi) / jacobian
-      etaz = (xgamma*yxi-xxi*ygamma) / jacobian
-      gammax = (yxi*zeta-yeta*zxi) / jacobian
-      gammay = (xeta*zxi-xxi*zeta) / jacobian
-      gammaz = (xxi*yeta-xeta*yxi) / jacobian
-
-!     compute and store the jacobian for the solver
-      jacobian = 1. / (xix*(etay*gammaz-etaz*gammay) &
-                      -xiy*(etax*gammaz-etaz*gammax) &
-                      +xiz*(etax*gammay-etay*gammax))
-
-!     save the derivatives and the jacobian
-
-! distinguish between single and double precision for reals
-      if(CUSTOM_REAL == SIZE_REAL) then
-        xixstore(i,j,k,ispec) = sngl(xix)
-        xiystore(i,j,k,ispec) = sngl(xiy)
-        xizstore(i,j,k,ispec) = sngl(xiz)
-        etaxstore(i,j,k,ispec) = sngl(etax)
-        etaystore(i,j,k,ispec) = sngl(etay)
-        etazstore(i,j,k,ispec) = sngl(etaz)
-        gammaxstore(i,j,k,ispec) = sngl(gammax)
-        gammaystore(i,j,k,ispec) = sngl(gammay)
-        gammazstore(i,j,k,ispec) = sngl(gammaz)
-        jacobianstore(i,j,k,ispec) = sngl(jacobian)
-      else
-        xixstore(i,j,k,ispec) = xix
-        xiystore(i,j,k,ispec) = xiy
-        xizstore(i,j,k,ispec) = xiz
-        etaxstore(i,j,k,ispec) = etax
-        etaystore(i,j,k,ispec) = etay
-        etazstore(i,j,k,ispec) = etaz
-        gammaxstore(i,j,k,ispec) = gammax
-        gammaystore(i,j,k,ispec) = gammay
-        gammazstore(i,j,k,ispec) = gammaz
-        jacobianstore(i,j,k,ispec) = jacobian
-      endif
-
-      xstore(i,j,k,ispec) = xmesh
-      ystore(i,j,k,ispec) = ymesh
-      zstore(i,j,k,ispec) = zmesh
-
-      enddo
-    enddo
-  enddo
-
-  end subroutine calc_jacobian
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! This subroutine recomputes the 3D jacobian for one element
-! based upon all GLL points
-! Hejun Zhu OCT16,2009
-
-! input: myrank,
-!        xstore,ystore,zstore ----- input position
-!        xigll,yigll,zigll ----- gll points position
-!        ispec,nspec       ----- element number
-!        ACTUALLY_STORE_ARRAYS   ------ save array or not
-
-! output: xixstore,xiystore,xizstore,
-!         etaxstore,etaystore,etazstore,
-!         gammaxstore,gammaystore,gammazstore ------ parameters used for calculating jacobian
-!
-!
-!  subroutine recalc_jacobian_gll3D(myrank,xixstore,xiystore,xizstore, &
-!                                  etaxstore,etaystore,etazstore, &
-!                                  gammaxstore,gammaystore,gammazstore,jacobianstore, &
-!                                  xstore,ystore,zstore, &
-!                                  ispec,nspec, &
-!                                  xigll,yigll,zigll, &
-!                                  ACTUALLY_STORE_ARRAYS)
-!
-!  implicit none
-!
-!  include "constants.h"
-!
-!  ! input parameter
-!  integer::myrank,ispec,nspec
-!  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-!  double precision, dimension(NGLLX):: xigll
-!  double precision, dimension(NGLLY):: yigll
-!  double precision, dimension(NGLLZ):: zigll
-!  logical::ACTUALLY_STORE_ARRAYS
-!
-!
-!  ! output results
-!  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
-!                        xixstore,xiystore,xizstore,&
-!                        etaxstore,etaystore,etazstore,&
-!                        gammaxstore,gammaystore,gammazstore,&
-!                        jacobianstore
-!
-!
-!  ! other parameters for this subroutine
-!  integer:: i,j,k,i1,j1,k1
-!  double precision:: xxi,xeta,xgamma,yxi,yeta,ygamma,zxi,zeta,zgamma
-!  double precision:: xi,eta,gamma
-!  double precision,dimension(NGLLX):: hxir,hpxir
-!  double precision,dimension(NGLLY):: hetar,hpetar
-!  double precision,dimension(NGLLZ):: hgammar,hpgammar
-!  double precision:: hlagrange,hlagrange_xi,hlagrange_eta,hlagrange_gamma
-!  double precision:: jacobian
-!  double precision:: xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-!
-!
-!
-!  ! test parameters which can be deleted
-!  double precision:: xmesh,ymesh,zmesh
-!  double precision:: sumshape,sumdershapexi,sumdershapeeta,sumdershapegamma
-!
-!  ! first go over all 125 gll points
-!  do k=1,NGLLZ
-!    do j=1,NGLLY
-!      do i=1,NGLLX
-!
-!            xxi = 0.0
-!            xeta = 0.0
-!            xgamma = 0.0
-!            yxi = 0.0
-!            yeta = 0.0
-!            ygamma = 0.0
-!            zxi = 0.0
-!            zeta = 0.0
-!            zgamma = 0.0
-!
-!            xi = xigll(i)
-!            eta = yigll(j)
-!            gamma = zigll(k)
-!
-!            ! calculate lagrange polynomial and its derivative
-!            call lagrange_any(xi,NGLLX,xigll,hxir,hpxir)
-!            call lagrange_any(eta,NGLLY,yigll,hetar,hpetar)
-!            call lagrange_any(gamma,NGLLZ,zigll,hgammar,hpgammar)
-!
-!            ! test parameters
-!            sumshape = 0.0
-!            sumdershapexi = 0.0
-!            sumdershapeeta = 0.0
-!            sumdershapegamma = 0.0
-!            xmesh = 0.0
-!            ymesh = 0.0
-!            zmesh = 0.0
-!
-!
-!            do k1 = 1,NGLLZ
-!               do j1 = 1,NGLLY
-!                  do i1 = 1,NGLLX
-!                     hlagrange = hxir(i1)*hetar(j1)*hgammar(k1)
-!                     hlagrange_xi = hpxir(i1)*hetar(j1)*hgammar(k1)
-!                     hlagrange_eta = hxir(i1)*hpetar(j1)*hgammar(k1)
-!                     hlagrange_gamma = hxir(i1)*hetar(j1)*hpgammar(k1)
-!
-!
-!                     xxi = xxi + xstore(i1,j1,k1,ispec)*hlagrange_xi
-!                     xeta = xeta + xstore(i1,j1,k1,ispec)*hlagrange_eta
-!                     xgamma = xgamma + xstore(i1,j1,k1,ispec)*hlagrange_gamma
-!
-!                     yxi = yxi + ystore(i1,j1,k1,ispec)*hlagrange_xi
-!                     yeta = yeta + ystore(i1,j1,k1,ispec)*hlagrange_eta
-!                     ygamma = ygamma + ystore(i1,j1,k1,ispec)*hlagrange_gamma
-!
-!                     zxi = zxi + zstore(i1,j1,k1,ispec)*hlagrange_xi
-!                     zeta = zeta + zstore(i1,j1,k1,ispec)*hlagrange_eta
-!                     zgamma = zgamma + zstore(i1,j1,k1,ispec)*hlagrange_gamma
-!
-!                     ! test the lagrange polynomial and its derivate
-!                     xmesh = xmesh + xstore(i1,j1,k1,ispec)*hlagrange
-!                     ymesh = ymesh + ystore(i1,j1,k1,ispec)*hlagrange
-!                     zmesh = zmesh + zstore(i1,j1,k1,ispec)*hlagrange
-!                     sumshape = sumshape + hlagrange
-!                     sumdershapexi = sumdershapexi + hlagrange_xi
-!                     sumdershapeeta = sumdershapeeta + hlagrange_eta
-!                     sumdershapegamma = sumdershapegamma + hlagrange_gamma
-!
-!                  end do
-!               end do
-!            end do
-!
-!            ! Check the lagrange polynomial and its derivative
-!            if (xmesh /=xstore(i,j,k,ispec).or.ymesh/=ystore(i,j,k,ispec).or.zmesh/=zstore(i,j,k,ispec)) then
-!                    call exit_MPI(myrank,'new mesh positions are wrong in recalc_jacobian_gall3D.f90')
-!            end if
-!            if(abs(sumshape-one) >  TINYVAL) then
-!                    call exit_MPI(myrank,'error shape functions in recalc_jacobian_gll3D.f90')
-!            end if
-!            if(abs(sumdershapexi) >  TINYVAL) then
-!                    call exit_MPI(myrank,'error derivative xi shape functions in recalc_jacobian_gll3D.f90')
-!            end if
-!            if(abs(sumdershapeeta) >  TINYVAL) then
-!                    call exit_MPI(myrank,'error derivative eta shape functions in recalc_jacobian_gll3D.f90')
-!            end if
-!            if(abs(sumdershapegamma) >  TINYVAL) then
-!                    call exit_MPI(myrank,'error derivative gamma shape functions in recalc_jacobian_gll3D.f90')
-!            end if
-!
-!
-!            jacobian = xxi*(yeta*zgamma-ygamma*zeta) - &
-!                 xeta*(yxi*zgamma-ygamma*zxi) + &
-!                 xgamma*(yxi*zeta-yeta*zxi)
-!
-!            ! Check the jacobian
-!            if(jacobian <= ZERO) then
-!                   call exit_MPI(myrank,'3D Jacobian undefined in recalc_jacobian_gll3D.f90')
-!            end if
-!
-!            !     invert the relation (Fletcher p. 50 vol. 2)
-!            xix = (yeta*zgamma-ygamma*zeta) / jacobian
-!            xiy = (xgamma*zeta-xeta*zgamma) / jacobian
-!            xiz = (xeta*ygamma-xgamma*yeta) / jacobian
-!            etax = (ygamma*zxi-yxi*zgamma) / jacobian
-!            etay = (xxi*zgamma-xgamma*zxi) / jacobian
-!            etaz = (xgamma*yxi-xxi*ygamma) / jacobian
-!            gammax = (yxi*zeta-yeta*zxi) / jacobian
-!            gammay = (xeta*zxi-xxi*zeta) / jacobian
-!            gammaz = (xxi*yeta-xeta*yxi) / jacobian
-!
-!
-!            !     compute and store the jacobian for the solver
-!            jacobian = 1. / (xix*(etay*gammaz-etaz*gammay) &
-!                            -xiy*(etax*gammaz-etaz*gammax) &
-!                            +xiz*(etax*gammay-etay*gammax))
-!
-!            ! resave the derivatives and the jacobian
-!            ! distinguish between single and double precision for reals
-!            if (ACTUALLY_STORE_ARRAYS) then
-!
-!                if (myrank == 0) then
-!                        print*,'xix before',xixstore(i,j,k,ispec),'after',xix
-!                        print*,'etax before',etaxstore(i,j,k,ispec),'after',etax
-!                        print*,'gammax before',gammaxstore(i,j,k,ispec),'after',gammax
-!                end if
-!
-!                if(CUSTOM_REAL == SIZE_REAL) then
-!                    xixstore(i,j,k,ispec) = sngl(xix)
-!                    xiystore(i,j,k,ispec) = sngl(xiy)
-!                    xizstore(i,j,k,ispec) = sngl(xiz)
-!                    etaxstore(i,j,k,ispec) = sngl(etax)
-!                    etaystore(i,j,k,ispec) = sngl(etay)
-!                    etazstore(i,j,k,ispec) = sngl(etaz)
-!                    gammaxstore(i,j,k,ispec) = sngl(gammax)
-!                    gammaystore(i,j,k,ispec) = sngl(gammay)
-!                    gammazstore(i,j,k,ispec) = sngl(gammaz)
-!                    jacobianstore(i,j,k,ispec) = sngl(jacobian)
-!                else
-!                    xixstore(i,j,k,ispec) = xix
-!                    xiystore(i,j,k,ispec) = xiy
-!                    xizstore(i,j,k,ispec) = xiz
-!                    etaxstore(i,j,k,ispec) = etax
-!                    etaystore(i,j,k,ispec) = etay
-!                    etazstore(i,j,k,ispec) = etaz
-!                    gammaxstore(i,j,k,ispec) = gammax
-!                    gammaystore(i,j,k,ispec) = gammay
-!                    gammazstore(i,j,k,ispec) = gammaz
-!                    jacobianstore(i,j,k,ispec) = jacobian
-!                endif
-!             end if
-!        enddo
-!    enddo
-!  enddo
-!
-!  end subroutine recalc_jacobian_gll3D
-!
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine calc_jacobian(myrank,xixstore,xiystore,xizstore, &
+                          etaxstore,etaystore,etazstore, &
+                          gammaxstore,gammaystore,gammazstore,jacobianstore, &
+                          xstore,ystore,zstore, &
+                          xelm,yelm,zelm,shape3D,dershape3D,ispec,nspec)
+
+  implicit none
+
+  include "constants.h"
+
+  integer ispec,nspec,myrank
+
+  double precision shape3D(NGNOD,NGLLX,NGLLY,NGLLZ)
+  double precision dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ)
+
+  double precision, dimension(NGNOD) :: xelm,yelm,zelm
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
+    xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, &
+    gammaxstore,gammaystore,gammazstore,jacobianstore
+
+  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+
+  integer i,j,k,ia
+  double precision xxi,xeta,xgamma,yxi,yeta,ygamma,zxi,zeta,zgamma
+  double precision xmesh,ymesh,zmesh
+  double precision xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+  double precision jacobian
+
+  do k=1,NGLLZ
+    do j=1,NGLLY
+      do i=1,NGLLX
+
+      xxi = ZERO
+      xeta = ZERO
+      xgamma = ZERO
+      yxi = ZERO
+      yeta = ZERO
+      ygamma = ZERO
+      zxi = ZERO
+      zeta = ZERO
+      zgamma = ZERO
+      xmesh = ZERO
+      ymesh = ZERO
+      zmesh = ZERO
+
+      do ia=1,NGNOD
+        xxi = xxi + dershape3D(1,ia,i,j,k)*xelm(ia)
+        xeta = xeta + dershape3D(2,ia,i,j,k)*xelm(ia)
+        xgamma = xgamma + dershape3D(3,ia,i,j,k)*xelm(ia)
+        yxi = yxi + dershape3D(1,ia,i,j,k)*yelm(ia)
+        yeta = yeta + dershape3D(2,ia,i,j,k)*yelm(ia)
+        ygamma = ygamma + dershape3D(3,ia,i,j,k)*yelm(ia)
+        zxi = zxi + dershape3D(1,ia,i,j,k)*zelm(ia)
+        zeta = zeta + dershape3D(2,ia,i,j,k)*zelm(ia)
+        zgamma = zgamma + dershape3D(3,ia,i,j,k)*zelm(ia)
+        xmesh = xmesh + shape3D(ia,i,j,k)*xelm(ia)
+        ymesh = ymesh + shape3D(ia,i,j,k)*yelm(ia)
+        zmesh = zmesh + shape3D(ia,i,j,k)*zelm(ia)
+      enddo
+
+      jacobian = xxi*(yeta*zgamma-ygamma*zeta) - &
+             xeta*(yxi*zgamma-ygamma*zxi) + &
+             xgamma*(yxi*zeta-yeta*zxi)
+
+! can ignore negative jacobian in mesher if needed when debugging code
+      if(jacobian <= ZERO) call exit_MPI(myrank,'3D Jacobian undefined')
+
+!     invert the relation (Fletcher p. 50 vol. 2)
+      xix = (yeta*zgamma-ygamma*zeta) / jacobian
+      xiy = (xgamma*zeta-xeta*zgamma) / jacobian
+      xiz = (xeta*ygamma-xgamma*yeta) / jacobian
+      etax = (ygamma*zxi-yxi*zgamma) / jacobian
+      etay = (xxi*zgamma-xgamma*zxi) / jacobian
+      etaz = (xgamma*yxi-xxi*ygamma) / jacobian
+      gammax = (yxi*zeta-yeta*zxi) / jacobian
+      gammay = (xeta*zxi-xxi*zeta) / jacobian
+      gammaz = (xxi*yeta-xeta*yxi) / jacobian
+
+!     compute and store the jacobian for the solver
+      jacobian = 1. / (xix*(etay*gammaz-etaz*gammay) &
+                      -xiy*(etax*gammaz-etaz*gammax) &
+                      +xiz*(etax*gammay-etay*gammax))
+
+!     save the derivatives and the jacobian
+
+! distinguish between single and double precision for reals
+      if(CUSTOM_REAL == SIZE_REAL) then
+        xixstore(i,j,k,ispec) = sngl(xix)
+        xiystore(i,j,k,ispec) = sngl(xiy)
+        xizstore(i,j,k,ispec) = sngl(xiz)
+        etaxstore(i,j,k,ispec) = sngl(etax)
+        etaystore(i,j,k,ispec) = sngl(etay)
+        etazstore(i,j,k,ispec) = sngl(etaz)
+        gammaxstore(i,j,k,ispec) = sngl(gammax)
+        gammaystore(i,j,k,ispec) = sngl(gammay)
+        gammazstore(i,j,k,ispec) = sngl(gammaz)
+        jacobianstore(i,j,k,ispec) = sngl(jacobian)
+      else
+        xixstore(i,j,k,ispec) = xix
+        xiystore(i,j,k,ispec) = xiy
+        xizstore(i,j,k,ispec) = xiz
+        etaxstore(i,j,k,ispec) = etax
+        etaystore(i,j,k,ispec) = etay
+        etazstore(i,j,k,ispec) = etaz
+        gammaxstore(i,j,k,ispec) = gammax
+        gammaystore(i,j,k,ispec) = gammay
+        gammazstore(i,j,k,ispec) = gammaz
+        jacobianstore(i,j,k,ispec) = jacobian
+      endif
+
+      xstore(i,j,k,ispec) = xmesh
+      ystore(i,j,k,ispec) = ymesh
+      zstore(i,j,k,ispec) = zmesh
+
+      enddo
+    enddo
+  enddo
+
+  end subroutine calc_jacobian
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! This subroutine recomputes the 3D jacobian for one element
+! based upon all GLL points
+! Hejun Zhu OCT16,2009
+
+! input: myrank,
+!        xstore,ystore,zstore ----- input position
+!        xigll,yigll,zigll ----- gll points position
+!        ispec,nspec       ----- element number
+!        ACTUALLY_STORE_ARRAYS   ------ save array or not
+
+! output: xixstore,xiystore,xizstore,
+!         etaxstore,etaystore,etazstore,
+!         gammaxstore,gammaystore,gammazstore ------ parameters used for calculating jacobian
+!
+!
+!  subroutine recalc_jacobian_gll3D(myrank,xixstore,xiystore,xizstore, &
+!                                  etaxstore,etaystore,etazstore, &
+!                                  gammaxstore,gammaystore,gammazstore,jacobianstore, &
+!                                  xstore,ystore,zstore, &
+!                                  ispec,nspec, &
+!                                  xigll,yigll,zigll, &
+!                                  ACTUALLY_STORE_ARRAYS)
+!
+!  implicit none
+!
+!  include "constants.h"
+!
+!  ! input parameter
+!  integer::myrank,ispec,nspec
+!  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+!  double precision, dimension(NGLLX):: xigll
+!  double precision, dimension(NGLLY):: yigll
+!  double precision, dimension(NGLLZ):: zigll
+!  logical::ACTUALLY_STORE_ARRAYS
+!
+!
+!  ! output results
+!  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
+!                        xixstore,xiystore,xizstore,&
+!                        etaxstore,etaystore,etazstore,&
+!                        gammaxstore,gammaystore,gammazstore,&
+!                        jacobianstore
+!
+!
+!  ! other parameters for this subroutine
+!  integer:: i,j,k,i1,j1,k1
+!  double precision:: xxi,xeta,xgamma,yxi,yeta,ygamma,zxi,zeta,zgamma
+!  double precision:: xi,eta,gamma
+!  double precision,dimension(NGLLX):: hxir,hpxir
+!  double precision,dimension(NGLLY):: hetar,hpetar
+!  double precision,dimension(NGLLZ):: hgammar,hpgammar
+!  double precision:: hlagrange,hlagrange_xi,hlagrange_eta,hlagrange_gamma
+!  double precision:: jacobian
+!  double precision:: xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+!
+!
+!
+!  ! test parameters which can be deleted
+!  double precision:: xmesh,ymesh,zmesh
+!  double precision:: sumshape,sumdershapexi,sumdershapeeta,sumdershapegamma
+!
+!  ! first go over all 125 gll points
+!  do k=1,NGLLZ
+!    do j=1,NGLLY
+!      do i=1,NGLLX
+!
+!            xxi = 0.0
+!            xeta = 0.0
+!            xgamma = 0.0
+!            yxi = 0.0
+!            yeta = 0.0
+!            ygamma = 0.0
+!            zxi = 0.0
+!            zeta = 0.0
+!            zgamma = 0.0
+!
+!            xi = xigll(i)
+!            eta = yigll(j)
+!            gamma = zigll(k)
+!
+!            ! calculate lagrange polynomial and its derivative
+!            call lagrange_any(xi,NGLLX,xigll,hxir,hpxir)
+!            call lagrange_any(eta,NGLLY,yigll,hetar,hpetar)
+!            call lagrange_any(gamma,NGLLZ,zigll,hgammar,hpgammar)
+!
+!            ! test parameters
+!            sumshape = 0.0
+!            sumdershapexi = 0.0
+!            sumdershapeeta = 0.0
+!            sumdershapegamma = 0.0
+!            xmesh = 0.0
+!            ymesh = 0.0
+!            zmesh = 0.0
+!
+!
+!            do k1 = 1,NGLLZ
+!               do j1 = 1,NGLLY
+!                  do i1 = 1,NGLLX
+!                     hlagrange = hxir(i1)*hetar(j1)*hgammar(k1)
+!                     hlagrange_xi = hpxir(i1)*hetar(j1)*hgammar(k1)
+!                     hlagrange_eta = hxir(i1)*hpetar(j1)*hgammar(k1)
+!                     hlagrange_gamma = hxir(i1)*hetar(j1)*hpgammar(k1)
+!
+!
+!                     xxi = xxi + xstore(i1,j1,k1,ispec)*hlagrange_xi
+!                     xeta = xeta + xstore(i1,j1,k1,ispec)*hlagrange_eta
+!                     xgamma = xgamma + xstore(i1,j1,k1,ispec)*hlagrange_gamma
+!
+!                     yxi = yxi + ystore(i1,j1,k1,ispec)*hlagrange_xi
+!                     yeta = yeta + ystore(i1,j1,k1,ispec)*hlagrange_eta
+!                     ygamma = ygamma + ystore(i1,j1,k1,ispec)*hlagrange_gamma
+!
+!                     zxi = zxi + zstore(i1,j1,k1,ispec)*hlagrange_xi
+!                     zeta = zeta + zstore(i1,j1,k1,ispec)*hlagrange_eta
+!                     zgamma = zgamma + zstore(i1,j1,k1,ispec)*hlagrange_gamma
+!
+!                     ! test the lagrange polynomial and its derivate
+!                     xmesh = xmesh + xstore(i1,j1,k1,ispec)*hlagrange
+!                     ymesh = ymesh + ystore(i1,j1,k1,ispec)*hlagrange
+!                     zmesh = zmesh + zstore(i1,j1,k1,ispec)*hlagrange
+!                     sumshape = sumshape + hlagrange
+!                     sumdershapexi = sumdershapexi + hlagrange_xi
+!                     sumdershapeeta = sumdershapeeta + hlagrange_eta
+!                     sumdershapegamma = sumdershapegamma + hlagrange_gamma
+!
+!                  end do
+!               end do
+!            end do
+!
+!            ! Check the lagrange polynomial and its derivative
+!            if (xmesh /=xstore(i,j,k,ispec).or.ymesh/=ystore(i,j,k,ispec).or.zmesh/=zstore(i,j,k,ispec)) then
+!                    call exit_MPI(myrank,'new mesh positions are wrong in recalc_jacobian_gall3D.f90')
+!            end if
+!            if(abs(sumshape-one) >  TINYVAL) then
+!                    call exit_MPI(myrank,'error shape functions in recalc_jacobian_gll3D.f90')
+!            end if
+!            if(abs(sumdershapexi) >  TINYVAL) then
+!                    call exit_MPI(myrank,'error derivative xi shape functions in recalc_jacobian_gll3D.f90')
+!            end if
+!            if(abs(sumdershapeeta) >  TINYVAL) then
+!                    call exit_MPI(myrank,'error derivative eta shape functions in recalc_jacobian_gll3D.f90')
+!            end if
+!            if(abs(sumdershapegamma) >  TINYVAL) then
+!                    call exit_MPI(myrank,'error derivative gamma shape functions in recalc_jacobian_gll3D.f90')
+!            end if
+!
+!
+!            jacobian = xxi*(yeta*zgamma-ygamma*zeta) - &
+!                 xeta*(yxi*zgamma-ygamma*zxi) + &
+!                 xgamma*(yxi*zeta-yeta*zxi)
+!
+!            ! Check the jacobian
+!            if(jacobian <= ZERO) then
+!                   call exit_MPI(myrank,'3D Jacobian undefined in recalc_jacobian_gll3D.f90')
+!            end if
+!
+!            !     invert the relation (Fletcher p. 50 vol. 2)
+!            xix = (yeta*zgamma-ygamma*zeta) / jacobian
+!            xiy = (xgamma*zeta-xeta*zgamma) / jacobian
+!            xiz = (xeta*ygamma-xgamma*yeta) / jacobian
+!            etax = (ygamma*zxi-yxi*zgamma) / jacobian
+!            etay = (xxi*zgamma-xgamma*zxi) / jacobian
+!            etaz = (xgamma*yxi-xxi*ygamma) / jacobian
+!            gammax = (yxi*zeta-yeta*zxi) / jacobian
+!            gammay = (xeta*zxi-xxi*zeta) / jacobian
+!            gammaz = (xxi*yeta-xeta*yxi) / jacobian
+!
+!
+!            !     compute and store the jacobian for the solver
+!            jacobian = 1. / (xix*(etay*gammaz-etaz*gammay) &
+!                            -xiy*(etax*gammaz-etaz*gammax) &
+!                            +xiz*(etax*gammay-etay*gammax))
+!
+!            ! resave the derivatives and the jacobian
+!            ! distinguish between single and double precision for reals
+!            if (ACTUALLY_STORE_ARRAYS) then
+!
+!                if (myrank == 0) then
+!                        print*,'xix before',xixstore(i,j,k,ispec),'after',xix
+!                        print*,'etax before',etaxstore(i,j,k,ispec),'after',etax
+!                        print*,'gammax before',gammaxstore(i,j,k,ispec),'after',gammax
+!                end if
+!
+!                if(CUSTOM_REAL == SIZE_REAL) then
+!                    xixstore(i,j,k,ispec) = sngl(xix)
+!                    xiystore(i,j,k,ispec) = sngl(xiy)
+!                    xizstore(i,j,k,ispec) = sngl(xiz)
+!                    etaxstore(i,j,k,ispec) = sngl(etax)
+!                    etaystore(i,j,k,ispec) = sngl(etay)
+!                    etazstore(i,j,k,ispec) = sngl(etaz)
+!                    gammaxstore(i,j,k,ispec) = sngl(gammax)
+!                    gammaystore(i,j,k,ispec) = sngl(gammay)
+!                    gammazstore(i,j,k,ispec) = sngl(gammaz)
+!                    jacobianstore(i,j,k,ispec) = sngl(jacobian)
+!                else
+!                    xixstore(i,j,k,ispec) = xix
+!                    xiystore(i,j,k,ispec) = xiy
+!                    xizstore(i,j,k,ispec) = xiz
+!                    etaxstore(i,j,k,ispec) = etax
+!                    etaystore(i,j,k,ispec) = etay
+!                    etazstore(i,j,k,ispec) = etaz
+!                    gammaxstore(i,j,k,ispec) = gammax
+!                    gammaystore(i,j,k,ispec) = gammay
+!                    gammazstore(i,j,k,ispec) = gammaz
+!                    jacobianstore(i,j,k,ispec) = jacobian
+!                endif
+!             end if
+!        enddo
+!    enddo
+!  enddo
+!
+!  end subroutine recalc_jacobian_gll3D
+!

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,1091 +1,1094 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-module create_regions_mesh_ext_par
-
-  include 'constants.h'
-
-! global point coordinates
-  real(kind=CUSTOM_REAL), dimension(:), allocatable :: xstore_dummy
-  real(kind=CUSTOM_REAL), dimension(:), allocatable :: ystore_dummy
-  real(kind=CUSTOM_REAL), dimension(:), allocatable :: zstore_dummy
-
-! Gauss-Lobatto-Legendre points and weights of integration
-  double precision, dimension(:), allocatable :: xigll,yigll,zigll,wxgll,wygll,wzgll
-
-! 3D shape functions and their derivatives
-  double precision, dimension(:,:,:,:), allocatable :: shape3D
-  double precision, dimension(:,:,:,:,:), allocatable :: dershape3D
-
-  double precision, dimension(:), allocatable :: xelm,yelm,zelm
-
-! arrays with mesh parameters
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: xixstore,xiystore,xizstore, &
-    etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore,jacobianstore
-
-! for model density, kappa, mu
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rhostore,kappastore,mustore
-
-! mass matrix
-  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass,rmass_acoustic,&
-                            rmass_solid_poroelastic,rmass_fluid_poroelastic
-
-! ocean load
-  integer :: NGLOB_OCEAN
-  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_ocean_load
-
-! attenuation
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: qmu_attenuation_store
-
-! 2D shape functions and their derivatives, weights
-  double precision, dimension(:,:,:), allocatable :: shape2D_x,shape2D_y,shape2D_bottom,shape2D_top
-  double precision, dimension(:,:,:,:), allocatable :: dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top
-  double precision, dimension(:,:), allocatable :: wgllwgll_xy,wgllwgll_xz,wgllwgll_yz
-
-! absorbing boundary arrays (for all boundaries) - keeps all infos, allowing for irregular surfaces
-  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: abs_boundary_normal
-  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: abs_boundary_jacobian2Dw
-  integer, dimension(:,:,:), allocatable :: abs_boundary_ijk
-  integer, dimension(:), allocatable :: abs_boundary_ispec
-  integer :: num_abs_boundary_faces
-
-! free surface arrays
-  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: free_surface_normal
-  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: free_surface_jacobian2Dw
-  integer, dimension(:,:,:), allocatable :: free_surface_ijk
-  integer, dimension(:), allocatable :: free_surface_ispec
-  integer :: num_free_surface_faces
-
-! acoustic-elastic coupling surface
-  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: coupling_ac_el_normal
-  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: coupling_ac_el_jacobian2Dw
-  integer, dimension(:,:,:), allocatable :: coupling_ac_el_ijk
-  integer, dimension(:), allocatable :: coupling_ac_el_ispec
-  integer :: num_coupling_ac_el_faces
-
-! for stacey
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rho_vp,rho_vs
-
-! anisotropy
-  integer :: NSPEC_ANISO
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: &
-            c11store,c12store,c13store,c14store,c15store,c16store,&
-            c22store,c23store,c24store,c25store,c26store,c33store,&
-            c34store,c35store,c36store,c44store,c45store,c46store,&
-            c55store,c56store,c66store
-
-! material domain flags
-  logical, dimension(:), allocatable :: ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic
-
-! name of the database file
-  character(len=256) prname
-
-end module create_regions_mesh_ext_par
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! main routine
-
-subroutine create_regions_mesh_ext(ibool, &
-                        xstore,ystore,zstore,nspec,npointot,myrank,LOCAL_PATH, &
-                        nnodes_ext_mesh,nelmnts_ext_mesh, &
-                        nodes_coords_ext_mesh, elmnts_ext_mesh, &
-                        max_static_memory_size, mat_ext_mesh, materials_ext_mesh, &
-                        nmat_ext_mesh, undef_mat_prop, nundefMat_ext_mesh, &
-                        num_interfaces_ext_mesh, max_interface_size_ext_mesh, &
-                        my_neighbours_ext_mesh, my_nelmnts_neighbours_ext_mesh, &
-                        my_interfaces_ext_mesh, &
-                        ibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh, &
-                        nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, &
-                        NSPEC2D_BOTTOM, NSPEC2D_TOP,&
-                        ibelm_xmin, ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top, &
-                        nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax,&
-                        nodes_ibelm_bottom,nodes_ibelm_top, &
-                        SAVE_MESH_FILES,nglob, &
-                        ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
-                        ATTENUATION,USE_OLSEN_ATTENUATION, &
-                        UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
-                        ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
-                        itopo_bathy)
-
-! create the different regions of the mesh
-
-  use create_regions_mesh_ext_par
-  implicit none
-  !include "constants.h"
-
-! number of spectral elements in each block
-  integer :: nspec
-
-! arrays with the mesh
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-
-  integer :: npointot
-
-! proc numbers for MPI
-  integer :: myrank
-  integer :: NPROC
-
-  character(len=256) :: LOCAL_PATH
-
-! data from the external mesh
-  integer :: nnodes_ext_mesh,nelmnts_ext_mesh
-  double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
-  integer, dimension(ESIZE,nelmnts_ext_mesh) :: elmnts_ext_mesh
-
-! static memory size needed by the solver
-  double precision :: max_static_memory_size
-
-  integer, dimension(2,nelmnts_ext_mesh) :: mat_ext_mesh
-
-! material properties
-  integer :: nmat_ext_mesh,nundefMat_ext_mesh
-  double precision, dimension(6,nmat_ext_mesh) :: materials_ext_mesh
-  character (len=30), dimension(6,nundefMat_ext_mesh):: undef_mat_prop
-
-!  double precision, external :: materials_ext_mesh
-
-! MPI communication
-  integer :: num_interfaces_ext_mesh,max_interface_size_ext_mesh
-  integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
-  integer, dimension(num_interfaces_ext_mesh) :: my_nelmnts_neighbours_ext_mesh
-  integer, dimension(6,max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: my_interfaces_ext_mesh
-  integer, dimension(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
-  integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
-
-! absorbing boundaries
-  integer  :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, NSPEC2D_BOTTOM, NSPEC2D_TOP
-  integer, dimension(nspec2D_xmin)  :: ibelm_xmin
-  integer, dimension(nspec2D_xmax)  :: ibelm_xmax
-  integer, dimension(nspec2D_ymin)  :: ibelm_ymin
-  integer, dimension(nspec2D_ymax)  :: ibelm_ymax
-  integer, dimension(NSPEC2D_BOTTOM)  :: ibelm_bottom
-  integer, dimension(NSPEC2D_TOP)  :: ibelm_top
-  ! node indices of boundary faces
-  integer, dimension(4,nspec2D_xmin)  :: nodes_ibelm_xmin
-  integer, dimension(4,nspec2D_xmax)  :: nodes_ibelm_xmax
-  integer, dimension(4,nspec2D_ymin)  :: nodes_ibelm_ymin
-  integer, dimension(4,nspec2D_ymax)  :: nodes_ibelm_ymax
-  integer, dimension(4,NSPEC2D_BOTTOM)  :: nodes_ibelm_bottom
-  integer, dimension(4,NSPEC2D_TOP)  :: nodes_ibelm_top
-
-  integer :: nglob
-
-  logical :: SAVE_MESH_FILES
-  logical :: ANISOTROPY
-  logical :: OCEANS,TOPOGRAPHY
-  logical :: ATTENUATION,USE_OLSEN_ATTENUATION
-
-! use integer array to store topography values
-  integer :: UTM_PROJECTION_ZONE
-  logical :: SUPPRESS_UTM_PROJECTION
-  integer :: NX_TOPO,NY_TOPO
-  double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
-  integer, dimension(NX_TOPO,NY_TOPO) :: itopo_bathy
-
-! local parameters
-! static memory size needed by the solver
-  double precision :: static_memory_size
-  real(kind=CUSTOM_REAL) :: model_speed_max,min_resolved_period
-
-! for vtk output
-!  character(len=256) prname_file
-!  integer,dimension(:),allocatable :: itest_flag
-!  integer, dimension(:), allocatable :: elem_flag
-
-! For Piero Basini :
-! integer :: doubling_value_found_for_Piero
-!   double precision :: xmesh,ymesh,zmesh
-!   double precision :: rho,vp,vs
-
-!   integer,dimension(nspec) ::  idoubling
-!   integer :: doubling_value_found_for_Piero
-!   integer, parameter :: NUMBER_OF_STATIONS = 6
-!   double precision, parameter :: RADIUS_TO_EXCLUDE = 250.d0
-!   double precision, dimension(NUMBER_OF_STATIONS) :: utm_x_station,utm_y_station
-
-!   logical :: is_around_a_station
-!   integer :: istation
-
-! ! store bedrock values
-!   integer ::  icornerlat,icornerlong
-!   double precision ::  lat,long,elevation_bedrock
-!   double precision ::  lat_corner,long_corner,ratio_xi,ratio_eta
-!real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: ibedrock
-
-! initializes arrays
-  call sync_all()
-  if( myrank == 0) then
-    write(IMAIN,*)
-    write(IMAIN,*) '  ...allocating arrays '
-  endif
-  call crm_ext_allocate_arrays(nspec,LOCAL_PATH,myrank, &
-                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
-                        nspec2D_bottom,nspec2D_top,ANISOTROPY)
-
-
-! fills location and weights for Gauss-Lobatto-Legendre points, shape and derivations,
-! returns jacobianstore,xixstore,...gammazstore
-! and GLL-point locations in xstore,ystore,zstore
-  call sync_all()
-  if( myrank == 0) then
-    write(IMAIN,*) '  ...setting up jacobian '
-  endif
-  call crm_ext_setup_jacobian(myrank, &
-                        xstore,ystore,zstore,nspec, &
-                        nodes_coords_ext_mesh,nnodes_ext_mesh,&
-                        elmnts_ext_mesh,nelmnts_ext_mesh)
-
-! creates ibool index array for projection from local to global points
-  call sync_all()
-  if( myrank == 0) then
-    write(IMAIN,*) '  ...indexing global points'
-  endif
-  call crm_ext_setup_indexing(ibool, &
-                        xstore,ystore,zstore,nspec,nglob,npointot, &
-                        nnodes_ext_mesh,nodes_coords_ext_mesh,myrank)
-
-! sets up MPI interfaces between partitions
-  call sync_all()
-  if( myrank == 0) then
-    write(IMAIN,*) '  ...preparing MPI interfaces '
-  endif
-  call get_MPI(myrank,nglob,nspec,ibool, &
-                        nelmnts_ext_mesh,elmnts_ext_mesh, &
-                        my_nelmnts_neighbours_ext_mesh, my_interfaces_ext_mesh, &
-                        ibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh, &
-                        num_interfaces_ext_mesh,max_interface_size_ext_mesh,&
-                        my_neighbours_ext_mesh,NPROC)
-
-! sets material velocities
-  call sync_all()
-  if( myrank == 0) then
-    write(IMAIN,*) '  ...determining velocity model'
-  endif
-  ! Default model. Using PREM model instead
-  ! call get_model(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
-  ! materials_ext_mesh,nmat_ext_mesh, &
-  ! undef_mat_prop,nundefMat_ext_mesh, &
-  ! ANISOTROPY,LOCAL_PATH)
-
-  call get_model_PREM(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
-                        materials_ext_mesh,nmat_ext_mesh, &
-                        undef_mat_prop,nundefMat_ext_mesh, &
-                        ANISOTROPY,LOCAL_PATH)
-  
-! sets up absorbing/free surface boundaries
-  call sync_all()
-  if( myrank == 0) then
-    write(IMAIN,*) '  ...setting up absorbing boundaries '
-  endif
-  call get_absorbing_boundary(myrank,nspec,nglob,ibool, &
-                            nodes_coords_ext_mesh,nnodes_ext_mesh, &
-                            ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
-                            nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax, &
-                            nodes_ibelm_bottom,nodes_ibelm_top, &
-                            nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
-                            nspec2D_bottom,nspec2D_top)
-
-! sets up acoustic-elastic coupling surfaces
-  call sync_all()
-  if( myrank == 0) then
-    write(IMAIN,*) '  ...detecting acoustic-elastic surfaces '
-  endif
-  call get_coupling_surfaces(myrank, &
-                        nspec,nglob,ibool,NPROC, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
-                        num_interfaces_ext_mesh,max_interface_size_ext_mesh, &
-                        my_neighbours_ext_mesh)
-
-! creates mass matrix
-  call sync_all()
-  if( myrank == 0) then
-    write(IMAIN,*) '  ...creating mass matrix '
-  endif
-  call create_mass_matrices(nglob,nspec,ibool)
-
-! creates ocean load mass matrix
-  call sync_all()
-  if( myrank == 0) then
-    write(IMAIN,*) '  ...creating ocean load mass matrix '
-  endif
-  call create_mass_matrices_ocean_load(nglob,nspec,ibool,OCEANS,TOPOGRAPHY, &
-                        UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
-                        ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
-                        itopo_bathy)
-
-! saves the binary mesh files
-  call sync_all()
-  if( myrank == 0) then
-    write(IMAIN,*) '  ...saving databases'
-  endif
-  !call create_name_database(prname,myrank,LOCAL_PATH)
-  call save_arrays_solver_ext_mesh(nspec,nglob, &
-                        xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore,&
-                        gammaxstore,gammaystore,gammazstore, &
-                        jacobianstore, rho_vp,rho_vs,qmu_attenuation_store, &
-                        rhostore,kappastore,mustore, &
-                        rmass,rmass_acoustic,rmass_solid_poroelastic,rmass_fluid_poroelastic, &
-                        OCEANS,rmass_ocean_load,NGLOB_OCEAN,ibool,xstore_dummy,ystore_dummy,zstore_dummy, &
-                        abs_boundary_normal,abs_boundary_jacobian2Dw, &
-                        abs_boundary_ijk,abs_boundary_ispec,num_abs_boundary_faces, &
-                        free_surface_normal,free_surface_jacobian2Dw, &
-                        free_surface_ijk,free_surface_ispec,num_free_surface_faces, &
-                        coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
-                        coupling_ac_el_ijk,coupling_ac_el_ispec,num_coupling_ac_el_faces, &
-                        num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
-                        max_interface_size_ext_mesh,ibool_interfaces_ext_mesh, &
-                        prname,SAVE_MESH_FILES,ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store, &
-                        c22store,c23store,c24store,c25store,c26store,c33store, &
-                        c34store,c35store,c36store,c44store,c45store,c46store, &
-                        c55store,c56store,c66store, &
-                        ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
-
-! computes the approximate amount of static memory needed to run the solver
-  call memory_eval(nspec,nglob,maxval(nibool_interfaces_ext_mesh),num_interfaces_ext_mesh, &
-                  OCEANS,static_memory_size)
-  call max_all_dp(static_memory_size, max_static_memory_size)
-
-! checks the mesh, stability and resolved period
-  call sync_all()
-  call check_mesh_resolution(myrank,nspec,nglob,ibool,&
-                            xstore_dummy,ystore_dummy,zstore_dummy, &
-                            kappastore,mustore,rho_vp,rho_vs, &
-                            -1.0d0, model_speed_max,min_resolved_period )
-
-! saves binary mesh files for attenuation
-  if( ATTENUATION ) then
-    call get_attenuation_model(myrank,nspec,USE_OLSEN_ATTENUATION, &
-                          mustore,rho_vs,qmu_attenuation_store, &
-                          ispec_is_elastic,min_resolved_period,prname)
-  endif
-
-
-! VTK file output
-!  if( SAVE_MESH_FILES ) then
-!    ! saves material flag assigned for each spectral element into a vtk file
-!    prname_file = prname(1:len_trim(prname))//'material_flag'
-!    allocate(elem_flag(nspec))
-!    elem_flag(:) = mat_ext_mesh(1,:)
-!    call write_VTK_data_elem_i(nspec,nglob, &
-!            xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-!            elem_flag,prname_file)
-!    deallocate(elem_flag)
-!
-!    !plotting abs boundaries
-!    !  allocate(itest_flag(nspec))
-!    !  itest_flag(:) = 0
-!    !  do ispec=1,nspec
-!    !    if( iboun(1,ispec) ) itest_flag(ispec) = 1
-!    !  enddo
-!    !  prname_file = prname(1:len_trim(prname))//'iboundary1_flag'
-!    !  call write_VTK_data_elem_i(nspec,nglob, &
-!    !            xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-!    !            itest_flag,prname_file)
-!    !  deallocate(itest_flag)
-!  endif
-
-! cleanup
-  if( .not. SAVE_MOHO_MESH ) deallocate(xstore_dummy,ystore_dummy,zstore_dummy)
-  deallocate(xixstore,xiystore,xizstore,&
-              etaxstore,etaystore,etazstore,&
-              gammaxstore,gammaystore,gammazstore)
-  deallocate(jacobianstore,qmu_attenuation_store)
-  deallocate(kappastore,mustore,rho_vp,rho_vs)
-
-end subroutine create_regions_mesh_ext
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-subroutine crm_ext_allocate_arrays(nspec,LOCAL_PATH,myrank, &
-                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
-                        nspec2D_bottom,nspec2D_top,ANISOTROPY)
-
-  use create_regions_mesh_ext_par
-  implicit none
-
-  integer :: nspec,myrank
-  integer :: nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
-            nspec2D_bottom,nspec2D_top
-
-  character(len=256) :: LOCAL_PATH
-
-  logical :: ANISOTROPY
-
-! local parameters
-  integer :: ier
-
-! memory test
-!  logical,dimension(:),allocatable :: test_mem
-!
-! tests memory availability (including some small buffer of 10*1024 byte)
-!  allocate( test_mem(int(max_static_memory_size)+10*1024),stat=ier)
-!  if(ier /= 0) then
-!    write(IMAIN,*) 'error: try to increase the available process stack size by'
-!    write(IMAIN,*) '       ulimit -s **** '
-!    call exit_MPI(myrank,'not enough memory to allocate arrays')
-!  endif
-!  test_mem(:) = .true.
-!  deallocate( test_mem, stat=ier)
-!  if(ier /= 0) call exit_MPI(myrank,'error to allocate arrays')
-!  call sync_all()
-
-  allocate( xelm(NGNOD),yelm(NGNOD),zelm(NGNOD),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array xelm etc.'
-
-  allocate( qmu_attenuation_store(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! create the name for the database of the current slide and region
-  call create_name_database(prname,myrank,LOCAL_PATH)
-
-! Gauss-Lobatto-Legendre points of integration
-  allocate(xigll(NGLLX),yigll(NGLLY),zigll(NGLLZ),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array xigll etc.'
-
-! Gauss-Lobatto-Legendre weights of integration
-  allocate(wxgll(NGLLX),wygll(NGLLY),wzgll(NGLLZ),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array wxgll etc.'
-
-! 3D shape functions and their derivatives
-  allocate(shape3D(NGNOD,NGLLX,NGLLY,NGLLZ), &
-          dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array shape3D etc.'
-
-! 2D shape functions and their derivatives
-  allocate(shape2D_x(NGNOD2D,NGLLY,NGLLZ), &
-          shape2D_y(NGNOD2D,NGLLX,NGLLZ), &
-          shape2D_bottom(NGNOD2D,NGLLX,NGLLY), &
-          shape2D_top(NGNOD2D,NGLLX,NGLLY),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array shape2D_x etc.'
-
-  allocate(dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ), &
-          dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ), &
-          dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY), &
-          dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array dershape2D_x etc.'
-
-  allocate(wgllwgll_xy(NGLLX,NGLLY), &
-          wgllwgll_xz(NGLLX,NGLLZ), &
-          wgllwgll_yz(NGLLY,NGLLZ),stat=ier)
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! Stacey
-  allocate(rho_vp(NGLLX,NGLLY,NGLLZ,nspec), &
-          rho_vs(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! array with model density
-  allocate(rhostore(NGLLX,NGLLY,NGLLZ,nspec), &
-          kappastore(NGLLX,NGLLY,NGLLZ,nspec), &
-          mustore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
-          !vpstore(NGLLX,NGLLY,NGLLZ,nspec), &
-          !vsstore(NGLLX,NGLLY,NGLLZ,nspec),
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! arrays with mesh parameters
-  allocate(xixstore(NGLLX,NGLLY,NGLLZ,nspec), &
-          xiystore(NGLLX,NGLLY,NGLLZ,nspec), &
-          xizstore(NGLLX,NGLLY,NGLLZ,nspec), &
-          etaxstore(NGLLX,NGLLY,NGLLZ,nspec), &
-          etaystore(NGLLX,NGLLY,NGLLZ,nspec), &
-          etazstore(NGLLX,NGLLY,NGLLZ,nspec), &
-          gammaxstore(NGLLX,NGLLY,NGLLZ,nspec), &
-          gammaystore(NGLLX,NGLLY,NGLLZ,nspec), &
-          gammazstore(NGLLX,NGLLY,NGLLZ,nspec), &
-          jacobianstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! absorbing boundary
-  ! absorbing faces
-  num_abs_boundary_faces = nspec2D_xmin + nspec2D_xmax + nspec2D_ymin + nspec2D_ymax + nspec2D_bottom
-  ! adds faces of free surface if it also absorbs
-  if( ABSORB_FREE_SURFACE ) num_abs_boundary_faces = num_abs_boundary_faces + nspec2D_top
-
-  ! allocates arrays to store info for each face (assumes NGLLX=NGLLY=NGLLZ)
-  allocate( abs_boundary_ispec(num_abs_boundary_faces), &
-           abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces), &
-           abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces), &
-           abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces),stat=ier)
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-  ! free surface faces
-  num_free_surface_faces = nspec2D_top
-
-  ! allocates arrays to store info for each face (assumes NGLLX=NGLLY=NGLLZ)
-  allocate( free_surface_ispec(num_free_surface_faces), &
-           free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces), &
-           free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces), &
-           free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces),stat=ier)
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! array with anisotropy
-  if( ANISOTROPY ) then
-    NSPEC_ANISO = nspec
-  else
-    NSPEC_ANISO = 1
-  endif
-  allocate(c11store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c12store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c13store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c14store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c15store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c16store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c22store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c23store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c24store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c25store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c26store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c33store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c34store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c35store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c36store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c44store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c45store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c46store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c55store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c56store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
-          c66store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO),stat=ier)
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! material flags
-  allocate( ispec_is_acoustic(nspec), &
-           ispec_is_elastic(nspec), &
-           ispec_is_poroelastic(nspec), stat=ier)
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-end subroutine crm_ext_allocate_arrays
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-subroutine crm_ext_setup_jacobian(myrank, &
-                        xstore,ystore,zstore,nspec, &
-                        nodes_coords_ext_mesh,nnodes_ext_mesh,&
-                        elmnts_ext_mesh,nelmnts_ext_mesh)
-
-  use create_regions_mesh_ext_par
-  implicit none
-
-! number of spectral elements in each block
-  integer :: nspec
-
-  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-
-! data from the external mesh
-  integer :: nnodes_ext_mesh,nelmnts_ext_mesh
-  double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
-  integer, dimension(ESIZE,nelmnts_ext_mesh) :: elmnts_ext_mesh
-
-! proc numbers for MPI
-  integer :: myrank
-
-! local parameters
-  integer :: ispec,ia,i,j,k
-
-! set up coordinates of the Gauss-Lobatto-Legendre points
-  call zwgljd(xigll,wxgll,NGLLX,GAUSSALPHA,GAUSSBETA)
-  call zwgljd(yigll,wygll,NGLLY,GAUSSALPHA,GAUSSBETA)
-  call zwgljd(zigll,wzgll,NGLLZ,GAUSSALPHA,GAUSSBETA)
-
-! if number of points is odd, the middle abscissa is exactly zero
-  if(mod(NGLLX,2) /= 0) xigll((NGLLX-1)/2+1) = ZERO
-  if(mod(NGLLY,2) /= 0) yigll((NGLLY-1)/2+1) = ZERO
-  if(mod(NGLLZ,2) /= 0) zigll((NGLLZ-1)/2+1) = ZERO
-
-! get the 3-D shape functions
-  call get_shape3D(myrank,shape3D,dershape3D,xigll,yigll,zigll)
-
-! get the 2-D shape functions
-  call get_shape2D(myrank,shape2D_x,dershape2D_x,yigll,zigll,NGLLY,NGLLZ)
-  call get_shape2D(myrank,shape2D_y,dershape2D_y,xigll,zigll,NGLLX,NGLLZ)
-  call get_shape2D(myrank,shape2D_bottom,dershape2D_bottom,xigll,yigll,NGLLX,NGLLY)
-  call get_shape2D(myrank,shape2D_top,dershape2D_top,xigll,yigll,NGLLX,NGLLY)
-
-! 2D weights
-  do j=1,NGLLY
-    do i=1,NGLLX
-      wgllwgll_xy(i,j) = wxgll(i)*wygll(j)
-    enddo
-  enddo
-  do k=1,NGLLZ
-    do i=1,NGLLX
-      wgllwgll_xz(i,k) = wxgll(i)*wzgll(k)
-    enddo
-  enddo
-  do k=1,NGLLZ
-    do j=1,NGLLY
-      wgllwgll_yz(j,k) = wygll(j)*wzgll(k)
-    enddo
-  enddo
-
-! point locations
-  xstore(:,:,:,:) = 0.d0
-  ystore(:,:,:,:) = 0.d0
-  zstore(:,:,:,:) = 0.d0
-
-  do ispec = 1, nspec
-    !call get_xyzelm(xelm, yelm, zelm, ispec, elmnts_ext_mesh, nodes_coords_ext_mesh, nspec, nnodes_ext_mesh)
-    do ia = 1,NGNOD
-      xelm(ia) = nodes_coords_ext_mesh(1,elmnts_ext_mesh(ia,ispec))
-      yelm(ia) = nodes_coords_ext_mesh(2,elmnts_ext_mesh(ia,ispec))
-      zelm(ia) = nodes_coords_ext_mesh(3,elmnts_ext_mesh(ia,ispec))
-    enddo
-
-    ! CUBIT should provide a mesh ordering such that the 3D jacobian is defined
-    ! (otherwise mesh would be degenerated)
-    call calc_jacobian(myrank,xixstore,xiystore,xizstore, &
-                      etaxstore,etaystore,etazstore, &
-                      gammaxstore,gammaystore,gammazstore,jacobianstore, &
-                      xstore,ystore,zstore, &
-                      xelm,yelm,zelm,shape3D,dershape3D,ispec,nspec)
-
-  enddo
-
-end subroutine crm_ext_setup_jacobian
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-subroutine crm_ext_setup_indexing(ibool, &
-                            xstore,ystore,zstore,nspec,nglob,npointot, &
-                            nnodes_ext_mesh,nodes_coords_ext_mesh,myrank)
-
-! creates global indexing array ibool
-
-  use create_regions_mesh_ext_par
-  implicit none
-
-! number of spectral elements in each block
-  integer :: nspec,nglob,npointot,myrank
-
-! arrays with the mesh
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-
-! data from the external mesh
-  integer :: nnodes_ext_mesh
-  double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
-
-! local parameters
-! variables for creating array ibool
-  double precision, dimension(:), allocatable :: xp,yp,zp
-  integer, dimension(:), allocatable :: locval
-  logical, dimension(:), allocatable :: ifseg
-
-  integer :: ieoff,ilocnum,ier
-  integer :: i,j,k,ispec,iglobnum
-
-! allocate memory for arrays
-  allocate(locval(npointot), &
-          ifseg(npointot), &
-          xp(npointot), &
-          yp(npointot), &
-          zp(npointot),stat=ier)
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! creates temporary global point arrays
-  locval = 0
-  ifseg = .false.
-  xp = 0.d0
-  yp = 0.d0
-  zp = 0.d0
-
-  do ispec=1,nspec
-    ieoff = NGLLX * NGLLY * NGLLZ * (ispec-1)
-    ilocnum = 0
-    do k=1,NGLLZ
-      do j=1,NGLLY
-        do i=1,NGLLX
-          ilocnum = ilocnum + 1
-          xp(ilocnum+ieoff) = xstore(i,j,k,ispec)
-          yp(ilocnum+ieoff) = ystore(i,j,k,ispec)
-          zp(ilocnum+ieoff) = zstore(i,j,k,ispec)
-        enddo
-      enddo
-    enddo
-  enddo
-
-! gets ibool indexing from local (GLL points) to global points
-  call get_global(nspec,xp,yp,zp,ibool,locval,ifseg,nglob,npointot, &
-       minval(nodes_coords_ext_mesh(1,:)),maxval(nodes_coords_ext_mesh(1,:)))
-
-!- we can create a new indirect addressing to reduce cache misses
-  call get_global_indirect_addressing(nspec,nglob,ibool)
-
-!cleanup
-  deallocate(xp,stat=ier); if(ier /= 0) stop 'error in deallocate'
-  deallocate(yp,stat=ier); if(ier /= 0) stop 'error in deallocate'
-  deallocate(zp,stat=ier); if(ier /= 0) stop 'error in deallocate'
-  deallocate(locval,stat=ier); if(ier /= 0) stop 'error in deallocate'
-  deallocate(ifseg,stat=ier); if(ier /= 0) stop 'error in deallocate'
-
-! unique global point locations
-  allocate(xstore_dummy(nglob), &
-          ystore_dummy(nglob), &
-          zstore_dummy(nglob),stat=ier)
-  if(ier /= 0) stop 'error in allocate'
-  do ispec = 1, nspec
-     do k = 1, NGLLZ
-        do j = 1, NGLLY
-           do i = 1, NGLLX
-              iglobnum = ibool(i,j,k,ispec)
-              xstore_dummy(iglobnum) = xstore(i,j,k,ispec)
-              ystore_dummy(iglobnum) = ystore(i,j,k,ispec)
-              zstore_dummy(iglobnum) = zstore(i,j,k,ispec)
-           enddo
-        enddo
-     enddo
-  enddo
-
-  end subroutine crm_ext_setup_indexing
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-  subroutine create_regions_mesh_save_moho( myrank,nglob,nspec, &
-                        nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
-                        nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
-
-  use create_regions_mesh_ext_par
-  implicit none
-
-  integer :: nspec2D_moho_ext
-  integer, dimension(nspec2D_moho_ext) :: ibelm_moho
-  integer, dimension(4,nspec2D_moho_ext) :: nodes_ibelm_moho
-
-  integer :: myrank,nglob,nspec
-
-  ! data from the external mesh
-  integer :: nnodes_ext_mesh
-  double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
-
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-
-! local parameters
-  ! Moho mesh
-  real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_top
-  real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_bot
-  integer,dimension(:,:,:),allocatable :: ijk_moho_top, ijk_moho_bot
-  integer,dimension(:),allocatable :: ibelm_moho_top, ibelm_moho_bot
-  integer :: NSPEC2D_MOHO
-  logical, dimension(:),allocatable :: is_moho_top, is_moho_bot
-
-  real(kind=CUSTOM_REAL),dimension(NGNOD2D) :: xcoord,ycoord,zcoord
-  real(kind=CUSTOM_REAL) :: jacobian2Dw_face(NGLLX,NGLLY)
-  real(kind=CUSTOM_REAL) :: normal_face(NDIM,NGLLX,NGLLY)
-  real(kind=CUSTOM_REAL),dimension(NDIM):: normal
-  integer :: ijk_face(3,NGLLX,NGLLY)
-
-  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: iglob_normals
-  integer,dimension(:),allocatable:: iglob_is_surface
-
-  integer :: imoho_bot,imoho_top
-  integer :: ispec2D,ispec,icorner,iface,i,j,k,igll,iglob,ier
-  integer :: iglob_midpoint,idirect,counter
-  integer :: imoho_top_all,imoho_bot_all,imoho_all
-
-  ! corners indices of reference cube faces
-  integer,dimension(3,4),parameter :: iface1_corner_ijk = &
-             reshape( (/ 1,1,1, 1,NGLLY,1, 1,NGLLY,NGLLZ, 1,1,NGLLZ /),(/3,4/))   ! xmin
-  integer,dimension(3,4),parameter :: iface2_corner_ijk = &
-             reshape( (/ NGLLX,1,1, NGLLX,NGLLY,1, NGLLX,NGLLY,NGLLZ, NGLLX,1,NGLLZ  /),(/3,4/))   ! xmax
-  integer,dimension(3,4),parameter :: iface3_corner_ijk = &
-             reshape( (/ 1,1,1, 1,1,NGLLZ, NGLLX,1,NGLLZ, NGLLX,1,1  /),(/3,4/))   ! ymin
-  integer,dimension(3,4),parameter :: iface4_corner_ijk = &
-             reshape( (/ 1,NGLLY,1, NGLLX,NGLLY,1, NGLLX,NGLLY,NGLLZ, 1,NGLLY,NGLLZ /),(/3,4/))   ! ymax
-  integer,dimension(3,4),parameter :: iface5_corner_ijk = &
-             reshape( (/ 1,1,1, 1,NGLLY,1, NGLLX,NGLLY,1, NGLLX,1,1 /),(/3,4/))  ! bottom
-  integer,dimension(3,4),parameter :: iface6_corner_ijk = &
-             reshape( (/ 1,1,NGLLZ, NGLLX,1,NGLLZ, NGLLX,NGLLY,NGLLZ, 1,NGLLY,NGLLZ  /),(/3,4/))   ! top
-  integer,dimension(3,4,6),parameter :: iface_all_corner_ijk = &
-             reshape( (/ iface1_corner_ijk,iface2_corner_ijk, &
-                 iface3_corner_ijk,iface4_corner_ijk, &
-                 iface5_corner_ijk,iface6_corner_ijk /),(/3,4,6/))   ! all faces
-  ! midpoint indices for each face (xmin,xmax,ymin,ymax,zmin,zmax)
-  integer,dimension(3,6),parameter :: iface_all_midpointijk = &
-             reshape( (/ 1,2,2, NGLLX,2,2, 2,1,2, 2,NGLLY,2, 2,2,1, 2,2,NGLLZ  /),(/3,6/))   ! top
-
-  ! temporary arrays for passing information
-  allocate(iglob_is_surface(nglob), &
-          iglob_normals(NDIM,nglob),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array iglob_is_surface'
-
-  iglob_is_surface = 0
-  iglob_normals = 0._CUSTOM_REAL
-
-  ! loops over given moho surface elements
-  do ispec2D=1, nspec2D_moho_ext
-
-    ! gets element id
-    ispec = ibelm_moho(ispec2D)
-
-    ! looks for i,j,k indices of GLL points on boundary face
-    ! determines element face by given CUBIT corners
-    ! (note: uses point locations rather than point indices to find the element face,
-    !            because the indices refer no more to the newly indexed ibool array )
-    do icorner=1,NGNOD2D
-      xcoord(icorner) = nodes_coords_ext_mesh(1,nodes_ibelm_moho(icorner,ispec2D))
-      ycoord(icorner) = nodes_coords_ext_mesh(2,nodes_ibelm_moho(icorner,ispec2D))
-      zcoord(icorner) = nodes_coords_ext_mesh(3,nodes_ibelm_moho(icorner,ispec2D))
-    enddo
-
-    ! sets face id of reference element associated with this face
-    call get_element_face_id(ispec,xcoord,ycoord,zcoord, &
-                            ibool,nspec,nglob, &
-                            xstore_dummy,ystore_dummy,zstore_dummy, &
-                            iface)
-
-    ! ijk indices of GLL points for face id
-    call get_element_face_gll_indices(iface,ijk_face,NGLLX,NGLLZ)
-
-    ! weighted jacobian and normal
-    call get_jacobian_boundary_face(myrank,nspec, &
-              xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
-              dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
-              wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
-              ispec,iface,jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
-
-    ! normal convention: points away from element
-    ! switch normal direction if necessary
-    do j=1,NGLLY
-      do i=1,NGLLX
-          call get_element_face_normal(ispec,iface,xcoord,ycoord,zcoord, &
-                                      ibool,nspec,nglob, &
-                                      xstore_dummy,ystore_dummy,zstore_dummy, &
-                                      normal_face(:,i,j) )
-      enddo
-    enddo
-
-    ! stores information on global points on moho surface
-    igll = 0
-    do j=1,NGLLY
-      do i=1,NGLLX
-        iglob = ibool(ijk_face(1,i,j),ijk_face(2,i,j),ijk_face(3,i,j),ispec)
-        ! sets flag
-        iglob_is_surface(iglob) = ispec2D
-        ! sets normals
-        iglob_normals(:,iglob) = normal_face(:,i,j)
-      enddo
-    enddo
-  enddo
-
-  ! stores moho elements
-  NSPEC2D_MOHO = nspec2D_moho_ext
-
-  allocate(ibelm_moho_bot(NSPEC2D_MOHO), &
-          ibelm_moho_top(NSPEC2D_MOHO), &
-          normal_moho_top(NDIM,NGLLSQUARE,NSPEC2D_MOHO), &
-          normal_moho_bot(NDIM,NGLLSQUARE,NSPEC2D_MOHO), &
-          ijk_moho_bot(3,NGLLSQUARE,NSPEC2D_MOHO), &
-          ijk_moho_top(3,NGLLSQUARE,NSPEC2D_MOHO),stat=ier)
-  if( ier /= 0 ) stop 'error allocating ibelm_moho_bot'
-
-  ibelm_moho_bot = 0
-  ibelm_moho_top = 0
-
-  ! element flags
-  allocate(is_moho_top(nspec), &
-          is_moho_bot(nspec),stat=ier)
-  if( ier /= 0 ) stop 'error allocating is_moho_top'
-  is_moho_top = .false.
-  is_moho_bot = .false.
-
-  ! finds spectral elements with moho surface
-  imoho_top = 0
-  imoho_bot = 0
-  do ispec=1,nspec
-
-    ! loops over each face
-    do iface = 1,6
-      ! checks if corners of face on surface
-      counter = 0
-      do icorner = 1,NGNOD2D
-        i = iface_all_corner_ijk(1,icorner,iface)
-        j = iface_all_corner_ijk(2,icorner,iface)
-        k = iface_all_corner_ijk(3,icorner,iface)
-        iglob = ibool(i,j,k,ispec)
-
-        ! checks if point on surface
-        if( iglob_is_surface(iglob) > 0 ) then
-          counter = counter+1
-
-          ! reference corner coordinates
-          xcoord(icorner) = xstore_dummy(iglob)
-          ycoord(icorner) = ystore_dummy(iglob)
-          zcoord(icorner) = zstore_dummy(iglob)
-        endif
-      enddo
-
-      ! stores moho informations
-      if( counter == NGNOD2D ) then
-
-        ! gets face GLL points i,j,k indices from element face
-        call get_element_face_gll_indices(iface,ijk_face,NGLLX,NGLLY)
-
-        ! re-computes face infos
-        ! weighted jacobian and normal
-        call get_jacobian_boundary_face(myrank,nspec, &
-              xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
-              dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
-              wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
-              ispec,iface,jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
-
-        ! normal convention: points away from element
-        ! switch normal direction if necessary
-        do j=1,NGLLZ
-          do i=1,NGLLX
-            call get_element_face_normal(ispec,iface,xcoord,ycoord,zcoord, &
-                                      ibool,nspec,nglob, &
-                                      xstore_dummy,ystore_dummy,zstore_dummy, &
-                                      normal_face(:,i,j) )
-          enddo
-        enddo
-
-        ! takes normal stored temporary on a face midpoint
-        i = iface_all_midpointijk(1,iface)
-        j = iface_all_midpointijk(2,iface)
-        k = iface_all_midpointijk(3,iface)
-        iglob_midpoint = ibool(i,j,k,ispec)
-        normal(:) = iglob_normals(:,iglob_midpoint)
-
-        ! determines whether normal points into element or not (top/bottom distinction)
-        call get_element_face_normal_idirect(ispec,iface,xcoord,ycoord,zcoord, &
-                              ibool,nspec,nglob, &
-                              xstore_dummy,ystore_dummy,zstore_dummy, &
-                              normal,idirect )
-
-        ! takes moho surface element id given by id on midpoint
-        ispec2D = iglob_is_surface(iglob_midpoint)
-
-        ! sets face infos for bottom (normal points away from element)
-        if( idirect == 1 ) then
-
-          ! checks validity
-          if( is_moho_bot( ispec) .eqv. .true. ) then
-            print*,'error: moho surface geometry bottom'
-            print*,'  does not allow for mulitple element faces in kernel computation'
-            call exit_mpi(myrank,'error moho bottom elements')
-          endif
-
-          imoho_bot = imoho_bot + 1
-          is_moho_bot(ispec) = .true.
-          ibelm_moho_bot(ispec2D) = ispec
-
-          ! stores on surface gll points (assuming NGLLX = NGLLY = NGLLZ)
-          igll = 0
-          do j=1,NGLLZ
-            do i=1,NGLLX
-              igll = igll+1
-              ijk_moho_bot(:,igll,ispec2D) = ijk_face(:,i,j)
-              normal_moho_bot(:,igll,ispec2D) = normal_face(:,i,j)
-            enddo
-          enddo
-
-        ! sets face infos for top element
-        else if( idirect == 2 ) then
-
-          ! checks validity
-          if( is_moho_top( ispec) .eqv. .true. ) then
-            print*,'error: moho surface geometry top'
-            print*,'  does not allow for mulitple element faces kernel computation'
-            call exit_mpi(myrank,'error moho top elements')
-          endif
-
-          imoho_top = imoho_top + 1
-          is_moho_top(ispec) = .true.
-          ibelm_moho_top(ispec2D) = ispec
-
-          ! gll points
-          igll = 0
-          do j=1,NGLLZ
-            do i=1,NGLLX
-              igll = igll+1
-              ijk_moho_top(:,igll,ispec) = ijk_face(:,i,j)
-              ! note: top elements have normal pointing into element
-              normal_moho_top(:,igll,ispec) = - normal_face(:,i,j)
-            enddo
-          enddo
-        endif
-
-      endif ! counter
-
-    enddo ! iface
-
-    ! checks validity of top/bottom distinction
-    if( is_moho_top(ispec) .and. is_moho_bot(ispec) ) then
-      print*,'error: moho surface elements confusing'
-      print*,'  element:',ispec,'has top and bottom surface'
-      call exit_mpi(myrank,'error moho surface element')
-    endif
-
-  enddo ! ispec2D
-
-  ! note: surface e.g. could be at the free-surface and have no top elements etc...
-  ! user output
-  call sum_all_i( imoho_top, imoho_top_all )
-  call sum_all_i( imoho_bot, imoho_bot_all )
-  call sum_all_i( NSPEC2D_MOHO, imoho_all )
-  if( myrank == 0 ) then
-    write(IMAIN,*) '********'
-    write(IMAIN,*) 'Moho surface:'
-    write(IMAIN,*) '    total surface elements: ',imoho_all
-    write(IMAIN,*) '    top elements   :',imoho_top_all
-    write(IMAIN,*) '    bottom elements:',imoho_bot_all
-    write(IMAIN,*) '********'
-  endif
-
-  ! saves moho files: total number of elements, corner points, all points
-  open(unit=27,file=prname(1:len_trim(prname))//'ibelm_moho.bin',status='unknown',form='unformatted')
-  write(27) NSPEC2D_MOHO
-  write(27) ibelm_moho_top
-  write(27) ibelm_moho_bot
-  write(27) ijk_moho_top
-  write(27) ijk_moho_bot
-  close(27)
-  open(unit=27,file=prname(1:len_trim(prname))//'normal_moho.bin',status='unknown',form='unformatted')
-  write(27) normal_moho_top
-  write(27) normal_moho_bot
-  close(27)
-  open(unit=27,file=prname(1:len_trim(prname))//'is_moho.bin',status='unknown',form='unformatted')
-  write(27) is_moho_top
-  write(27) is_moho_bot
-  close(27)
-
-  end subroutine create_regions_mesh_save_moho
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+module create_regions_mesh_ext_par
+
+  include 'constants.h'
+
+! global point coordinates
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: xstore_dummy
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: ystore_dummy
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: zstore_dummy
+
+! Gauss-Lobatto-Legendre points and weights of integration
+  double precision, dimension(:), allocatable :: xigll,yigll,zigll,wxgll,wygll,wzgll
+
+! 3D shape functions and their derivatives
+  double precision, dimension(:,:,:,:), allocatable :: shape3D
+  double precision, dimension(:,:,:,:,:), allocatable :: dershape3D
+
+  double precision, dimension(:), allocatable :: xelm,yelm,zelm
+
+! arrays with mesh parameters
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: xixstore,xiystore,xizstore, &
+    etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore,jacobianstore
+
+! for model density, kappa, mu
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rhostore,kappastore,mustore
+
+! mass matrix
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass,rmass_acoustic,&
+                            rmass_solid_poroelastic,rmass_fluid_poroelastic
+
+! ocean load
+  integer :: NGLOB_OCEAN
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_ocean_load
+
+! attenuation
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: qmu_attenuation_store
+
+! 2D shape functions and their derivatives, weights
+  double precision, dimension(:,:,:), allocatable :: shape2D_x,shape2D_y,shape2D_bottom,shape2D_top
+  double precision, dimension(:,:,:,:), allocatable :: dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top
+  double precision, dimension(:,:), allocatable :: wgllwgll_xy,wgllwgll_xz,wgllwgll_yz
+
+! absorbing boundary arrays (for all boundaries) - keeps all infos, allowing for irregular surfaces
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: abs_boundary_normal
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: abs_boundary_jacobian2Dw
+  integer, dimension(:,:,:), allocatable :: abs_boundary_ijk
+  integer, dimension(:), allocatable :: abs_boundary_ispec
+  integer :: num_abs_boundary_faces
+
+! free surface arrays
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: free_surface_normal
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: free_surface_jacobian2Dw
+  integer, dimension(:,:,:), allocatable :: free_surface_ijk
+  integer, dimension(:), allocatable :: free_surface_ispec
+  integer :: num_free_surface_faces
+
+! acoustic-elastic coupling surface
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: coupling_ac_el_normal
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: coupling_ac_el_jacobian2Dw
+  integer, dimension(:,:,:), allocatable :: coupling_ac_el_ijk
+  integer, dimension(:), allocatable :: coupling_ac_el_ispec
+  integer :: num_coupling_ac_el_faces
+
+! for stacey
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rho_vp,rho_vs
+
+! anisotropy
+  integer :: NSPEC_ANISO
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: &
+            c11store,c12store,c13store,c14store,c15store,c16store,&
+            c22store,c23store,c24store,c25store,c26store,c33store,&
+            c34store,c35store,c36store,c44store,c45store,c46store,&
+            c55store,c56store,c66store
+
+! material domain flags
+  logical, dimension(:), allocatable :: ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic
+
+! name of the database file
+  character(len=256) prname
+
+end module create_regions_mesh_ext_par
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! main routine
+
+subroutine create_regions_mesh_ext(ibool, &
+                        xstore,ystore,zstore,nspec,npointot,myrank,LOCAL_PATH, &
+                        nnodes_ext_mesh,nelmnts_ext_mesh, &
+                        nodes_coords_ext_mesh, elmnts_ext_mesh, &
+                        max_static_memory_size, mat_ext_mesh, materials_ext_mesh, &
+                        nmat_ext_mesh, undef_mat_prop, nundefMat_ext_mesh, &
+                        num_interfaces_ext_mesh, max_interface_size_ext_mesh, &
+                        my_neighbours_ext_mesh, my_nelmnts_neighbours_ext_mesh, &
+                        my_interfaces_ext_mesh, &
+                        ibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh, &
+                        nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, &
+                        NSPEC2D_BOTTOM, NSPEC2D_TOP,&
+                        ibelm_xmin, ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top, &
+                        nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax,&
+                        nodes_ibelm_bottom,nodes_ibelm_top, &
+                        SAVE_MESH_FILES,nglob, &
+                        ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
+                        ATTENUATION,USE_OLSEN_ATTENUATION, &
+                        UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
+                        ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
+                        itopo_bathy)
+
+! create the different regions of the mesh
+
+  use create_regions_mesh_ext_par
+  implicit none
+  !include "constants.h"
+
+! number of spectral elements in each block
+  integer :: nspec
+
+! arrays with the mesh
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+
+  integer :: npointot
+
+! proc numbers for MPI
+  integer :: myrank
+  integer :: NPROC
+
+  character(len=256) :: LOCAL_PATH
+
+! data from the external mesh
+  integer :: nnodes_ext_mesh,nelmnts_ext_mesh
+  double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
+  integer, dimension(ESIZE,nelmnts_ext_mesh) :: elmnts_ext_mesh
+
+! static memory size needed by the solver
+  double precision :: max_static_memory_size
+
+  integer, dimension(2,nelmnts_ext_mesh) :: mat_ext_mesh
+
+! material properties
+  integer :: nmat_ext_mesh,nundefMat_ext_mesh
+  double precision, dimension(6,nmat_ext_mesh) :: materials_ext_mesh
+  character (len=30), dimension(6,nundefMat_ext_mesh):: undef_mat_prop
+
+!  double precision, external :: materials_ext_mesh
+
+! MPI communication
+  integer :: num_interfaces_ext_mesh,max_interface_size_ext_mesh
+  integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
+  integer, dimension(num_interfaces_ext_mesh) :: my_nelmnts_neighbours_ext_mesh
+  integer, dimension(6,max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: my_interfaces_ext_mesh
+  integer, dimension(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+  integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+
+! absorbing boundaries
+  integer  :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, NSPEC2D_BOTTOM, NSPEC2D_TOP
+  integer, dimension(nspec2D_xmin)  :: ibelm_xmin
+  integer, dimension(nspec2D_xmax)  :: ibelm_xmax
+  integer, dimension(nspec2D_ymin)  :: ibelm_ymin
+  integer, dimension(nspec2D_ymax)  :: ibelm_ymax
+  integer, dimension(NSPEC2D_BOTTOM)  :: ibelm_bottom
+  integer, dimension(NSPEC2D_TOP)  :: ibelm_top
+  ! node indices of boundary faces
+  integer, dimension(4,nspec2D_xmin)  :: nodes_ibelm_xmin
+  integer, dimension(4,nspec2D_xmax)  :: nodes_ibelm_xmax
+  integer, dimension(4,nspec2D_ymin)  :: nodes_ibelm_ymin
+  integer, dimension(4,nspec2D_ymax)  :: nodes_ibelm_ymax
+  integer, dimension(4,NSPEC2D_BOTTOM)  :: nodes_ibelm_bottom
+  integer, dimension(4,NSPEC2D_TOP)  :: nodes_ibelm_top
+
+  integer :: nglob
+
+  logical :: SAVE_MESH_FILES
+  logical :: ANISOTROPY
+  logical :: OCEANS,TOPOGRAPHY
+  logical :: ATTENUATION,USE_OLSEN_ATTENUATION
+
+! use integer array to store topography values
+  integer :: UTM_PROJECTION_ZONE
+  logical :: SUPPRESS_UTM_PROJECTION
+  integer :: NX_TOPO,NY_TOPO
+  double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
+  integer, dimension(NX_TOPO,NY_TOPO) :: itopo_bathy
+
+! local parameters
+! static memory size needed by the solver
+  double precision :: static_memory_size
+  real(kind=CUSTOM_REAL) :: model_speed_max,min_resolved_period
+
+! for vtk output
+!  character(len=256) prname_file
+!  integer,dimension(:),allocatable :: itest_flag
+!  integer, dimension(:), allocatable :: elem_flag
+
+! For Piero Basini :
+! integer :: doubling_value_found_for_Piero
+!   double precision :: xmesh,ymesh,zmesh
+!   double precision :: rho,vp,vs
+
+!   integer,dimension(nspec) ::  idoubling
+!   integer :: doubling_value_found_for_Piero
+!   integer, parameter :: NUMBER_OF_STATIONS = 6
+!   double precision, parameter :: RADIUS_TO_EXCLUDE = 250.d0
+!   double precision, dimension(NUMBER_OF_STATIONS) :: utm_x_station,utm_y_station
+
+!   logical :: is_around_a_station
+!   integer :: istation
+
+! ! store bedrock values
+!   integer ::  icornerlat,icornerlong
+!   double precision ::  lat,long,elevation_bedrock
+!   double precision ::  lat_corner,long_corner,ratio_xi,ratio_eta
+!real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: ibedrock
+
+! initializes arrays
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) '  ...allocating arrays '
+  endif
+  call crm_ext_allocate_arrays(nspec,LOCAL_PATH,myrank, &
+                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
+                        nspec2D_bottom,nspec2D_top,ANISOTROPY)
+
+
+! fills location and weights for Gauss-Lobatto-Legendre points, shape and derivations,
+! returns jacobianstore,xixstore,...gammazstore
+! and GLL-point locations in xstore,ystore,zstore
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*) '  ...setting up jacobian '
+  endif
+  call crm_ext_setup_jacobian(myrank, &
+                        xstore,ystore,zstore,nspec, &
+                        nodes_coords_ext_mesh,nnodes_ext_mesh,&
+                        elmnts_ext_mesh,nelmnts_ext_mesh)
+
+! creates ibool index array for projection from local to global points
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*) '  ...indexing global points'
+  endif
+  call crm_ext_setup_indexing(ibool, &
+                        xstore,ystore,zstore,nspec,nglob,npointot, &
+                        nnodes_ext_mesh,nodes_coords_ext_mesh,myrank)
+
+! sets up MPI interfaces between partitions
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*) '  ...preparing MPI interfaces '
+  endif
+  call get_MPI(myrank,nglob,nspec,ibool, &
+                        nelmnts_ext_mesh,elmnts_ext_mesh, &
+                        my_nelmnts_neighbours_ext_mesh, my_interfaces_ext_mesh, &
+                        ibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh, &
+                        num_interfaces_ext_mesh,max_interface_size_ext_mesh,&
+                        my_neighbours_ext_mesh,NPROC)
+
+! sets material velocities
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*) '  ...determining velocity model'
+  endif
+  ! Default model. Using PREM model instead
+  ! call get_model(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
+  ! materials_ext_mesh,nmat_ext_mesh, &
+  ! undef_mat_prop,nundefMat_ext_mesh, &
+  ! ANISOTROPY,LOCAL_PATH)
+
+  call get_model_PREM(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
+                        materials_ext_mesh,nmat_ext_mesh, &
+                        undef_mat_prop,nundefMat_ext_mesh, &
+                        ANISOTROPY,LOCAL_PATH)
+
+! sets up absorbing/free surface boundaries
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*) '  ...setting up absorbing boundaries '
+  endif
+  call get_absorbing_boundary(myrank,nspec,nglob,ibool, &
+                            nodes_coords_ext_mesh,nnodes_ext_mesh, &
+                            ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
+                            nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax, &
+                            nodes_ibelm_bottom,nodes_ibelm_top, &
+                            nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
+                            nspec2D_bottom,nspec2D_top)
+
+! sets up acoustic-elastic coupling surfaces
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*) '  ...detecting acoustic-elastic surfaces '
+  endif
+  call get_coupling_surfaces(myrank, &
+                        nspec,nglob,ibool,NPROC, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+                        num_interfaces_ext_mesh,max_interface_size_ext_mesh, &
+                        my_neighbours_ext_mesh)
+
+! creates mass matrix
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*) '  ...creating mass matrix '
+  endif
+  call create_mass_matrices(nglob,nspec,ibool)
+
+! creates ocean load mass matrix
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*) '  ...creating ocean load mass matrix '
+  endif
+  call create_mass_matrices_ocean_load(nglob,nspec,ibool,OCEANS,TOPOGRAPHY, &
+                        UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
+                        ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
+                        itopo_bathy)
+
+! saves the binary mesh files
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*) '  ...saving databases'
+  endif
+  !call create_name_database(prname,myrank,LOCAL_PATH)
+  call save_arrays_solver_ext_mesh(nspec,nglob, &
+                        xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore,&
+                        gammaxstore,gammaystore,gammazstore, &
+                        jacobianstore, rho_vp,rho_vs,qmu_attenuation_store, &
+                        rhostore,kappastore,mustore, &
+                        rmass,rmass_acoustic,rmass_solid_poroelastic,rmass_fluid_poroelastic, &
+                        OCEANS,rmass_ocean_load,NGLOB_OCEAN, &
+                        ibool, &
+                        xstore_dummy,ystore_dummy,zstore_dummy, &
+                        abs_boundary_normal,abs_boundary_jacobian2Dw, &
+                        abs_boundary_ijk,abs_boundary_ispec,num_abs_boundary_faces, &
+                        free_surface_normal,free_surface_jacobian2Dw, &
+                        free_surface_ijk,free_surface_ispec,num_free_surface_faces, &
+                        coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
+                        coupling_ac_el_ijk,coupling_ac_el_ispec,num_coupling_ac_el_faces, &
+                        num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
+                        max_interface_size_ext_mesh,ibool_interfaces_ext_mesh, &
+                        prname,SAVE_MESH_FILES, &
+                        ANISOTROPY,NSPEC_ANISO, &
+                        c11store,c12store,c13store,c14store,c15store,c16store, &
+                        c22store,c23store,c24store,c25store,c26store,c33store, &
+                        c34store,c35store,c36store,c44store,c45store,c46store, &
+                        c55store,c56store,c66store, &
+                        ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
+
+! computes the approximate amount of static memory needed to run the solver
+  call memory_eval(nspec,nglob,maxval(nibool_interfaces_ext_mesh),num_interfaces_ext_mesh, &
+                  OCEANS,static_memory_size)
+  call max_all_dp(static_memory_size, max_static_memory_size)
+
+! checks the mesh, stability and resolved period
+  call sync_all()
+  call check_mesh_resolution(myrank,nspec,nglob,ibool,&
+                            xstore_dummy,ystore_dummy,zstore_dummy, &
+                            kappastore,mustore,rho_vp,rho_vs, &
+                            -1.0d0, model_speed_max,min_resolved_period )
+
+! saves binary mesh files for attenuation
+  if( ATTENUATION ) then
+    call get_attenuation_model(myrank,nspec,USE_OLSEN_ATTENUATION, &
+                          mustore,rho_vs,qmu_attenuation_store, &
+                          ispec_is_elastic,min_resolved_period,prname)
+  endif
+
+
+! VTK file output
+!  if( SAVE_MESH_FILES ) then
+!    ! saves material flag assigned for each spectral element into a vtk file
+!    prname_file = prname(1:len_trim(prname))//'material_flag'
+!    allocate(elem_flag(nspec))
+!    elem_flag(:) = mat_ext_mesh(1,:)
+!    call write_VTK_data_elem_i(nspec,nglob, &
+!            xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+!            elem_flag,prname_file)
+!    deallocate(elem_flag)
+!
+!    !plotting abs boundaries
+!    !  allocate(itest_flag(nspec))
+!    !  itest_flag(:) = 0
+!    !  do ispec=1,nspec
+!    !    if( iboun(1,ispec) ) itest_flag(ispec) = 1
+!    !  enddo
+!    !  prname_file = prname(1:len_trim(prname))//'iboundary1_flag'
+!    !  call write_VTK_data_elem_i(nspec,nglob, &
+!    !            xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+!    !            itest_flag,prname_file)
+!    !  deallocate(itest_flag)
+!  endif
+
+! cleanup
+  if( .not. SAVE_MOHO_MESH ) deallocate(xstore_dummy,ystore_dummy,zstore_dummy)
+  deallocate(xixstore,xiystore,xizstore,&
+              etaxstore,etaystore,etazstore,&
+              gammaxstore,gammaystore,gammazstore)
+  deallocate(jacobianstore,qmu_attenuation_store)
+  deallocate(kappastore,mustore,rho_vp,rho_vs)
+
+end subroutine create_regions_mesh_ext
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine crm_ext_allocate_arrays(nspec,LOCAL_PATH,myrank, &
+                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
+                        nspec2D_bottom,nspec2D_top,ANISOTROPY)
+
+  use create_regions_mesh_ext_par
+  implicit none
+
+  integer :: nspec,myrank
+  integer :: nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
+            nspec2D_bottom,nspec2D_top
+
+  character(len=256) :: LOCAL_PATH
+
+  logical :: ANISOTROPY
+
+! local parameters
+  integer :: ier
+
+! memory test
+!  logical,dimension(:),allocatable :: test_mem
+!
+! tests memory availability (including some small buffer of 10*1024 byte)
+!  allocate( test_mem(int(max_static_memory_size)+10*1024),stat=ier)
+!  if(ier /= 0) then
+!    write(IMAIN,*) 'error: try to increase the available process stack size by'
+!    write(IMAIN,*) '       ulimit -s **** '
+!    call exit_MPI(myrank,'not enough memory to allocate arrays')
+!  endif
+!  test_mem(:) = .true.
+!  deallocate( test_mem, stat=ier)
+!  if(ier /= 0) call exit_MPI(myrank,'error to allocate arrays')
+!  call sync_all()
+
+  allocate( xelm(NGNOD),yelm(NGNOD),zelm(NGNOD),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array xelm etc.'
+
+  allocate( qmu_attenuation_store(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! create the name for the database of the current slide and region
+  call create_name_database(prname,myrank,LOCAL_PATH)
+
+! Gauss-Lobatto-Legendre points of integration
+  allocate(xigll(NGLLX),yigll(NGLLY),zigll(NGLLZ),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array xigll etc.'
+
+! Gauss-Lobatto-Legendre weights of integration
+  allocate(wxgll(NGLLX),wygll(NGLLY),wzgll(NGLLZ),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array wxgll etc.'
+
+! 3D shape functions and their derivatives
+  allocate(shape3D(NGNOD,NGLLX,NGLLY,NGLLZ), &
+          dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array shape3D etc.'
+
+! 2D shape functions and their derivatives
+  allocate(shape2D_x(NGNOD2D,NGLLY,NGLLZ), &
+          shape2D_y(NGNOD2D,NGLLX,NGLLZ), &
+          shape2D_bottom(NGNOD2D,NGLLX,NGLLY), &
+          shape2D_top(NGNOD2D,NGLLX,NGLLY),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array shape2D_x etc.'
+
+  allocate(dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ), &
+          dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ), &
+          dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY), &
+          dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array dershape2D_x etc.'
+
+  allocate(wgllwgll_xy(NGLLX,NGLLY), &
+          wgllwgll_xz(NGLLX,NGLLZ), &
+          wgllwgll_yz(NGLLY,NGLLZ),stat=ier)
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! Stacey
+  allocate(rho_vp(NGLLX,NGLLY,NGLLZ,nspec), &
+          rho_vs(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! array with model density
+  allocate(rhostore(NGLLX,NGLLY,NGLLZ,nspec), &
+          kappastore(NGLLX,NGLLY,NGLLZ,nspec), &
+          mustore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+          !vpstore(NGLLX,NGLLY,NGLLZ,nspec), &
+          !vsstore(NGLLX,NGLLY,NGLLZ,nspec),
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! arrays with mesh parameters
+  allocate(xixstore(NGLLX,NGLLY,NGLLZ,nspec), &
+          xiystore(NGLLX,NGLLY,NGLLZ,nspec), &
+          xizstore(NGLLX,NGLLY,NGLLZ,nspec), &
+          etaxstore(NGLLX,NGLLY,NGLLZ,nspec), &
+          etaystore(NGLLX,NGLLY,NGLLZ,nspec), &
+          etazstore(NGLLX,NGLLY,NGLLZ,nspec), &
+          gammaxstore(NGLLX,NGLLY,NGLLZ,nspec), &
+          gammaystore(NGLLX,NGLLY,NGLLZ,nspec), &
+          gammazstore(NGLLX,NGLLY,NGLLZ,nspec), &
+          jacobianstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! absorbing boundary
+  ! absorbing faces
+  num_abs_boundary_faces = nspec2D_xmin + nspec2D_xmax + nspec2D_ymin + nspec2D_ymax + nspec2D_bottom
+  ! adds faces of free surface if it also absorbs
+  if( ABSORB_FREE_SURFACE ) num_abs_boundary_faces = num_abs_boundary_faces + nspec2D_top
+
+  ! allocates arrays to store info for each face (assumes NGLLX=NGLLY=NGLLZ)
+  allocate( abs_boundary_ispec(num_abs_boundary_faces), &
+           abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces), &
+           abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces), &
+           abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces),stat=ier)
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+  ! free surface faces
+  num_free_surface_faces = nspec2D_top
+
+  ! allocates arrays to store info for each face (assumes NGLLX=NGLLY=NGLLZ)
+  allocate( free_surface_ispec(num_free_surface_faces), &
+           free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces), &
+           free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces), &
+           free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces),stat=ier)
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! array with anisotropy
+  if( ANISOTROPY ) then
+    NSPEC_ANISO = nspec
+  else
+    NSPEC_ANISO = 1
+  endif
+  allocate(c11store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c12store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c13store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c14store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c15store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c16store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c22store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c23store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c24store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c25store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c26store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c33store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c34store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c35store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c36store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c44store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c45store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c46store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c55store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c56store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+          c66store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO),stat=ier)
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! material flags
+  allocate( ispec_is_acoustic(nspec), &
+           ispec_is_elastic(nspec), &
+           ispec_is_poroelastic(nspec), stat=ier)
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+end subroutine crm_ext_allocate_arrays
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine crm_ext_setup_jacobian(myrank, &
+                        xstore,ystore,zstore,nspec, &
+                        nodes_coords_ext_mesh,nnodes_ext_mesh,&
+                        elmnts_ext_mesh,nelmnts_ext_mesh)
+
+  use create_regions_mesh_ext_par
+  implicit none
+
+! number of spectral elements in each block
+  integer :: nspec
+
+  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+
+! data from the external mesh
+  integer :: nnodes_ext_mesh,nelmnts_ext_mesh
+  double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
+  integer, dimension(ESIZE,nelmnts_ext_mesh) :: elmnts_ext_mesh
+
+! proc numbers for MPI
+  integer :: myrank
+
+! local parameters
+  integer :: ispec,ia,i,j,k
+
+! set up coordinates of the Gauss-Lobatto-Legendre points
+  call zwgljd(xigll,wxgll,NGLLX,GAUSSALPHA,GAUSSBETA)
+  call zwgljd(yigll,wygll,NGLLY,GAUSSALPHA,GAUSSBETA)
+  call zwgljd(zigll,wzgll,NGLLZ,GAUSSALPHA,GAUSSBETA)
+
+! if number of points is odd, the middle abscissa is exactly zero
+  if(mod(NGLLX,2) /= 0) xigll((NGLLX-1)/2+1) = ZERO
+  if(mod(NGLLY,2) /= 0) yigll((NGLLY-1)/2+1) = ZERO
+  if(mod(NGLLZ,2) /= 0) zigll((NGLLZ-1)/2+1) = ZERO
+
+! get the 3-D shape functions
+  call get_shape3D(myrank,shape3D,dershape3D,xigll,yigll,zigll)
+
+! get the 2-D shape functions
+  call get_shape2D(myrank,shape2D_x,dershape2D_x,yigll,zigll,NGLLY,NGLLZ)
+  call get_shape2D(myrank,shape2D_y,dershape2D_y,xigll,zigll,NGLLX,NGLLZ)
+  call get_shape2D(myrank,shape2D_bottom,dershape2D_bottom,xigll,yigll,NGLLX,NGLLY)
+  call get_shape2D(myrank,shape2D_top,dershape2D_top,xigll,yigll,NGLLX,NGLLY)
+
+! 2D weights
+  do j=1,NGLLY
+    do i=1,NGLLX
+      wgllwgll_xy(i,j) = wxgll(i)*wygll(j)
+    enddo
+  enddo
+  do k=1,NGLLZ
+    do i=1,NGLLX
+      wgllwgll_xz(i,k) = wxgll(i)*wzgll(k)
+    enddo
+  enddo
+  do k=1,NGLLZ
+    do j=1,NGLLY
+      wgllwgll_yz(j,k) = wygll(j)*wzgll(k)
+    enddo
+  enddo
+
+! point locations
+  xstore(:,:,:,:) = 0.d0
+  ystore(:,:,:,:) = 0.d0
+  zstore(:,:,:,:) = 0.d0
+
+  do ispec = 1, nspec
+    !call get_xyzelm(xelm, yelm, zelm, ispec, elmnts_ext_mesh, nodes_coords_ext_mesh, nspec, nnodes_ext_mesh)
+    do ia = 1,NGNOD
+      xelm(ia) = nodes_coords_ext_mesh(1,elmnts_ext_mesh(ia,ispec))
+      yelm(ia) = nodes_coords_ext_mesh(2,elmnts_ext_mesh(ia,ispec))
+      zelm(ia) = nodes_coords_ext_mesh(3,elmnts_ext_mesh(ia,ispec))
+    enddo
+
+    ! CUBIT should provide a mesh ordering such that the 3D jacobian is defined
+    ! (otherwise mesh would be degenerated)
+    call calc_jacobian(myrank,xixstore,xiystore,xizstore, &
+                      etaxstore,etaystore,etazstore, &
+                      gammaxstore,gammaystore,gammazstore,jacobianstore, &
+                      xstore,ystore,zstore, &
+                      xelm,yelm,zelm,shape3D,dershape3D,ispec,nspec)
+
+  enddo
+
+end subroutine crm_ext_setup_jacobian
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine crm_ext_setup_indexing(ibool, &
+                            xstore,ystore,zstore,nspec,nglob,npointot, &
+                            nnodes_ext_mesh,nodes_coords_ext_mesh,myrank)
+
+! creates global indexing array ibool
+
+  use create_regions_mesh_ext_par
+  implicit none
+
+! number of spectral elements in each block
+  integer :: nspec,nglob,npointot,myrank
+
+! arrays with the mesh
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+
+! data from the external mesh
+  integer :: nnodes_ext_mesh
+  double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
+
+! local parameters
+! variables for creating array ibool
+  double precision, dimension(:), allocatable :: xp,yp,zp
+  integer, dimension(:), allocatable :: locval
+  logical, dimension(:), allocatable :: ifseg
+
+  integer :: ieoff,ilocnum,ier
+  integer :: i,j,k,ispec,iglobnum
+
+! allocate memory for arrays
+  allocate(locval(npointot), &
+          ifseg(npointot), &
+          xp(npointot), &
+          yp(npointot), &
+          zp(npointot),stat=ier)
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! creates temporary global point arrays
+  locval = 0
+  ifseg = .false.
+  xp = 0.d0
+  yp = 0.d0
+  zp = 0.d0
+
+  do ispec=1,nspec
+    ieoff = NGLLX * NGLLY * NGLLZ * (ispec-1)
+    ilocnum = 0
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+          ilocnum = ilocnum + 1
+          xp(ilocnum+ieoff) = xstore(i,j,k,ispec)
+          yp(ilocnum+ieoff) = ystore(i,j,k,ispec)
+          zp(ilocnum+ieoff) = zstore(i,j,k,ispec)
+        enddo
+      enddo
+    enddo
+  enddo
+
+! gets ibool indexing from local (GLL points) to global points
+  call get_global(nspec,xp,yp,zp,ibool,locval,ifseg,nglob,npointot, &
+       minval(nodes_coords_ext_mesh(1,:)),maxval(nodes_coords_ext_mesh(1,:)))
+
+!- we can create a new indirect addressing to reduce cache misses
+  call get_global_indirect_addressing(nspec,nglob,ibool)
+
+!cleanup
+  deallocate(xp,stat=ier); if(ier /= 0) stop 'error in deallocate'
+  deallocate(yp,stat=ier); if(ier /= 0) stop 'error in deallocate'
+  deallocate(zp,stat=ier); if(ier /= 0) stop 'error in deallocate'
+  deallocate(locval,stat=ier); if(ier /= 0) stop 'error in deallocate'
+  deallocate(ifseg,stat=ier); if(ier /= 0) stop 'error in deallocate'
+
+! unique global point locations
+  allocate(xstore_dummy(nglob), &
+          ystore_dummy(nglob), &
+          zstore_dummy(nglob),stat=ier)
+  if(ier /= 0) stop 'error in allocate'
+  do ispec = 1, nspec
+     do k = 1, NGLLZ
+        do j = 1, NGLLY
+           do i = 1, NGLLX
+              iglobnum = ibool(i,j,k,ispec)
+              xstore_dummy(iglobnum) = xstore(i,j,k,ispec)
+              ystore_dummy(iglobnum) = ystore(i,j,k,ispec)
+              zstore_dummy(iglobnum) = zstore(i,j,k,ispec)
+           enddo
+        enddo
+     enddo
+  enddo
+
+  end subroutine crm_ext_setup_indexing
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine create_regions_mesh_save_moho( myrank,nglob,nspec, &
+                        nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
+                        nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
+
+  use create_regions_mesh_ext_par
+  implicit none
+
+  integer :: nspec2D_moho_ext
+  integer, dimension(nspec2D_moho_ext) :: ibelm_moho
+  integer, dimension(4,nspec2D_moho_ext) :: nodes_ibelm_moho
+
+  integer :: myrank,nglob,nspec
+
+  ! data from the external mesh
+  integer :: nnodes_ext_mesh
+  double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+! local parameters
+  ! Moho mesh
+  real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_top
+  real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_bot
+  integer,dimension(:,:,:),allocatable :: ijk_moho_top, ijk_moho_bot
+  integer,dimension(:),allocatable :: ibelm_moho_top, ibelm_moho_bot
+  integer :: NSPEC2D_MOHO
+  logical, dimension(:),allocatable :: is_moho_top, is_moho_bot
+
+  real(kind=CUSTOM_REAL),dimension(NGNOD2D) :: xcoord,ycoord,zcoord
+  real(kind=CUSTOM_REAL) :: jacobian2Dw_face(NGLLX,NGLLY)
+  real(kind=CUSTOM_REAL) :: normal_face(NDIM,NGLLX,NGLLY)
+  real(kind=CUSTOM_REAL),dimension(NDIM):: normal
+  integer :: ijk_face(3,NGLLX,NGLLY)
+
+  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: iglob_normals
+  integer,dimension(:),allocatable:: iglob_is_surface
+
+  integer :: imoho_bot,imoho_top
+  integer :: ispec2D,ispec,icorner,iface,i,j,k,igll,iglob,ier
+  integer :: iglob_midpoint,idirect,counter
+  integer :: imoho_top_all,imoho_bot_all,imoho_all
+
+  ! corners indices of reference cube faces
+  integer,dimension(3,4),parameter :: iface1_corner_ijk = &
+             reshape( (/ 1,1,1, 1,NGLLY,1, 1,NGLLY,NGLLZ, 1,1,NGLLZ /),(/3,4/))   ! xmin
+  integer,dimension(3,4),parameter :: iface2_corner_ijk = &
+             reshape( (/ NGLLX,1,1, NGLLX,NGLLY,1, NGLLX,NGLLY,NGLLZ, NGLLX,1,NGLLZ  /),(/3,4/))   ! xmax
+  integer,dimension(3,4),parameter :: iface3_corner_ijk = &
+             reshape( (/ 1,1,1, 1,1,NGLLZ, NGLLX,1,NGLLZ, NGLLX,1,1  /),(/3,4/))   ! ymin
+  integer,dimension(3,4),parameter :: iface4_corner_ijk = &
+             reshape( (/ 1,NGLLY,1, NGLLX,NGLLY,1, NGLLX,NGLLY,NGLLZ, 1,NGLLY,NGLLZ /),(/3,4/))   ! ymax
+  integer,dimension(3,4),parameter :: iface5_corner_ijk = &
+             reshape( (/ 1,1,1, 1,NGLLY,1, NGLLX,NGLLY,1, NGLLX,1,1 /),(/3,4/))  ! bottom
+  integer,dimension(3,4),parameter :: iface6_corner_ijk = &
+             reshape( (/ 1,1,NGLLZ, NGLLX,1,NGLLZ, NGLLX,NGLLY,NGLLZ, 1,NGLLY,NGLLZ  /),(/3,4/))   ! top
+  integer,dimension(3,4,6),parameter :: iface_all_corner_ijk = &
+             reshape( (/ iface1_corner_ijk,iface2_corner_ijk, &
+                 iface3_corner_ijk,iface4_corner_ijk, &
+                 iface5_corner_ijk,iface6_corner_ijk /),(/3,4,6/))   ! all faces
+  ! midpoint indices for each face (xmin,xmax,ymin,ymax,zmin,zmax)
+  integer,dimension(3,6),parameter :: iface_all_midpointijk = &
+             reshape( (/ 1,2,2, NGLLX,2,2, 2,1,2, 2,NGLLY,2, 2,2,1, 2,2,NGLLZ  /),(/3,6/))   ! top
+
+  ! temporary arrays for passing information
+  allocate(iglob_is_surface(nglob), &
+          iglob_normals(NDIM,nglob),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array iglob_is_surface'
+
+  iglob_is_surface = 0
+  iglob_normals = 0._CUSTOM_REAL
+
+  ! loops over given moho surface elements
+  do ispec2D=1, nspec2D_moho_ext
+
+    ! gets element id
+    ispec = ibelm_moho(ispec2D)
+
+    ! looks for i,j,k indices of GLL points on boundary face
+    ! determines element face by given CUBIT corners
+    ! (note: uses point locations rather than point indices to find the element face,
+    !            because the indices refer no more to the newly indexed ibool array )
+    do icorner=1,NGNOD2D
+      xcoord(icorner) = nodes_coords_ext_mesh(1,nodes_ibelm_moho(icorner,ispec2D))
+      ycoord(icorner) = nodes_coords_ext_mesh(2,nodes_ibelm_moho(icorner,ispec2D))
+      zcoord(icorner) = nodes_coords_ext_mesh(3,nodes_ibelm_moho(icorner,ispec2D))
+    enddo
+
+    ! sets face id of reference element associated with this face
+    call get_element_face_id(ispec,xcoord,ycoord,zcoord, &
+                            ibool,nspec,nglob, &
+                            xstore_dummy,ystore_dummy,zstore_dummy, &
+                            iface)
+
+    ! ijk indices of GLL points for face id
+    call get_element_face_gll_indices(iface,ijk_face,NGLLX,NGLLZ)
+
+    ! weighted jacobian and normal
+    call get_jacobian_boundary_face(myrank,nspec, &
+              xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
+              dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
+              wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
+              ispec,iface,jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
+
+    ! normal convention: points away from element
+    ! switch normal direction if necessary
+    do j=1,NGLLY
+      do i=1,NGLLX
+          call get_element_face_normal(ispec,iface,xcoord,ycoord,zcoord, &
+                                      ibool,nspec,nglob, &
+                                      xstore_dummy,ystore_dummy,zstore_dummy, &
+                                      normal_face(:,i,j) )
+      enddo
+    enddo
+
+    ! stores information on global points on moho surface
+    igll = 0
+    do j=1,NGLLY
+      do i=1,NGLLX
+        iglob = ibool(ijk_face(1,i,j),ijk_face(2,i,j),ijk_face(3,i,j),ispec)
+        ! sets flag
+        iglob_is_surface(iglob) = ispec2D
+        ! sets normals
+        iglob_normals(:,iglob) = normal_face(:,i,j)
+      enddo
+    enddo
+  enddo
+
+  ! stores moho elements
+  NSPEC2D_MOHO = nspec2D_moho_ext
+
+  allocate(ibelm_moho_bot(NSPEC2D_MOHO), &
+          ibelm_moho_top(NSPEC2D_MOHO), &
+          normal_moho_top(NDIM,NGLLSQUARE,NSPEC2D_MOHO), &
+          normal_moho_bot(NDIM,NGLLSQUARE,NSPEC2D_MOHO), &
+          ijk_moho_bot(3,NGLLSQUARE,NSPEC2D_MOHO), &
+          ijk_moho_top(3,NGLLSQUARE,NSPEC2D_MOHO),stat=ier)
+  if( ier /= 0 ) stop 'error allocating ibelm_moho_bot'
+
+  ibelm_moho_bot = 0
+  ibelm_moho_top = 0
+
+  ! element flags
+  allocate(is_moho_top(nspec), &
+          is_moho_bot(nspec),stat=ier)
+  if( ier /= 0 ) stop 'error allocating is_moho_top'
+  is_moho_top = .false.
+  is_moho_bot = .false.
+
+  ! finds spectral elements with moho surface
+  imoho_top = 0
+  imoho_bot = 0
+  do ispec=1,nspec
+
+    ! loops over each face
+    do iface = 1,6
+      ! checks if corners of face on surface
+      counter = 0
+      do icorner = 1,NGNOD2D
+        i = iface_all_corner_ijk(1,icorner,iface)
+        j = iface_all_corner_ijk(2,icorner,iface)
+        k = iface_all_corner_ijk(3,icorner,iface)
+        iglob = ibool(i,j,k,ispec)
+
+        ! checks if point on surface
+        if( iglob_is_surface(iglob) > 0 ) then
+          counter = counter+1
+
+          ! reference corner coordinates
+          xcoord(icorner) = xstore_dummy(iglob)
+          ycoord(icorner) = ystore_dummy(iglob)
+          zcoord(icorner) = zstore_dummy(iglob)
+        endif
+      enddo
+
+      ! stores moho informations
+      if( counter == NGNOD2D ) then
+
+        ! gets face GLL points i,j,k indices from element face
+        call get_element_face_gll_indices(iface,ijk_face,NGLLX,NGLLY)
+
+        ! re-computes face infos
+        ! weighted jacobian and normal
+        call get_jacobian_boundary_face(myrank,nspec, &
+              xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
+              dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
+              wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
+              ispec,iface,jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
+
+        ! normal convention: points away from element
+        ! switch normal direction if necessary
+        do j=1,NGLLZ
+          do i=1,NGLLX
+            call get_element_face_normal(ispec,iface,xcoord,ycoord,zcoord, &
+                                      ibool,nspec,nglob, &
+                                      xstore_dummy,ystore_dummy,zstore_dummy, &
+                                      normal_face(:,i,j) )
+          enddo
+        enddo
+
+        ! takes normal stored temporary on a face midpoint
+        i = iface_all_midpointijk(1,iface)
+        j = iface_all_midpointijk(2,iface)
+        k = iface_all_midpointijk(3,iface)
+        iglob_midpoint = ibool(i,j,k,ispec)
+        normal(:) = iglob_normals(:,iglob_midpoint)
+
+        ! determines whether normal points into element or not (top/bottom distinction)
+        call get_element_face_normal_idirect(ispec,iface,xcoord,ycoord,zcoord, &
+                              ibool,nspec,nglob, &
+                              xstore_dummy,ystore_dummy,zstore_dummy, &
+                              normal,idirect )
+
+        ! takes moho surface element id given by id on midpoint
+        ispec2D = iglob_is_surface(iglob_midpoint)
+
+        ! sets face infos for bottom (normal points away from element)
+        if( idirect == 1 ) then
+
+          ! checks validity
+          if( is_moho_bot( ispec) .eqv. .true. ) then
+            print*,'error: moho surface geometry bottom'
+            print*,'  does not allow for mulitple element faces in kernel computation'
+            call exit_mpi(myrank,'error moho bottom elements')
+          endif
+
+          imoho_bot = imoho_bot + 1
+          is_moho_bot(ispec) = .true.
+          ibelm_moho_bot(ispec2D) = ispec
+
+          ! stores on surface gll points (assuming NGLLX = NGLLY = NGLLZ)
+          igll = 0
+          do j=1,NGLLZ
+            do i=1,NGLLX
+              igll = igll+1
+              ijk_moho_bot(:,igll,ispec2D) = ijk_face(:,i,j)
+              normal_moho_bot(:,igll,ispec2D) = normal_face(:,i,j)
+            enddo
+          enddo
+
+        ! sets face infos for top element
+        else if( idirect == 2 ) then
+
+          ! checks validity
+          if( is_moho_top( ispec) .eqv. .true. ) then
+            print*,'error: moho surface geometry top'
+            print*,'  does not allow for mulitple element faces kernel computation'
+            call exit_mpi(myrank,'error moho top elements')
+          endif
+
+          imoho_top = imoho_top + 1
+          is_moho_top(ispec) = .true.
+          ibelm_moho_top(ispec2D) = ispec
+
+          ! gll points
+          igll = 0
+          do j=1,NGLLZ
+            do i=1,NGLLX
+              igll = igll+1
+              ijk_moho_top(:,igll,ispec) = ijk_face(:,i,j)
+              ! note: top elements have normal pointing into element
+              normal_moho_top(:,igll,ispec) = - normal_face(:,i,j)
+            enddo
+          enddo
+        endif
+
+      endif ! counter
+
+    enddo ! iface
+
+    ! checks validity of top/bottom distinction
+    if( is_moho_top(ispec) .and. is_moho_bot(ispec) ) then
+      print*,'error: moho surface elements confusing'
+      print*,'  element:',ispec,'has top and bottom surface'
+      call exit_mpi(myrank,'error moho surface element')
+    endif
+
+  enddo ! ispec2D
+
+  ! note: surface e.g. could be at the free-surface and have no top elements etc...
+  ! user output
+  call sum_all_i( imoho_top, imoho_top_all )
+  call sum_all_i( imoho_bot, imoho_bot_all )
+  call sum_all_i( NSPEC2D_MOHO, imoho_all )
+  if( myrank == 0 ) then
+    write(IMAIN,*) '********'
+    write(IMAIN,*) 'Moho surface:'
+    write(IMAIN,*) '    total surface elements: ',imoho_all
+    write(IMAIN,*) '    top elements   :',imoho_top_all
+    write(IMAIN,*) '    bottom elements:',imoho_bot_all
+    write(IMAIN,*) '********'
+  endif
+
+  ! saves moho files: total number of elements, corner points, all points
+  open(unit=27,file=prname(1:len_trim(prname))//'ibelm_moho.bin',status='unknown',form='unformatted')
+  write(27) NSPEC2D_MOHO
+  write(27) ibelm_moho_top
+  write(27) ibelm_moho_bot
+  write(27) ijk_moho_top
+  write(27) ijk_moho_bot
+  close(27)
+  open(unit=27,file=prname(1:len_trim(prname))//'normal_moho.bin',status='unknown',form='unformatted')
+  write(27) normal_moho_top
+  write(27) normal_moho_bot
+  close(27)
+  open(unit=27,file=prname(1:len_trim(prname))//'is_moho.bin',status='unknown',form='unformatted')
+  write(27) is_moho_top
+  write(27) is_moho_bot
+  close(27)
+
+  end subroutine create_regions_mesh_save_moho

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,1006 +1,1068 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-!
-! United States and French Government Sponsorship Acknowledged.
-!
-!=============================================================================!
-!                                                                             !
-!  generate_databases produces a spectral element grid                        !
-!  for a local or regional model.                                             !
-!  The mesher uses the UTM projection                                         !
-!                                                                             !
-!=============================================================================!
-!
-! If you use this code for your own research, please cite at least one article
-! written by the developers of the package, for instance:
-!
-! @ARTICLE{TrKoLi08,
-! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
-! title = {Spectral-Element and Adjoint Methods in Seismology},
-! journal = {Communications in Computational Physics},
-! year = {2008},
-! volume = {3},
-! pages = {1-32},
-! number = {1}}
-!
-! or
-!
-! @ARTICLE{LiPoKoTr04,
-! author = {Qinya Liu and Jascha Polet and Dimitri Komatitsch and Jeroen Tromp},
-! title = {Spectral-element moment tensor inversions for earthquakes in {S}outhern {C}alifornia},
-! journal={Bull. Seismol. Soc. Am.},
-! year = {2004},
-! volume = {94},
-! pages = {1748-1761},
-! number = {5},
-! doi = {10.1785/012004038}}
-!
-! @INCOLLECTION{ChKoViCaVaFe07,
-! author = {Emmanuel Chaljub and Dimitri Komatitsch and Jean-Pierre Vilotte and
-! Yann Capdeville and Bernard Valette and Gaetano Festa},
-! title = {Spectral Element Analysis in Seismology},
-! booktitle = {Advances in Wave Propagation in Heterogeneous Media},
-! publisher = {Elsevier - Academic Press},
-! year = {2007},
-! editor = {Ru-Shan Wu and Val\'erie Maupin},
-! volume = {48},
-! series = {Advances in Geophysics},
-! pages = {365-419}}
-!
-! @ARTICLE{KoVi98,
-! author={D. Komatitsch and J. P. Vilotte},
-! title={The spectral-element method: an efficient tool to simulate the seismic response of 2{D} and 3{D} geological structures},
-! journal={Bull. Seismol. Soc. Am.},
-! year=1998,
-! volume=88,
-! number=2,
-! pages={368-392}}
-!
-! @ARTICLE{KoTr99,
-! author={D. Komatitsch and J. Tromp},
-! year=1999,
-! title={Introduction to the spectral-element method for 3-{D} seismic wave propagation},
-! journal={Geophys. J. Int.},
-! volume=139,
-! number=3,
-! pages={806-822},
-! doi={10.1046/j.1365-246x.1999.00967.x}}
-!
-! @ARTICLE{KoLiTrSuStSh04,
-! author={Dimitri Komatitsch and Qinya Liu and Jeroen Tromp and Peter S\"{u}ss
-!   and Christiane Stidham and John H. Shaw},
-! year=2004,
-! title={Simulations of Ground Motion in the {L}os {A}ngeles {B}asin
-!   based upon the Spectral-Element Method},
-! journal={Bull. Seism. Soc. Am.},
-! volume=94,
-! number=1,
-! pages={187-206}}
-!
-! and/or another article from http://web.univ-pau.fr/~dkomati1/publications.html
-!
-!
-! If you use the kernel capabilities of the code, please cite at least one article
-! written by the developers of the package, for instance:
-!
-! @ARTICLE{TrKoLi08,
-! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
-! title = {Spectral-Element and Adjoint Methods in Seismology},
-! journal = {Communications in Computational Physics},
-! year = {2008},
-! volume = {3},
-! pages = {1-32},
-! number = {1}}
-!
-! or
-!
-! @ARTICLE{LiTr06,
-! author={Qinya Liu and Jeroen Tromp},
-! title={Finite-frequency kernels based on adjoint methods},
-! journal={Bull. Seismol. Soc. Am.},
-! year=2006,
-! volume=96,
-! number=6,
-! pages={2383-2397},
-! doi={10.1785/0120060041}}
-!
-!
-! Reference frame - convention:
-! ----------------------------
-!
-! The code uses the following convention for the reference frame:
-!
-!  - X axis is East
-!  - Y axis is North
-!  - Z axis is up
-!
-! Note that this convention is different from both the Aki-Richards convention
-! and the Harvard CMT convention.
-!
-! Let us recall that the Aki-Richards convention is:
-!
-!  - X axis is North
-!  - Y axis is East
-!  - Z axis is down
-!
-! and that the Harvard CMT convention is:
-!
-!  - X axis is South
-!  - Y axis is East
-!  - Z axis is up
-!
-! To report bugs or suggest improvements to the code, please send an email
-! to Jeroen Tromp <jtromp AT princeton.edu> and/or use our online
-! bug tracking system at http://www.geodynamics.org/roundup .
-!
-! Evolution of the code:
-! ---------------------
-!
-! MPI v. 2.0 "Sesame" (Spectral ElementS on Any MEsh), November 2010:
-! Dimitri Komatitsch, Nicolas Le Goff, Roland Martin and Pieyre Le Loher, University of Pau, France,
-! Jeroen Tromp and the Princeton group of developers, Princeton University, USA,
-! and Emanuele Casarotti, INGV Roma, Italy:
-!  support for CUBIT meshes decomposed by SCOTCH, METIS or ZOLTAN;
-!  much faster solver using Michel Deville's inlined matrix products.
-!
-! MPI v. 1.4 Dimitri Komatitsch, University of Pau, Qinya Liu and others, Caltech, September 2006:
-!  better adjoint and kernel calculations, faster and better I/Os
-!  on very large systems, many small improvements and bug fixes
-!
-! MPI v. 1.3 Dimitri Komatitsch, University of Pau, and Qinya Liu, Caltech, July 2005:
-!  serial version, regular mesh, adjoint and kernel calculations, ParaView support
-!
-! MPI v. 1.2 Min Chen and Dimitri Komatitsch, Caltech, July 2004:
-!  full anisotropy, volume movie
-!
-! MPI v. 1.1 Dimitri Komatitsch, Caltech, October 2002: Zhu's Moho map, scaling
-!  of Vs with depth, Hauksson's regional model, attenuation, oceans, movies
-!
-! MPI v. 1.0 Dimitri Komatitsch, Caltech, May 2002: first MPI version based on global code
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  module generate_databases_par
-
-  implicit none
-
-  include "constants.h"
-
-! number of spectral elements in each block
-  integer nspec,npointot
-
-! local to global indexing array
-  integer, dimension(:,:,:,:), allocatable :: ibool
-
-! arrays with the mesh in double precision
-  double precision, dimension(:,:,:,:), allocatable :: xstore,ystore,zstore
-
-! proc numbers for MPI
-  integer :: myrank,sizeprocs,ier
-
-! use integer array to store topography values
-  integer :: UTM_PROJECTION_ZONE
-  logical :: SUPPRESS_UTM_PROJECTION
-  integer :: NX_TOPO,NY_TOPO
-  double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
-  character(len=100) :: topo_file
-  integer, dimension(:,:), allocatable :: itopo_bathy
-
-! timer MPI
-  double precision, external :: wtime
-  double precision :: time_start,tCPU
-
-! parameters read from parameter file
-  integer :: NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,SIMULATION_TYPE
-  integer :: NSOURCES
-
-  double precision :: DT,HDUR_MOVIE
-
-  logical :: ATTENUATION,USE_OLSEN_ATTENUATION, &
-          OCEANS, TOPOGRAPHY, SAVE_FORWARD
-  logical :: ANISOTROPY,ABSORBING_CONDITIONS,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
-
-  logical :: MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
-          USE_HIGHRES_FOR_MOVIES
-  integer :: NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO,NTSTEP_BETWEEN_READ_ADJSRC
-
-  character(len=256) OUTPUT_FILES,LOCAL_PATH
-
-! parameters deduced from parameters read from file
-  integer :: NPROC
-
-! static memory size that will be needed by the solver
-  double precision :: max_static_memory_size,max_static_memory_size_request
-
-! this for all the regions
-  integer NSPEC_AB,NGLOB_AB
-
-  integer NSPEC2D_BOTTOM,NSPEC2D_TOP
-
-  double precision min_elevation,max_elevation
-  double precision min_elevation_all,max_elevation_all
-
-! for Databases of external meshes
-  character(len=256) prname
-  integer :: dummy_node
-  integer :: dummy_elmnt
-  integer :: ispec, inode, num_interface,ie,imat,iface,icorner
-  integer :: nnodes_ext_mesh, nelmnts_ext_mesh
-  integer  :: num_interfaces_ext_mesh
-  integer  :: max_interface_size_ext_mesh
-  integer  :: nmat_ext_mesh, nundefMat_ext_mesh
-  integer, dimension(:), allocatable  :: my_neighbours_ext_mesh
-  integer, dimension(:), allocatable  :: my_nelmnts_neighbours_ext_mesh
-  integer, dimension(:,:,:), allocatable  :: my_interfaces_ext_mesh
-  integer, dimension(:,:), allocatable  :: ibool_interfaces_ext_mesh
-  integer, dimension(:), allocatable  :: nibool_interfaces_ext_mesh
-  double precision, dimension(:,:), allocatable :: nodes_coords_ext_mesh
-  integer, dimension(:,:), allocatable :: elmnts_ext_mesh
-  integer, dimension(:,:), allocatable :: mat_ext_mesh
-  integer :: max_nibool_interfaces_ext_mesh
-  integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh_dummy
-
-! boundaries and materials
-  integer  :: ispec2D, boundary_number
-  integer  :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, nspec2D_bottom_ext, nspec2D_top_ext
-  character (len=30), dimension(:,:), allocatable :: undef_mat_prop
-  integer, dimension(:), allocatable  :: ibelm_xmin,ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top
-  integer, dimension(:,:), allocatable  :: nodes_ibelm_xmin,nodes_ibelm_xmax, &
-              nodes_ibelm_ymin, nodes_ibelm_ymax, nodes_ibelm_bottom, nodes_ibelm_top
-  double precision, dimension(:,:), allocatable :: materials_ext_mesh
-
-! moho (optional)
-  integer :: nspec2D_moho_ext
-  integer, dimension(:), allocatable  :: ibelm_moho
-  integer, dimension(:,:), allocatable  :: nodes_ibelm_moho
-
-  integer :: nglob,nglob_total,nspec_total
-
-  logical,dimension(:),allocatable :: ispec_is_surface_external_mesh,iglob_is_surface_external_mesh
-  integer :: nfaces_surface_ext_mesh,nfaces_surface_glob_ext_mesh
-
-! flag for noise simulation
-  integer :: NOISE_TOMOGRAPHY
-
-  end module generate_databases_par
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine generate_databases
-
-  use generate_databases_par
-  implicit none
-
-! sizeprocs returns number of processes started (should be equal to NPROC).
-! myrank is the rank of each process, between 0 and NPROC-1.
-! as usual in MPI, process 0 is in charge of coordinating everything
-! and also takes care of the main output
-  call world_size(sizeprocs)
-  call world_rank(myrank)
-
-! get the base pathname for output files
-  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
-
-! open main output file, only written to by process 0
-  if(myrank == 0 .and. IMAIN /= ISTANDARD_OUTPUT) &
-    open(unit=IMAIN,file=trim(OUTPUT_FILES)//'/output_mesher.txt',status='unknown')
-
-! get MPI starting time
-  time_start = wtime()
-
-  if(myrank == 0) then
-    write(IMAIN,*)
-    write(IMAIN,*) '******************************************'
-    write(IMAIN,*) '*** Specfem3D MPI Mesher - f90 version ***'
-    write(IMAIN,*) '******************************************'
-    write(IMAIN,*)
-  endif
-
-! read the parameter file
-  call gd_read_parameters()
-
-! makes sure processes are synchronized
-  call sync_all()
-
-! reads topography and bathymetry file
-  call gd_read_topography()
-
-  if(myrank == 0) then
-    write(IMAIN,*)
-    write(IMAIN,*) '**************************'
-    write(IMAIN,*) 'creating mesh in the model'
-    write(IMAIN,*) '**************************'
-    write(IMAIN,*)
-  endif
-
-! reads Databases files
-  call gd_read_partition_files()
-
-! external mesh creation
-  call gd_setup_mesh()
-
-! finalize mesher
-  call gd_finalize()
-
-  end subroutine generate_databases
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine gd_read_parameters
-
-! reads and checks user input parameters
-
-  use generate_databases_par
-  implicit none
-
-! reads Par_file
-  call read_parameter_file( NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
-                        UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
-                        ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
-                        OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
-                        MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
-                        NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
-                        SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION, &
-                        NTSTEP_BETWEEN_OUTPUT_INFO,SIMULATION_TYPE,SAVE_FORWARD, &
-                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
-
-! check that the code is running with the requested nb of processes
-  if(sizeprocs /= NPROC) then
-    if( myrank == 0 ) then
-      write(IMAIN,*) 'error: number of processors supposed to run on: ',NPROC
-      write(IMAIN,*) 'error: number of processors actually run on: ',sizeprocs
-    endif
-    call exit_MPI(myrank,'wrong number of MPI processes')
-  endif
-
-! there would be a problem with absorbing boundaries for different NGLLX,NGLLY,NGLLZ values
-! just to be sure for now..
-  if( ABSORBING_CONDITIONS ) then
-    if( NGLLX /= NGLLY .and. NGLLY /= NGLLZ ) &
-      call exit_MPI(myrank,'must have NGLLX = NGLLY = NGLLZ for external meshes')
-  endif
-
-! info about external mesh simulation
-  if(myrank == 0) then
-    write(IMAIN,*) 'This is process ',myrank
-    write(IMAIN,*) 'There are ',sizeprocs,' MPI processes'
-    write(IMAIN,*) 'Processes are numbered from 0 to ',sizeprocs-1
-    write(IMAIN,*)
-    write(IMAIN,*) 'There is a total of ',NPROC,' slices'
-    write(IMAIN,*)
-    write(IMAIN,*) 'NGLLX = ',NGLLX
-    write(IMAIN,*) 'NGLLY = ',NGLLY
-    write(IMAIN,*) 'NGLLZ = ',NGLLZ
-
-    write(IMAIN,*)
-    write(IMAIN,*) 'Shape functions defined by NGNOD = ',NGNOD,' control nodes'
-    write(IMAIN,*) 'Surface shape functions defined by NGNOD2D = ',NGNOD2D,' control nodes'
-    write(IMAIN,*)
-  endif
-
-! check that reals are either 4 or 8 bytes
-  if(CUSTOM_REAL /= SIZE_REAL .and. CUSTOM_REAL /= SIZE_DOUBLE) &
-    call exit_MPI(myrank,'wrong size of CUSTOM_REAL for reals')
-
-  if(NGNOD /= 8) call exit_MPI(myrank,'number of control nodes must be 8')
-  if(NGNOD2D /= 4) call exit_MPI(myrank,'elements with 8 points should have NGNOD2D = 4')
-
-! for the number of standard linear solids for attenuation
-  if(N_SLS /= 3) call exit_MPI(myrank,'number of SLS must be 3')
-
-  ! exclusive movie flags
-  if( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP ) then
-    MOVIE_SURFACE = .false.
-    CREATE_SHAKEMAP = .false.
-  endif
-
-  ! for noise simulations, we need to save movies at the surface (where the noise is generated)
-  ! and thus we force MOVIE_SURFACE to be .true., in order to use variables defined for surface movies later
-  if ( NOISE_TOMOGRAPHY /= 0 ) then
-    MOVIE_SURFACE = .true.
-    CREATE_SHAKEMAP = .false.
-    if( ( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP ) .and. myrank == 0 ) then
-        write(IMAIN,*) 'error: when running noise simulations ( NOISE_TOMOGRAPHY /= 0 ),'
-        write(IMAIN,*) '       we can NOT use EXTERNAL_MESH_MOVIE_SURFACE or EXTERNAL_MESH_CREATE_SHAKEMAP'
-        write(IMAIN,*) '       change EXTERNAL_MESH_MOVIE_SURFACE & EXTERNAL_MESH_CREATE_SHAKEMAP in constant.h'
-        call exit_MPI(myrank,'incompatible NOISE_TOMOGRAPHY, EXTERNAL_MESH_MOVIE_SURFACE, EXTERNAL_MESH_CREATE_SHAKEMAP')
-    endif
-  endif
-
-
-  if(myrank == 0) then
-! chris: I am not sure if we should suppress the following. topography should appear in the external mesh
-! leave it for now
-
-    write(IMAIN,*)
-    if(SUPPRESS_UTM_PROJECTION) then
-      write(IMAIN,*) 'suppressing UTM projection'
-    else
-      write(IMAIN,*) 'using UTM projection in region ',UTM_PROJECTION_ZONE
-    endif
-
-    write(IMAIN,*)
-    if(ATTENUATION) then
-      write(IMAIN,*) 'incorporating attenuation using ',N_SLS,' standard linear solids'
-      if(USE_OLSEN_ATTENUATION) then
-        write(IMAIN,*) 'using Olsen''s attenuation'
-      else
-        write(IMAIN,*) 'not using Olsen''s attenuation'
-      endif
-    else
-      write(IMAIN,*) 'no attenuation'
-    endif
-
-    write(IMAIN,*)
-    if(ANISOTROPY) then
-      write(IMAIN,*) 'incorporating anisotropy'
-    else
-      write(IMAIN,*) 'no anisotropy'
-    endif
-
-    write(IMAIN,*)
-    if(OCEANS) then
-      write(IMAIN,*) 'incorporating the oceans using equivalent load'
-      if( TOPOGRAPHY ) write(IMAIN,*) ' with elevation from topography file'
-    else
-      write(IMAIN,*) 'no oceans'
-    endif
-
-    write(IMAIN,*)
-
-  endif
-
-  end subroutine gd_read_parameters
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine gd_read_topography
-
-! reads in topography files
-
-  use generate_databases_par
-  implicit none
-
-  if( OCEANS .and. TOPOGRAPHY ) then
-
-    ! for Southern California
-    NX_TOPO = NX_TOPO_SOCAL
-    NY_TOPO = NY_TOPO_SOCAL
-    ORIG_LAT_TOPO = ORIG_LAT_TOPO_SOCAL
-    ORIG_LONG_TOPO = ORIG_LONG_TOPO_SOCAL
-    DEGREES_PER_CELL_TOPO = DEGREES_PER_CELL_TOPO_SOCAL
-    topo_file = TOPO_FILE_SOCAL
-
-    allocate(itopo_bathy(NX_TOPO,NY_TOPO),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array itopo_bathy'
-
-    call read_topo_bathy_file(itopo_bathy,NX_TOPO,NY_TOPO,topo_file)
-
-    if(myrank == 0) then
-      write(IMAIN,*)
-      write(IMAIN,*) 'regional topography file read ranges in m from ',minval(itopo_bathy),' to ',maxval(itopo_bathy)
-      write(IMAIN,*)
-    endif
-  else
-    NX_TOPO = 1
-    NY_TOPO = 1
-    allocate(itopo_bathy(NX_TOPO,NY_TOPO),stat=ier)
-    if( ier /= 0 ) stop 'error allocating dummy array itopo_bathy'
-
-  endif
-
-!! read basement map
-!  if(BASEMENT_MAP) then
-!    call get_value_string(BASEMENT_MAP_FILE,'model.BASEMENT_MAP_FILE', &
-!                                   '../in_data_files/la_basement/reggridbase2_filtered_ascii.dat')
-!    open(unit=55,file=BASEMENT_MAP_FILE,status='old',action='read')
-!    do ix=1,NX_BASEMENT
-!      do iy=1,NY_BASEMENT
-!        read(55,*) iz_basement
-!        z_basement(ix,iy) = dble(iz_basement)
-!      enddo
-!    enddo
-!    close(55)
-!  endif
-
-  end subroutine gd_read_topography
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine gd_read_partition_files
-
-! reads in proc***_Databases files
-
-  use generate_databases_par
-  implicit none
-
-  integer :: num_xmin,num_xmax,num_ymin,num_ymax,num_top,num_bottom,num
-  integer :: num_moho
-  integer :: j
-  character(len=128) :: line
-
-! read databases about external mesh simulation
-! global node coordinates
-  call create_name_database(prname,myrank,LOCAL_PATH)
-  open(unit=IIN,file=prname(1:len_trim(prname))//'Database',status='old',action='read',form='formatted',iostat=ier)
-  if( ier /= 0 ) then
-    write(IMAIN,*) 'error opening file: ',prname(1:len_trim(prname))//'Database'
-    write(IMAIN,*) 'make sure file exists'
-    call exit_mpi(myrank,'error opening database file')
-  endif
-  read(IIN,*) nnodes_ext_mesh
-  allocate(nodes_coords_ext_mesh(NDIM,nnodes_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array nodes_coords_ext_mesh'
-  do inode = 1, nnodes_ext_mesh
-     read(IIN,*) dummy_node, nodes_coords_ext_mesh(1,inode), nodes_coords_ext_mesh(2,inode), &
-                nodes_coords_ext_mesh(3,inode)
-  enddo
-
-  call sum_all_i(nnodes_ext_mesh,num)
-  if(myrank == 0) then
-    write(IMAIN,*) '  external mesh points: ',num
-  endif
-  call sync_all()
-
-! read materials' physical properties
-  read(IIN,*) nmat_ext_mesh, nundefMat_ext_mesh
-  allocate(materials_ext_mesh(6,nmat_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array materials_ext_mesh'
-  do imat = 1, nmat_ext_mesh
-     ! format:        #(1) rho   #(2) vp  #(3) vs  #(4) Q_flag  #(5) anisotropy_flag  #(6) material_domain_id
-     read(IIN,*) materials_ext_mesh(1,imat),  materials_ext_mesh(2,imat),  materials_ext_mesh(3,imat), &
-          materials_ext_mesh(4,imat),  materials_ext_mesh(5,imat), materials_ext_mesh(6,imat)
-
-     ! output
-     !print*,'materials:',materials_ext_mesh(1,imat),  materials_ext_mesh(2,imat),  materials_ext_mesh(3,imat), &
-     !     materials_ext_mesh(4,imat),  materials_ext_mesh(5,imat), materials_ext_mesh(6,imat)
-  end do
-
-  if(myrank == 0) then
-    write(IMAIN,*) '  defined materials: ',nmat_ext_mesh
-  endif
-  call sync_all()
-
-  allocate(undef_mat_prop(6,nundefMat_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array undef_mat_prop'
-  do imat = 1, nundefMat_ext_mesh
-     ! format example tomography:
-     ! -1 tomography elastic tomography_model.xyz 1 2
-     ! format example interface:
-     ! -1 interface 14 15 1 2
-     read(IIN,*) undef_mat_prop(1,imat),undef_mat_prop(2,imat),undef_mat_prop(3,imat),undef_mat_prop(4,imat), &
-          undef_mat_prop(5,imat), undef_mat_prop(6,imat)
-
-     ! output debug
-     !print*,'undefined materials:'
-     !print*,undef_mat_prop(:,imat)
-  end do
-
-  if(myrank == 0) then
-    write(IMAIN,*) '  undefined materials: ',nundefMat_ext_mesh
-  endif
-  call sync_all()
-
-! element indexing
-  read(IIN,*) nelmnts_ext_mesh
-  allocate(elmnts_ext_mesh(esize,nelmnts_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array elmnts_ext_mesh'
-  allocate(mat_ext_mesh(2,nelmnts_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array mat_ext_mesh'
-
-  ! reads in material association for each spectral element and corner node indices
-  do ispec = 1, nelmnts_ext_mesh
-     ! format:
-     ! # ispec_local # material_index_1 # material_index_2 # corner_id1 # corner_id2 # ... # corner_id8
-     read(IIN,*) dummy_elmnt, mat_ext_mesh(1,ispec),mat_ext_mesh(2,ispec), &
-          elmnts_ext_mesh(1,ispec), elmnts_ext_mesh(2,ispec), elmnts_ext_mesh(3,ispec), elmnts_ext_mesh(4,ispec), &
-          elmnts_ext_mesh(5,ispec), elmnts_ext_mesh(6,ispec), elmnts_ext_mesh(7,ispec), elmnts_ext_mesh(8,ispec)
-
-     ! check debug
-     if( dummy_elmnt /= ispec) stop "error ispec order in materials file"
-
-  enddo
-  NSPEC_AB = nelmnts_ext_mesh
-
-  call sum_all_i(nspec_ab,num)
-  if(myrank == 0) then
-    write(IMAIN,*) '  spectral elements: ',num
-  endif
-  call sync_all()
-
-
-! read boundaries
-  read(IIN,*) boundary_number ,nspec2D_xmin
-  if(boundary_number /= 1) stop "Error : invalid database file"
-  read(IIN,*) boundary_number ,nspec2D_xmax
-  if(boundary_number /= 2) stop "Error : invalid database file"
-  read(IIN,*) boundary_number ,nspec2D_ymin
-  if(boundary_number /= 3) stop "Error : invalid database file"
-  read(IIN,*) boundary_number ,nspec2D_ymax
-  if(boundary_number /= 4) stop "Error : invalid database file"
-  read(IIN,*) boundary_number ,nspec2D_bottom_ext
-  if(boundary_number /= 5) stop "Error : invalid database file"
-  read(IIN,*) boundary_number ,nspec2D_top_ext
-  if(boundary_number /= 6) stop "Error : invalid database file"
-
-  NSPEC2D_BOTTOM = nspec2D_bottom_ext
-  NSPEC2D_TOP = nspec2D_top_ext
-
-  allocate(ibelm_xmin(nspec2D_xmin),nodes_ibelm_xmin(4,nspec2D_xmin),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array ibelm_xmin etc.'
-  do ispec2D = 1,nspec2D_xmin
-     read(IIN,*) ibelm_xmin(ispec2D),(nodes_ibelm_xmin(j,ispec2D),j=1,4)
-  end do
-
-  allocate(ibelm_xmax(nspec2D_xmax),nodes_ibelm_xmax(4,nspec2D_xmax),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array ibelm_xmax etc.'
-  do ispec2D = 1,nspec2D_xmax
-     read(IIN,*) ibelm_xmax(ispec2D),(nodes_ibelm_xmax(j,ispec2D),j=1,4)
-  end do
-
-  allocate(ibelm_ymin(nspec2D_ymin),nodes_ibelm_ymin(4,nspec2D_ymin),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array ibelm_ymin'
-  do ispec2D = 1,nspec2D_ymin
-     read(IIN,*) ibelm_ymin(ispec2D),(nodes_ibelm_ymin(j,ispec2D),j=1,4)
-  end do
-
-  allocate(ibelm_ymax(nspec2D_ymax),nodes_ibelm_ymax(4,nspec2D_ymax),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array ibelm_ymax etc.'
-  do ispec2D = 1,nspec2D_ymax
-     read(IIN,*) ibelm_ymax(ispec2D),(nodes_ibelm_ymax(j,ispec2D),j=1,4)
-  end do
-
-  allocate(ibelm_bottom(nspec2D_bottom_ext),nodes_ibelm_bottom(4,nspec2D_bottom_ext),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array ibelm_bottom etc.'
-  do ispec2D = 1,nspec2D_bottom_ext
-     read(IIN,*) ibelm_bottom(ispec2D),(nodes_ibelm_bottom(j,ispec2D),j=1,4)
-  end do
-
-  allocate(ibelm_top(nspec2D_top_ext),nodes_ibelm_top(4,nspec2D_top_ext),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array ibelm_top etc.'
-  do ispec2D = 1,nspec2D_top_ext
-     read(IIN,*) ibelm_top(ispec2D),(nodes_ibelm_top(j,ispec2D),j=1,4)
-  end do
-
-  call sum_all_i(nspec2D_xmin,num_xmin)
-  call sum_all_i(nspec2D_xmax,num_xmax)
-  call sum_all_i(nspec2D_ymin,num_ymin)
-  call sum_all_i(nspec2D_ymax,num_ymax)
-  call sum_all_i(nspec2D_top_ext,num_top)
-  call sum_all_i(nspec2D_bottom_ext,num_bottom)
-
-  if(myrank == 0) then
-    write(IMAIN,*) '  absorbing boundaries: '
-    write(IMAIN,*) '    xmin,xmax: ',num_xmin,num_xmax
-    write(IMAIN,*) '    ymin,ymax: ',num_ymin,num_ymax
-    write(IMAIN,*) '    bottom,top: ',num_bottom,num_top
-  endif
-  call sync_all()
-
-! MPI interfaces between different partitions
-  ! format: #number_of_MPI_interfaces  #maximum_number_of_elements_on_each_interface
-  read(IIN,*) num_interfaces_ext_mesh, max_interface_size_ext_mesh
-
-  ! allocates interfaces
-  allocate(my_neighbours_ext_mesh(num_interfaces_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array my_neighbours_ext_mesh'
-  allocate(my_nelmnts_neighbours_ext_mesh(num_interfaces_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array my_nelmnts_neighbours_ext_mesh'
-  allocate(my_interfaces_ext_mesh(6,max_interface_size_ext_mesh,num_interfaces_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array my_interfaces_ext_mesh'
-  allocate(ibool_interfaces_ext_mesh(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array ibool_interfaces_ext_mesh'
-  allocate(nibool_interfaces_ext_mesh(num_interfaces_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array nibool_interfaces_ext_mesh'
-
-  ! loops over MPI interfaces with other partitions
-  do num_interface = 1, num_interfaces_ext_mesh
-    ! format: #process_interface_id  #number_of_elements_on_interface
-    ! where
-    !     process_interface_id = rank of (neighbor) process to share MPI interface with
-    !     number_of_elements_on_interface = number of interface elements
-    read(IIN,*) my_neighbours_ext_mesh(num_interface), my_nelmnts_neighbours_ext_mesh(num_interface)
-
-    ! loops over interface elements
-    do ie = 1, my_nelmnts_neighbours_ext_mesh(num_interface)
-      ! format: #(1)spectral_element_id  #(2)interface_type  #(3)node_id1  #(4)node_id2 #(5)...
-      !
-      ! interface types:
-      !     1  -  corner point only
-      !     2  -  element edge
-      !     4  -  element face
-      read(IIN,*) my_interfaces_ext_mesh(1,ie,num_interface), my_interfaces_ext_mesh(2,ie,num_interface), &
-                  my_interfaces_ext_mesh(3,ie,num_interface), my_interfaces_ext_mesh(4,ie,num_interface), &
-                  my_interfaces_ext_mesh(5,ie,num_interface), my_interfaces_ext_mesh(6,ie,num_interface)
-    enddo
-  enddo
-
-  call sum_all_i(num_interfaces_ext_mesh,num)
-  if(myrank == 0) then
-    write(IMAIN,*) '  number of MPI partition interfaces: ',num
-  endif
-  call sync_all()
-
-  ! optional moho
-  if( SAVE_MOHO_MESH ) then
-    ! checks if additional line exists
-    read(IIN,'(a128)',iostat=ier) line
-    if( ier /= 0 ) then
-      ! no moho informations given
-      nspec2D_moho_ext = 0
-      boundary_number = 7
-    else
-      ! tries to read in number of moho elements
-      read(line,*,iostat=ier) boundary_number ,nspec2D_moho_ext
-      if( ier /= 0 ) call exit_mpi(myrank,'error reading moho mesh in database')
-    endif
-    if(boundary_number /= 7) stop "Error : invalid database file"
-
-    ! checks total number of elements
-    call sum_all_i(nspec2D_moho_ext,num_moho)
-    if( num_moho == 0 ) call exit_mpi(myrank,'error no moho mesh in database')
-
-    ! reads in element informations
-    allocate(ibelm_moho(nspec2D_moho_ext),nodes_ibelm_moho(4,nspec2D_moho_ext),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array ibelm_moho etc.'
-    do ispec2D = 1,nspec2D_moho_ext
-      ! format: #element_id #node_id1 #node_id2 #node_id3 #node_id4
-      read(IIN,*) ibelm_moho(ispec2D),(nodes_ibelm_moho(j,ispec2D),j=1,4)
-    end do
-
-    ! user output
-    if(myrank == 0) then
-      write(IMAIN,*) '  moho surfaces: ',num_moho
-    endif
-    call sync_all()
-  endif
-
-  close(IIN)
-
-  end subroutine gd_read_partition_files
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine gd_setup_mesh
-
-! mesh creation for static solver
-
-  use generate_databases_par
-  implicit none
-
-! assign theoretical number of elements
-  nspec = NSPEC_AB
-
-! compute maximum number of points
-  npointot = nspec * NGLLCUBE
-
-! use dynamic allocation to allocate memory for arrays
-!  allocate(idoubling(nspec))
-  allocate(ibool(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array ibool'
-  allocate(xstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array xstore'
-  allocate(ystore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array ystore'
-  allocate(zstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
-  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-  call memory_eval_mesher(myrank,nspec,npointot,nnodes_ext_mesh, &
-                        nelmnts_ext_mesh,nmat_ext_mesh,num_interfaces_ext_mesh, &
-                        max_interface_size_ext_mesh,nspec2D_xmin,nspec2D_xmax, &
-                        nspec2D_ymin,nspec2D_ymax,nspec2D_bottom,nspec2D_top, &
-                        max_static_memory_size_request)
-
-  max_static_memory_size = max_static_memory_size_request
-
-! make sure everybody is synchronized
-  call sync_all()
-
-! main working routine to create all the regions of the mesh
-  if(myrank == 0) then
-    write(IMAIN,*) 'create regions: '
-  endif
-  call create_regions_mesh_ext(ibool, &
-                        xstore, ystore, zstore, nspec, npointot, myrank, LOCAL_PATH, &
-                        nnodes_ext_mesh, nelmnts_ext_mesh, &
-                        nodes_coords_ext_mesh, elmnts_ext_mesh, &
-                        max_static_memory_size, mat_ext_mesh, materials_ext_mesh, &
-                        nmat_ext_mesh, undef_mat_prop, nundefMat_ext_mesh, &
-                        num_interfaces_ext_mesh, max_interface_size_ext_mesh, &
-                        my_neighbours_ext_mesh, my_nelmnts_neighbours_ext_mesh, &
-                        my_interfaces_ext_mesh, &
-                        ibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh, &
-                        nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, &
-                        NSPEC2D_BOTTOM, NSPEC2D_TOP,&
-                        ibelm_xmin, ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top, &
-                        nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax, &
-                        nodes_ibelm_bottom,nodes_ibelm_top, &
-                        SAVE_MESH_FILES,nglob, &
-                        ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
-                        ATTENUATION,USE_OLSEN_ATTENUATION, &
-                        UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
-                        ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
-                        itopo_bathy)
-
-! Moho boundary parameters, 2-D jacobians and normals
-  if( SAVE_MOHO_MESH ) then
-    call create_regions_mesh_save_moho(myrank,nglob,nspec, &
-                        nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
-                        nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
-  endif
-
-! defines global number of nodes in model
-  NGLOB_AB = nglob
-
-! print min and max of topography included
-  min_elevation = HUGEVAL
-  max_elevation = -HUGEVAL
-  do iface = 1,nspec2D_top_ext
-     do icorner = 1,NGNOD2D
-        inode = nodes_ibelm_top(icorner,iface)
-        if (nodes_coords_ext_mesh(3,inode) < min_elevation) then
-           min_elevation = nodes_coords_ext_mesh(3,inode)
-        end if
-        if (nodes_coords_ext_mesh(3,inode) > max_elevation) then
-           max_elevation = nodes_coords_ext_mesh(3,inode)
-        end if
-     end do
-  end do
-
-! compute the maximum of the maxima for all the slices using an MPI reduction
-  call min_all_dp(min_elevation,min_elevation_all)
-  call max_all_dp(max_elevation,max_elevation_all)
-
-  if(myrank == 0) then
-     write(IMAIN,*)
-     write(IMAIN,*) 'min and max of topography included in mesh in m is ',min_elevation_all,' ',max_elevation_all
-     write(IMAIN,*)
-  endif
-
-! clean-up
-  deallocate(xstore,ystore,zstore)
-
-! make sure everybody is synchronized
-  call sync_all()
-
-  end subroutine gd_setup_mesh
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine gd_finalize
-
-! checks user input parameters
-
-  use generate_databases_par
-  implicit none
-
-  integer :: i
-
-! print number of points and elements in the mesh
-  call sum_all_i(NGLOB_AB,nglob_total)
-  call sum_all_i(NSPEC_AB,nspec_total)
-  call sync_all()
-  if(myrank == 0) then
-    write(IMAIN,*)
-    write(IMAIN,*) 'Repartition of elements:'
-    write(IMAIN,*) '-----------------------'
-    write(IMAIN,*)
-    write(IMAIN,*) 'total number of elements in each slice: ',NSPEC_AB
-    write(IMAIN,*) 'total number of points in each slice: ',NGLOB_AB
-    write(IMAIN,*)
-    write(IMAIN,*) 'total number of elements in entire mesh: ',nspec_total     ! NSPEC_AB*NPROC
-    write(IMAIN,*) 'total number of points in entire mesh: ',nglob_total        !NGLOB_AB*NPROC
-    write(IMAIN,*) 'total number of DOFs in entire mesh: ',nglob_total*NDIM   !NGLOB_AB*NPROC*NDIM
-    write(IMAIN,*)
-    write(IMAIN,*) 'total number of time steps in the solver will be: ',NSTEP
-    write(IMAIN,*)
-    ! write information about precision used for floating-point operations
-    if(CUSTOM_REAL == SIZE_REAL) then
-      write(IMAIN,*) 'using single precision for the calculations'
-    else
-      write(IMAIN,*) 'using double precision for the calculations'
-    endif
-    write(IMAIN,*)
-    write(IMAIN,*) 'smallest and largest possible floating-point numbers are: ',tiny(1._CUSTOM_REAL),huge(1._CUSTOM_REAL)
-    write(IMAIN,*)
-  endif
-
-! gets number of surface elements (for movie outputs)
-  allocate( ispec_is_surface_external_mesh(NSPEC_AB), &
-           iglob_is_surface_external_mesh(NGLOB_AB),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array'
-  max_nibool_interfaces_ext_mesh = maxval(nibool_interfaces_ext_mesh)
-  allocate(ibool_interfaces_ext_mesh_dummy(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array'
-  do i = 1, num_interfaces_ext_mesh
-     ibool_interfaces_ext_mesh_dummy(:,:) = ibool_interfaces_ext_mesh(1:max_nibool_interfaces_ext_mesh,:)
-  enddo
-  call sync_all()
-  call detect_surface(NPROC,NGLOB_AB,NSPEC_AB,ibool, &
-                        ispec_is_surface_external_mesh, &
-                        iglob_is_surface_external_mesh, &
-                        nfaces_surface_ext_mesh, &
-                        num_interfaces_ext_mesh, &
-                        max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh, &
-                        my_neighbours_ext_mesh, &
-                        ibool_interfaces_ext_mesh_dummy )
-
-  deallocate(ibool)
-  deallocate(ispec_is_surface_external_mesh)
-  deallocate(iglob_is_surface_external_mesh)
-  deallocate(ibool_interfaces_ext_mesh_dummy)
-
-  ! takes number of faces for top, free surface only
-  if( MOVIE_SURFACE .or. CREATE_SHAKEMAP ) then
-    nfaces_surface_ext_mesh = NSPEC2D_TOP
-  endif
-
-! number of surface faces for all partitions together
-  call sum_all_i(nfaces_surface_ext_mesh,nfaces_surface_glob_ext_mesh)
-
-
-! copy number of elements and points in an include file for the solver
-  if( myrank == 0 ) then
-    call save_header_file(NSPEC_AB,NGLOB_AB,NPROC, &
-               ATTENUATION,ANISOTROPY,NSTEP,DT, &
-               SIMULATION_TYPE,max_static_memory_size,nfaces_surface_glob_ext_mesh)
-  endif
-
-! elapsed time since beginning of mesh generation
-  if(myrank == 0) then
-    tCPU = wtime() - time_start
-    write(IMAIN,*)
-    write(IMAIN,*) 'Elapsed time for mesh generation and buffer creation in seconds = ',tCPU
-    write(IMAIN,*) 'End of mesh generation'
-    write(IMAIN,*)
-  endif
-
-! close main output file
-  if(myrank == 0) then
-    write(IMAIN,*) 'done'
-    write(IMAIN,*)
-    close(IMAIN)
-  endif
-
-! synchronize all the processes to make sure everybody has finished
-  call sync_all()
-
-  end subroutine gd_finalize
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+!
+! United States and French Government Sponsorship Acknowledged.
+!
+!=============================================================================!
+!                                                                             !
+!  generate_databases produces a spectral element grid                        !
+!  for a local or regional model.                                             !
+!  The mesher uses the UTM projection                                         !
+!                                                                             !
+!=============================================================================!
+!
+! If you use this code for your own research, please cite at least one article
+! written by the developers of the package, for instance:
+!
+! @ARTICLE{TrKoLi08,
+! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
+! title = {Spectral-Element and Adjoint Methods in Seismology},
+! journal = {Communications in Computational Physics},
+! year = {2008},
+! volume = {3},
+! pages = {1-32},
+! number = {1}}
+!
+! or
+!
+! @ARTICLE{LiPoKoTr04,
+! author = {Qinya Liu and Jascha Polet and Dimitri Komatitsch and Jeroen Tromp},
+! title = {Spectral-element moment tensor inversions for earthquakes in {S}outhern {C}alifornia},
+! journal={Bull. Seismol. Soc. Am.},
+! year = {2004},
+! volume = {94},
+! pages = {1748-1761},
+! number = {5},
+! doi = {10.1785/012004038}}
+!
+! @INCOLLECTION{ChKoViCaVaFe07,
+! author = {Emmanuel Chaljub and Dimitri Komatitsch and Jean-Pierre Vilotte and
+! Yann Capdeville and Bernard Valette and Gaetano Festa},
+! title = {Spectral Element Analysis in Seismology},
+! booktitle = {Advances in Wave Propagation in Heterogeneous Media},
+! publisher = {Elsevier - Academic Press},
+! year = {2007},
+! editor = {Ru-Shan Wu and Val\'erie Maupin},
+! volume = {48},
+! series = {Advances in Geophysics},
+! pages = {365-419}}
+!
+! @ARTICLE{KoVi98,
+! author={D. Komatitsch and J. P. Vilotte},
+! title={The spectral-element method: an efficient tool to simulate the seismic response of 2{D} and 3{D} geological structures},
+! journal={Bull. Seismol. Soc. Am.},
+! year=1998,
+! volume=88,
+! number=2,
+! pages={368-392}}
+!
+! @ARTICLE{KoTr99,
+! author={D. Komatitsch and J. Tromp},
+! year=1999,
+! title={Introduction to the spectral-element method for 3-{D} seismic wave propagation},
+! journal={Geophys. J. Int.},
+! volume=139,
+! number=3,
+! pages={806-822},
+! doi={10.1046/j.1365-246x.1999.00967.x}}
+!
+! @ARTICLE{KoLiTrSuStSh04,
+! author={Dimitri Komatitsch and Qinya Liu and Jeroen Tromp and Peter S\"{u}ss
+!   and Christiane Stidham and John H. Shaw},
+! year=2004,
+! title={Simulations of Ground Motion in the {L}os {A}ngeles {B}asin
+!   based upon the Spectral-Element Method},
+! journal={Bull. Seism. Soc. Am.},
+! volume=94,
+! number=1,
+! pages={187-206}}
+!
+! and/or another article from http://web.univ-pau.fr/~dkomati1/publications.html
+!
+!
+! If you use the kernel capabilities of the code, please cite at least one article
+! written by the developers of the package, for instance:
+!
+! @ARTICLE{TrKoLi08,
+! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
+! title = {Spectral-Element and Adjoint Methods in Seismology},
+! journal = {Communications in Computational Physics},
+! year = {2008},
+! volume = {3},
+! pages = {1-32},
+! number = {1}}
+!
+! or
+!
+! @ARTICLE{LiTr06,
+! author={Qinya Liu and Jeroen Tromp},
+! title={Finite-frequency kernels based on adjoint methods},
+! journal={Bull. Seismol. Soc. Am.},
+! year=2006,
+! volume=96,
+! number=6,
+! pages={2383-2397},
+! doi={10.1785/0120060041}}
+!
+!
+! Reference frame - convention:
+! ----------------------------
+!
+! The code uses the following convention for the reference frame:
+!
+!  - X axis is East
+!  - Y axis is North
+!  - Z axis is up
+!
+! Note that this convention is different from both the Aki-Richards convention
+! and the Harvard CMT convention.
+!
+! Let us recall that the Aki-Richards convention is:
+!
+!  - X axis is North
+!  - Y axis is East
+!  - Z axis is down
+!
+! and that the Harvard CMT convention is:
+!
+!  - X axis is South
+!  - Y axis is East
+!  - Z axis is up
+!
+! To report bugs or suggest improvements to the code, please send an email
+! to Jeroen Tromp <jtromp AT princeton.edu> and/or use our online
+! bug tracking system at http://www.geodynamics.org/roundup .
+!
+! Evolution of the code:
+! ---------------------
+!
+! MPI v. 2.0 "Sesame" (Spectral ElementS on Any MEsh), November 2010:
+! Dimitri Komatitsch, Nicolas Le Goff, Roland Martin and Pieyre Le Loher, University of Pau, France,
+! Jeroen Tromp and the Princeton group of developers, Princeton University, USA,
+! and Emanuele Casarotti, INGV Roma, Italy:
+!  support for CUBIT meshes decomposed by SCOTCH, METIS or ZOLTAN;
+!  much faster solver using Michel Deville's inlined matrix products.
+!
+! MPI v. 1.4 Dimitri Komatitsch, University of Pau, Qinya Liu and others, Caltech, September 2006:
+!  better adjoint and kernel calculations, faster and better I/Os
+!  on very large systems, many small improvements and bug fixes
+!
+! MPI v. 1.3 Dimitri Komatitsch, University of Pau, and Qinya Liu, Caltech, July 2005:
+!  serial version, regular mesh, adjoint and kernel calculations, ParaView support
+!
+! MPI v. 1.2 Min Chen and Dimitri Komatitsch, Caltech, July 2004:
+!  full anisotropy, volume movie
+!
+! MPI v. 1.1 Dimitri Komatitsch, Caltech, October 2002: Zhu's Moho map, scaling
+!  of Vs with depth, Hauksson's regional model, attenuation, oceans, movies
+!
+! MPI v. 1.0 Dimitri Komatitsch, Caltech, May 2002: first MPI version based on global code
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  module generate_databases_par
+
+  implicit none
+
+  include "constants.h"
+
+! number of spectral elements in each block
+  integer nspec,npointot
+
+! local to global indexing array
+  integer, dimension(:,:,:,:), allocatable :: ibool
+
+! arrays with the mesh in double precision
+  double precision, dimension(:,:,:,:), allocatable :: xstore,ystore,zstore
+
+! proc numbers for MPI
+  integer :: myrank,sizeprocs,ier
+
+! use integer array to store topography values
+  integer :: UTM_PROJECTION_ZONE
+  logical :: SUPPRESS_UTM_PROJECTION
+  integer :: NX_TOPO,NY_TOPO
+  double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
+  character(len=100) :: topo_file
+  integer, dimension(:,:), allocatable :: itopo_bathy
+
+! timer MPI
+  double precision, external :: wtime
+  double precision :: time_start,tCPU
+
+! parameters read from parameter file
+  integer :: NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,SIMULATION_TYPE
+  integer :: NSOURCES
+
+  double precision :: DT,HDUR_MOVIE
+
+  logical :: ATTENUATION,USE_OLSEN_ATTENUATION, &
+          OCEANS, TOPOGRAPHY, SAVE_FORWARD
+  logical :: ANISOTROPY,ABSORBING_CONDITIONS,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
+
+  logical :: MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+          USE_HIGHRES_FOR_MOVIES
+  integer :: NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO,NTSTEP_BETWEEN_READ_ADJSRC
+
+  character(len=256) OUTPUT_FILES,LOCAL_PATH
+
+! parameters deduced from parameters read from file
+  integer :: NPROC
+
+! static memory size that will be needed by the solver
+  double precision :: max_static_memory_size,max_static_memory_size_request
+
+! this for all the regions
+  integer NSPEC_AB,NGLOB_AB
+
+  integer NSPEC2D_BOTTOM,NSPEC2D_TOP
+
+  double precision min_elevation,max_elevation
+  double precision min_elevation_all,max_elevation_all
+
+! for Databases of external meshes
+  double precision, dimension(:,:), allocatable :: nodes_coords_ext_mesh
+
+  integer :: dummy_node
+  integer :: dummy_elmnt
+
+  integer :: ispec, inode, num_interface,ie,imat,iface,icorner
+  integer :: nnodes_ext_mesh, nelmnts_ext_mesh
+  integer  :: num_interfaces_ext_mesh
+  integer  :: max_interface_size_ext_mesh
+  integer  :: nmat_ext_mesh, nundefMat_ext_mesh
+  integer, dimension(:), allocatable  :: my_neighbours_ext_mesh
+  integer, dimension(:), allocatable  :: my_nelmnts_neighbours_ext_mesh
+
+  integer, dimension(:,:,:), allocatable  :: my_interfaces_ext_mesh
+  integer, dimension(:,:), allocatable  :: ibool_interfaces_ext_mesh
+  integer, dimension(:), allocatable  :: nibool_interfaces_ext_mesh
+
+  integer, dimension(:,:), allocatable :: elmnts_ext_mesh
+  integer, dimension(:,:), allocatable :: mat_ext_mesh
+  integer :: max_nibool_interfaces_ext_mesh
+  integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh_dummy
+
+  character(len=256) prname
+
+! boundaries and materials
+  double precision, dimension(:,:), allocatable :: materials_ext_mesh
+
+  integer  :: ispec2D, boundary_number
+  integer  :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, nspec2D_bottom_ext, nspec2D_top_ext
+
+  integer, dimension(:), allocatable  :: ibelm_xmin,ibelm_xmax, &
+              ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top
+  integer, dimension(:,:), allocatable  :: nodes_ibelm_xmin,nodes_ibelm_xmax, &
+              nodes_ibelm_ymin, nodes_ibelm_ymax, nodes_ibelm_bottom, nodes_ibelm_top
+
+  character (len=30), dimension(:,:), allocatable :: undef_mat_prop
+
+! moho (optional)
+  integer :: nspec2D_moho_ext
+  integer, dimension(:), allocatable  :: ibelm_moho
+  integer, dimension(:,:), allocatable  :: nodes_ibelm_moho
+
+  integer :: nglob,nglob_total,nspec_total
+
+  logical,dimension(:),allocatable :: ispec_is_surface_external_mesh,iglob_is_surface_external_mesh
+  integer :: nfaces_surface_ext_mesh,nfaces_surface_glob_ext_mesh
+
+! flag for noise simulation
+  integer :: NOISE_TOMOGRAPHY
+
+  end module generate_databases_par
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine generate_databases
+
+  use generate_databases_par
+  implicit none
+
+! sizeprocs returns number of processes started (should be equal to NPROC).
+! myrank is the rank of each process, between 0 and NPROC-1.
+! as usual in MPI, process 0 is in charge of coordinating everything
+! and also takes care of the main output
+  call world_size(sizeprocs)
+  call world_rank(myrank)
+
+! get the base pathname for output files
+  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
+
+! open main output file, only written to by process 0
+  if(myrank == 0 .and. IMAIN /= ISTANDARD_OUTPUT) &
+    open(unit=IMAIN,file=trim(OUTPUT_FILES)//'/output_mesher.txt',status='unknown')
+
+! get MPI starting time
+  time_start = wtime()
+
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) '******************************************'
+    write(IMAIN,*) '*** Specfem3D MPI Mesher - f90 version ***'
+    write(IMAIN,*) '******************************************'
+    write(IMAIN,*)
+  endif
+
+! read the parameter file
+  call gd_read_parameters()
+
+! makes sure processes are synchronized
+  call sync_all()
+
+! reads topography and bathymetry file
+  call gd_read_topography()
+
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) '**************************'
+    write(IMAIN,*) 'creating mesh in the model'
+    write(IMAIN,*) '**************************'
+    write(IMAIN,*)
+  endif
+
+! reads Databases files
+  call gd_read_partition_files()
+
+! external mesh creation
+  call gd_setup_mesh()
+
+! finalize mesher
+  call gd_finalize()
+
+  end subroutine generate_databases
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine gd_read_parameters
+
+! reads and checks user input parameters
+
+  use generate_databases_par
+  implicit none
+
+! reads Par_file
+  call read_parameter_file( NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
+                        UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+                        ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
+                        OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
+                        MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+                        NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
+                        SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION, &
+                        NTSTEP_BETWEEN_OUTPUT_INFO,SIMULATION_TYPE,SAVE_FORWARD, &
+                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
+
+! check that the code is running with the requested nb of processes
+  if(sizeprocs /= NPROC) then
+    if( myrank == 0 ) then
+      write(IMAIN,*) 'error: number of processors supposed to run on: ',NPROC
+      write(IMAIN,*) 'error: number of processors actually run on: ',sizeprocs
+    endif
+    call exit_MPI(myrank,'wrong number of MPI processes')
+  endif
+
+! there would be a problem with absorbing boundaries for different NGLLX,NGLLY,NGLLZ values
+! just to be sure for now..
+  if( ABSORBING_CONDITIONS ) then
+    if( NGLLX /= NGLLY .and. NGLLY /= NGLLZ ) &
+      call exit_MPI(myrank,'must have NGLLX = NGLLY = NGLLZ for external meshes')
+  endif
+
+! info about external mesh simulation
+  if(myrank == 0) then
+    write(IMAIN,*) 'This is process ',myrank
+    write(IMAIN,*) 'There are ',sizeprocs,' MPI processes'
+    write(IMAIN,*) 'Processes are numbered from 0 to ',sizeprocs-1
+    write(IMAIN,*)
+    write(IMAIN,*) 'There is a total of ',NPROC,' slices'
+    write(IMAIN,*)
+    write(IMAIN,*) 'NGLLX = ',NGLLX
+    write(IMAIN,*) 'NGLLY = ',NGLLY
+    write(IMAIN,*) 'NGLLZ = ',NGLLZ
+
+    write(IMAIN,*)
+    write(IMAIN,*) 'Shape functions defined by NGNOD = ',NGNOD,' control nodes'
+    write(IMAIN,*) 'Surface shape functions defined by NGNOD2D = ',NGNOD2D,' control nodes'
+    write(IMAIN,*)
+  endif
+
+! check that reals are either 4 or 8 bytes
+  if(CUSTOM_REAL /= SIZE_REAL .and. CUSTOM_REAL /= SIZE_DOUBLE) &
+    call exit_MPI(myrank,'wrong size of CUSTOM_REAL for reals')
+
+  if(NGNOD /= 8) call exit_MPI(myrank,'number of control nodes must be 8')
+  if(NGNOD2D /= 4) call exit_MPI(myrank,'elements with 8 points should have NGNOD2D = 4')
+
+! for the number of standard linear solids for attenuation
+  if(N_SLS /= 3) call exit_MPI(myrank,'number of SLS must be 3')
+
+  ! exclusive movie flags
+  if( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP ) then
+    MOVIE_SURFACE = .false.
+    CREATE_SHAKEMAP = .false.
+  endif
+
+  ! for noise simulations, we need to save movies at the surface (where the noise is generated)
+  ! and thus we force MOVIE_SURFACE to be .true., in order to use variables defined for surface movies later
+  if ( NOISE_TOMOGRAPHY /= 0 ) then
+    MOVIE_SURFACE = .true.
+    CREATE_SHAKEMAP = .false.
+    if( ( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP ) .and. myrank == 0 ) then
+        write(IMAIN,*) 'error: when running noise simulations ( NOISE_TOMOGRAPHY /= 0 ),'
+        write(IMAIN,*) '       we can NOT use EXTERNAL_MESH_MOVIE_SURFACE or EXTERNAL_MESH_CREATE_SHAKEMAP'
+        write(IMAIN,*) '       change EXTERNAL_MESH_MOVIE_SURFACE & EXTERNAL_MESH_CREATE_SHAKEMAP in constant.h'
+        call exit_MPI(myrank,'incompatible NOISE_TOMOGRAPHY, EXTERNAL_MESH_MOVIE_SURFACE, EXTERNAL_MESH_CREATE_SHAKEMAP')
+    endif
+  endif
+
+
+  if(myrank == 0) then
+! chris: I am not sure if we should suppress the following. topography should appear in the external mesh
+! leave it for now
+
+    write(IMAIN,*)
+    if(SUPPRESS_UTM_PROJECTION) then
+      write(IMAIN,*) 'suppressing UTM projection'
+    else
+      write(IMAIN,*) 'using UTM projection in region ',UTM_PROJECTION_ZONE
+    endif
+
+    write(IMAIN,*)
+    if(ATTENUATION) then
+      write(IMAIN,*) 'incorporating attenuation using ',N_SLS,' standard linear solids'
+      if(USE_OLSEN_ATTENUATION) then
+        write(IMAIN,*) 'using Olsen''s attenuation'
+      else
+        write(IMAIN,*) 'not using Olsen''s attenuation'
+      endif
+    else
+      write(IMAIN,*) 'no attenuation'
+    endif
+
+    write(IMAIN,*)
+    if(ANISOTROPY) then
+      write(IMAIN,*) 'incorporating anisotropy'
+    else
+      write(IMAIN,*) 'no anisotropy'
+    endif
+
+    write(IMAIN,*)
+    if(OCEANS) then
+      write(IMAIN,*) 'incorporating the oceans using equivalent load'
+      if( TOPOGRAPHY ) write(IMAIN,*) ' with elevation from topography file'
+    else
+      write(IMAIN,*) 'no oceans'
+    endif
+
+    write(IMAIN,*)
+
+  endif
+
+  end subroutine gd_read_parameters
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine gd_read_topography
+
+! reads in topography files
+
+  use generate_databases_par
+  implicit none
+
+  if( OCEANS .and. TOPOGRAPHY ) then
+
+    ! for Southern California
+    NX_TOPO = NX_TOPO_SOCAL
+    NY_TOPO = NY_TOPO_SOCAL
+    ORIG_LAT_TOPO = ORIG_LAT_TOPO_SOCAL
+    ORIG_LONG_TOPO = ORIG_LONG_TOPO_SOCAL
+    DEGREES_PER_CELL_TOPO = DEGREES_PER_CELL_TOPO_SOCAL
+    topo_file = TOPO_FILE_SOCAL
+
+    allocate(itopo_bathy(NX_TOPO,NY_TOPO),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array itopo_bathy'
+
+    call read_topo_bathy_file(itopo_bathy,NX_TOPO,NY_TOPO,topo_file)
+
+    if(myrank == 0) then
+      write(IMAIN,*)
+      write(IMAIN,*) 'regional topography file read ranges in m from ',minval(itopo_bathy),' to ',maxval(itopo_bathy)
+      write(IMAIN,*)
+    endif
+  else
+    NX_TOPO = 1
+    NY_TOPO = 1
+    allocate(itopo_bathy(NX_TOPO,NY_TOPO),stat=ier)
+    if( ier /= 0 ) stop 'error allocating dummy array itopo_bathy'
+
+  endif
+
+!! read basement map
+!  if(BASEMENT_MAP) then
+!    call get_value_string(BASEMENT_MAP_FILE,'model.BASEMENT_MAP_FILE', &
+!                                   '../in_data_files/la_basement/reggridbase2_filtered_ascii.dat')
+!    open(unit=55,file=BASEMENT_MAP_FILE,status='old',action='read')
+!    do ix=1,NX_BASEMENT
+!      do iy=1,NY_BASEMENT
+!        read(55,*) iz_basement
+!        z_basement(ix,iy) = dble(iz_basement)
+!      enddo
+!    enddo
+!    close(55)
+!  endif
+
+  end subroutine gd_read_topography
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine gd_read_partition_files
+
+! reads in proc***_Databases files
+
+  use generate_databases_par
+  implicit none
+
+  integer :: num_xmin,num_xmax,num_ymin,num_ymax,num_top,num_bottom,num
+  integer :: num_moho
+  integer :: j
+  character(len=128) :: line
+
+! read databases about external mesh simulation
+! global node coordinates
+  call create_name_database(prname,myrank,LOCAL_PATH)
+  !open(unit=IIN,file=prname(1:len_trim(prname))//'Database', &
+  !      status='old',action='read',form='formatted',iostat=ier)
+  open(unit=IIN,file=prname(1:len_trim(prname))//'Database', &
+        status='old',action='read',form='unformatted',iostat=ier)
+  if( ier /= 0 ) then
+    write(IMAIN,*) 'error opening file: ',prname(1:len_trim(prname))//'Database'
+    write(IMAIN,*) 'make sure file exists'
+    call exit_mpi(myrank,'error opening database file')
+  endif
+  !read(IIN,*) nnodes_ext_mesh
+  read(IIN) nnodes_ext_mesh
+
+  allocate(nodes_coords_ext_mesh(NDIM,nnodes_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array nodes_coords_ext_mesh'
+
+  do inode = 1, nnodes_ext_mesh
+     !read(IIN,*) dummy_node, nodes_coords_ext_mesh(1,inode), nodes_coords_ext_mesh(2,inode), &
+     !           nodes_coords_ext_mesh(3,inode)
+
+     read(IIN) dummy_node, nodes_coords_ext_mesh(1,inode), nodes_coords_ext_mesh(2,inode), &
+                nodes_coords_ext_mesh(3,inode)
+
+  enddo
+
+  call sum_all_i(nnodes_ext_mesh,num)
+  if(myrank == 0) then
+    write(IMAIN,*) '  external mesh points: ',num
+  endif
+  call sync_all()
+
+! read materials' physical properties
+  !read(IIN,*) nmat_ext_mesh, nundefMat_ext_mesh
+  read(IIN) nmat_ext_mesh, nundefMat_ext_mesh
+
+  allocate(materials_ext_mesh(6,nmat_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array materials_ext_mesh'
+  do imat = 1, nmat_ext_mesh
+     ! format:        #(1) rho   #(2) vp  #(3) vs  #(4) Q_flag  #(5) anisotropy_flag  #(6) material_domain_id
+     !read(IIN,*) materials_ext_mesh(1,imat),  materials_ext_mesh(2,imat),  materials_ext_mesh(3,imat), &
+     !     materials_ext_mesh(4,imat),  materials_ext_mesh(5,imat), materials_ext_mesh(6,imat)
+
+     read(IIN) materials_ext_mesh(1,imat),  materials_ext_mesh(2,imat),  materials_ext_mesh(3,imat), &
+          materials_ext_mesh(4,imat),  materials_ext_mesh(5,imat), materials_ext_mesh(6,imat)
+
+     ! output
+     !print*,'materials:',materials_ext_mesh(1,imat),  materials_ext_mesh(2,imat),  materials_ext_mesh(3,imat), &
+     !     materials_ext_mesh(4,imat),  materials_ext_mesh(5,imat), materials_ext_mesh(6,imat)
+  end do
+
+  if(myrank == 0) then
+    write(IMAIN,*) '  defined materials: ',nmat_ext_mesh
+  endif
+  call sync_all()
+
+  allocate(undef_mat_prop(6,nundefMat_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array undef_mat_prop'
+  do imat = 1, nundefMat_ext_mesh
+     ! format example tomography:
+     ! -1 tomography elastic tomography_model.xyz 1 2
+     ! format example interface:
+     ! -1 interface 14 15 1 2
+     !read(IIN,*) undef_mat_prop(1,imat),undef_mat_prop(2,imat),undef_mat_prop(3,imat),undef_mat_prop(4,imat), &
+     !     undef_mat_prop(5,imat), undef_mat_prop(6,imat)
+
+     read(IIN) undef_mat_prop(1,imat),undef_mat_prop(2,imat),undef_mat_prop(3,imat),undef_mat_prop(4,imat), &
+          undef_mat_prop(5,imat), undef_mat_prop(6,imat)
+
+     ! output debug
+     !print*,'undefined materials:'
+     !print*,undef_mat_prop(:,imat)
+  end do
+
+  if(myrank == 0) then
+    write(IMAIN,*) '  undefined materials: ',nundefMat_ext_mesh
+  endif
+  call sync_all()
+
+! element indexing
+  !read(IIN,*) nelmnts_ext_mesh
+  read(IIN) nelmnts_ext_mesh
+  allocate(elmnts_ext_mesh(esize,nelmnts_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array elmnts_ext_mesh'
+  allocate(mat_ext_mesh(2,nelmnts_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array mat_ext_mesh'
+
+  ! reads in material association for each spectral element and corner node indices
+  do ispec = 1, nelmnts_ext_mesh
+     ! format:
+     ! # ispec_local # material_index_1 # material_index_2 # corner_id1 # corner_id2 # ... # corner_id8
+     !read(IIN,*) dummy_elmnt, mat_ext_mesh(1,ispec),mat_ext_mesh(2,ispec), &
+     !     elmnts_ext_mesh(1,ispec), elmnts_ext_mesh(2,ispec), elmnts_ext_mesh(3,ispec), elmnts_ext_mesh(4,ispec), &
+     !     elmnts_ext_mesh(5,ispec), elmnts_ext_mesh(6,ispec), elmnts_ext_mesh(7,ispec), elmnts_ext_mesh(8,ispec)
+
+     read(IIN) dummy_elmnt, mat_ext_mesh(1,ispec),mat_ext_mesh(2,ispec), &
+          elmnts_ext_mesh(1,ispec), elmnts_ext_mesh(2,ispec), elmnts_ext_mesh(3,ispec), elmnts_ext_mesh(4,ispec), &
+          elmnts_ext_mesh(5,ispec), elmnts_ext_mesh(6,ispec), elmnts_ext_mesh(7,ispec), elmnts_ext_mesh(8,ispec)
+
+     ! check debug
+     if( dummy_elmnt /= ispec) stop "error ispec order in materials file"
+
+  enddo
+  NSPEC_AB = nelmnts_ext_mesh
+
+  call sum_all_i(nspec_ab,num)
+  if(myrank == 0) then
+    write(IMAIN,*) '  spectral elements: ',num
+  endif
+  call sync_all()
+
+
+! read boundaries
+  !read(IIN,*) boundary_number ,nspec2D_xmin
+  read(IIN) boundary_number ,nspec2D_xmin
+  if(boundary_number /= 1) stop "Error : invalid database file"
+
+  !read(IIN,*) boundary_number ,nspec2D_xmax
+  read(IIN) boundary_number ,nspec2D_xmax
+  if(boundary_number /= 2) stop "Error : invalid database file"
+
+  !read(IIN,*) boundary_number ,nspec2D_ymin
+  read(IIN) boundary_number ,nspec2D_ymin
+  if(boundary_number /= 3) stop "Error : invalid database file"
+
+  !read(IIN,*) boundary_number ,nspec2D_ymax
+  read(IIN) boundary_number ,nspec2D_ymax
+  if(boundary_number /= 4) stop "Error : invalid database file"
+
+  !read(IIN,*) boundary_number ,nspec2D_bottom_ext
+  read(IIN) boundary_number ,nspec2D_bottom_ext
+  if(boundary_number /= 5) stop "Error : invalid database file"
+
+  !read(IIN,*) boundary_number ,nspec2D_top_ext
+  read(IIN) boundary_number ,nspec2D_top_ext
+  if(boundary_number /= 6) stop "Error : invalid database file"
+
+  NSPEC2D_BOTTOM = nspec2D_bottom_ext
+  NSPEC2D_TOP = nspec2D_top_ext
+
+  allocate(ibelm_xmin(nspec2D_xmin),nodes_ibelm_xmin(4,nspec2D_xmin),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array ibelm_xmin etc.'
+  do ispec2D = 1,nspec2D_xmin
+     !read(IIN,*) ibelm_xmin(ispec2D),(nodes_ibelm_xmin(j,ispec2D),j=1,4)
+     read(IIN) ibelm_xmin(ispec2D),(nodes_ibelm_xmin(j,ispec2D),j=1,4)
+  end do
+
+  allocate(ibelm_xmax(nspec2D_xmax),nodes_ibelm_xmax(4,nspec2D_xmax),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array ibelm_xmax etc.'
+  do ispec2D = 1,nspec2D_xmax
+     !read(IIN,*) ibelm_xmax(ispec2D),(nodes_ibelm_xmax(j,ispec2D),j=1,4)
+     read(IIN) ibelm_xmax(ispec2D),(nodes_ibelm_xmax(j,ispec2D),j=1,4)
+  end do
+
+  allocate(ibelm_ymin(nspec2D_ymin),nodes_ibelm_ymin(4,nspec2D_ymin),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array ibelm_ymin'
+  do ispec2D = 1,nspec2D_ymin
+     !read(IIN,*) ibelm_ymin(ispec2D),(nodes_ibelm_ymin(j,ispec2D),j=1,4)
+     read(IIN) ibelm_ymin(ispec2D),(nodes_ibelm_ymin(j,ispec2D),j=1,4)
+  end do
+
+  allocate(ibelm_ymax(nspec2D_ymax),nodes_ibelm_ymax(4,nspec2D_ymax),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array ibelm_ymax etc.'
+  do ispec2D = 1,nspec2D_ymax
+     !read(IIN,*) ibelm_ymax(ispec2D),(nodes_ibelm_ymax(j,ispec2D),j=1,4)
+     read(IIN) ibelm_ymax(ispec2D),(nodes_ibelm_ymax(j,ispec2D),j=1,4)
+  end do
+
+  allocate(ibelm_bottom(nspec2D_bottom_ext),nodes_ibelm_bottom(4,nspec2D_bottom_ext),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array ibelm_bottom etc.'
+  do ispec2D = 1,nspec2D_bottom_ext
+     !read(IIN,*) ibelm_bottom(ispec2D),(nodes_ibelm_bottom(j,ispec2D),j=1,4)
+     read(IIN) ibelm_bottom(ispec2D),(nodes_ibelm_bottom(j,ispec2D),j=1,4)
+  end do
+
+  allocate(ibelm_top(nspec2D_top_ext),nodes_ibelm_top(4,nspec2D_top_ext),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array ibelm_top etc.'
+  do ispec2D = 1,nspec2D_top_ext
+     !read(IIN,*) ibelm_top(ispec2D),(nodes_ibelm_top(j,ispec2D),j=1,4)
+     read(IIN) ibelm_top(ispec2D),(nodes_ibelm_top(j,ispec2D),j=1,4)
+  end do
+
+  call sum_all_i(nspec2D_xmin,num_xmin)
+  call sum_all_i(nspec2D_xmax,num_xmax)
+  call sum_all_i(nspec2D_ymin,num_ymin)
+  call sum_all_i(nspec2D_ymax,num_ymax)
+  call sum_all_i(nspec2D_top_ext,num_top)
+  call sum_all_i(nspec2D_bottom_ext,num_bottom)
+
+  if(myrank == 0) then
+    write(IMAIN,*) '  absorbing boundaries: '
+    write(IMAIN,*) '    xmin,xmax: ',num_xmin,num_xmax
+    write(IMAIN,*) '    ymin,ymax: ',num_ymin,num_ymax
+    write(IMAIN,*) '    bottom,top: ',num_bottom,num_top
+  endif
+  call sync_all()
+
+! MPI interfaces between different partitions
+  if( NPROC > 1 ) then
+    ! format: #number_of_MPI_interfaces  #maximum_number_of_elements_on_each_interface
+    !read(IIN,*) num_interfaces_ext_mesh, max_interface_size_ext_mesh
+    read(IIN) num_interfaces_ext_mesh, max_interface_size_ext_mesh
+  else
+    num_interfaces_ext_mesh = 0
+    max_interface_size_ext_mesh = 0
+  endif
+
+  ! allocates interfaces
+  allocate(my_neighbours_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array my_neighbours_ext_mesh'
+  allocate(my_nelmnts_neighbours_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array my_nelmnts_neighbours_ext_mesh'
+  allocate(my_interfaces_ext_mesh(6,max_interface_size_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array my_interfaces_ext_mesh'
+  allocate(ibool_interfaces_ext_mesh(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array ibool_interfaces_ext_mesh'
+  allocate(nibool_interfaces_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array nibool_interfaces_ext_mesh'
+
+  ! loops over MPI interfaces with other partitions
+  do num_interface = 1, num_interfaces_ext_mesh
+    ! format: #process_interface_id  #number_of_elements_on_interface
+    ! where
+    !     process_interface_id = rank of (neighbor) process to share MPI interface with
+    !     number_of_elements_on_interface = number of interface elements
+    !read(IIN,*) my_neighbours_ext_mesh(num_interface), my_nelmnts_neighbours_ext_mesh(num_interface)
+    read(IIN) my_neighbours_ext_mesh(num_interface), my_nelmnts_neighbours_ext_mesh(num_interface)
+
+    ! loops over interface elements
+    do ie = 1, my_nelmnts_neighbours_ext_mesh(num_interface)
+      ! format: #(1)spectral_element_id  #(2)interface_type  #(3)node_id1  #(4)node_id2 #(5)...
+      !
+      ! interface types:
+      !     1  -  corner point only
+      !     2  -  element edge
+      !     4  -  element face
+      !read(IIN,*) my_interfaces_ext_mesh(1,ie,num_interface), my_interfaces_ext_mesh(2,ie,num_interface), &
+      !            my_interfaces_ext_mesh(3,ie,num_interface), my_interfaces_ext_mesh(4,ie,num_interface), &
+      !            my_interfaces_ext_mesh(5,ie,num_interface), my_interfaces_ext_mesh(6,ie,num_interface)
+
+      read(IIN) my_interfaces_ext_mesh(1,ie,num_interface), my_interfaces_ext_mesh(2,ie,num_interface), &
+                  my_interfaces_ext_mesh(3,ie,num_interface), my_interfaces_ext_mesh(4,ie,num_interface), &
+                  my_interfaces_ext_mesh(5,ie,num_interface), my_interfaces_ext_mesh(6,ie,num_interface)
+    enddo
+  enddo
+
+  call sum_all_i(num_interfaces_ext_mesh,num)
+  if(myrank == 0) then
+    write(IMAIN,*) '  number of MPI partition interfaces: ',num
+  endif
+  call sync_all()
+
+  ! optional moho
+  if( SAVE_MOHO_MESH ) then
+    ! checks if additional line exists
+    !read(IIN,'(a128)',iostat=ier) line
+    read(IIN,iostat=ier) boundary_number,nspec2D_moho_ext
+    if( ier /= 0 ) then
+      ! no moho informations given
+      nspec2D_moho_ext = 0
+      boundary_number = 7
+    else
+      ! tries to read in number of moho elements
+      read(line,*,iostat=ier) boundary_number ,nspec2D_moho_ext
+      if( ier /= 0 ) call exit_mpi(myrank,'error reading moho mesh in database')
+    endif
+    if(boundary_number /= 7) stop "Error : invalid database file"
+
+    ! checks total number of elements
+    call sum_all_i(nspec2D_moho_ext,num_moho)
+    if( num_moho == 0 ) call exit_mpi(myrank,'error no moho mesh in database')
+
+    ! reads in element informations
+    allocate(ibelm_moho(nspec2D_moho_ext),nodes_ibelm_moho(4,nspec2D_moho_ext),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array ibelm_moho etc.'
+    do ispec2D = 1,nspec2D_moho_ext
+      ! format: #element_id #node_id1 #node_id2 #node_id3 #node_id4
+      !read(IIN,*) ibelm_moho(ispec2D),(nodes_ibelm_moho(j,ispec2D),j=1,4)
+      read(IIN) ibelm_moho(ispec2D),(nodes_ibelm_moho(j,ispec2D),j=1,4)
+    end do
+
+    ! user output
+    if(myrank == 0) then
+      write(IMAIN,*) '  moho surfaces: ',num_moho
+    endif
+    call sync_all()
+  endif
+
+  close(IIN)
+
+  end subroutine gd_read_partition_files
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine gd_setup_mesh
+
+! mesh creation for static solver
+
+  use generate_databases_par
+  implicit none
+
+! assign theoretical number of elements
+  nspec = NSPEC_AB
+
+! compute maximum number of points
+  npointot = nspec * NGLLCUBE
+
+! use dynamic allocation to allocate memory for arrays
+!  allocate(idoubling(nspec))
+  allocate(ibool(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array ibool'
+  allocate(xstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array xstore'
+  allocate(ystore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array ystore'
+  allocate(zstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+  if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+  call memory_eval_mesher(myrank,nspec,npointot,nnodes_ext_mesh, &
+                        nelmnts_ext_mesh,nmat_ext_mesh,num_interfaces_ext_mesh, &
+                        max_interface_size_ext_mesh,nspec2D_xmin,nspec2D_xmax, &
+                        nspec2D_ymin,nspec2D_ymax,nspec2D_bottom,nspec2D_top, &
+                        max_static_memory_size_request)
+
+  max_static_memory_size = max_static_memory_size_request
+
+! make sure everybody is synchronized
+  call sync_all()
+
+! main working routine to create all the regions of the mesh
+  if(myrank == 0) then
+    write(IMAIN,*) 'create regions: '
+  endif
+  call create_regions_mesh_ext(ibool, &
+                        xstore, ystore, zstore, nspec, npointot, myrank, LOCAL_PATH, &
+                        nnodes_ext_mesh, nelmnts_ext_mesh, &
+                        nodes_coords_ext_mesh, elmnts_ext_mesh, &
+                        max_static_memory_size, mat_ext_mesh, materials_ext_mesh, &
+                        nmat_ext_mesh, undef_mat_prop, nundefMat_ext_mesh, &
+                        num_interfaces_ext_mesh, max_interface_size_ext_mesh, &
+                        my_neighbours_ext_mesh, my_nelmnts_neighbours_ext_mesh, &
+                        my_interfaces_ext_mesh, &
+                        ibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh, &
+                        nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, &
+                        NSPEC2D_BOTTOM, NSPEC2D_TOP,&
+                        ibelm_xmin, ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top, &
+                        nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax, &
+                        nodes_ibelm_bottom,nodes_ibelm_top, &
+                        SAVE_MESH_FILES,nglob, &
+                        ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
+                        ATTENUATION,USE_OLSEN_ATTENUATION, &
+                        UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
+                        ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
+                        itopo_bathy)
+
+! Moho boundary parameters, 2-D jacobians and normals
+  if( SAVE_MOHO_MESH ) then
+    call create_regions_mesh_save_moho(myrank,nglob,nspec, &
+                        nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
+                        nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
+  endif
+
+! defines global number of nodes in model
+  NGLOB_AB = nglob
+
+! print min and max of topography included
+  min_elevation = HUGEVAL
+  max_elevation = -HUGEVAL
+  do iface = 1,nspec2D_top_ext
+     do icorner = 1,NGNOD2D
+        inode = nodes_ibelm_top(icorner,iface)
+        if (nodes_coords_ext_mesh(3,inode) < min_elevation) then
+           min_elevation = nodes_coords_ext_mesh(3,inode)
+        end if
+        if (nodes_coords_ext_mesh(3,inode) > max_elevation) then
+           max_elevation = nodes_coords_ext_mesh(3,inode)
+        end if
+     end do
+  end do
+
+! compute the maximum of the maxima for all the slices using an MPI reduction
+  call min_all_dp(min_elevation,min_elevation_all)
+  call max_all_dp(max_elevation,max_elevation_all)
+
+  if(myrank == 0) then
+     write(IMAIN,*)
+     write(IMAIN,*) 'min and max of topography included in mesh in m is ',min_elevation_all,' ',max_elevation_all
+     write(IMAIN,*)
+  endif
+
+! clean-up
+  deallocate(xstore,ystore,zstore)
+
+! make sure everybody is synchronized
+  call sync_all()
+
+  end subroutine gd_setup_mesh
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine gd_finalize
+
+! checks user input parameters
+
+  use generate_databases_par
+  implicit none
+
+  integer :: i
+
+! print number of points and elements in the mesh
+  call sum_all_i(NGLOB_AB,nglob_total)
+  call sum_all_i(NSPEC_AB,nspec_total)
+  call sync_all()
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) 'Repartition of elements:'
+    write(IMAIN,*) '-----------------------'
+    write(IMAIN,*)
+    write(IMAIN,*) 'total number of elements in each slice: ',NSPEC_AB
+    write(IMAIN,*) 'total number of points in each slice: ',NGLOB_AB
+    write(IMAIN,*)
+    write(IMAIN,*) 'total number of elements in entire mesh: ',nspec_total     ! NSPEC_AB*NPROC
+    write(IMAIN,*) 'total number of points in entire mesh: ',nglob_total        !NGLOB_AB*NPROC
+    write(IMAIN,*) 'total number of DOFs in entire mesh: ',nglob_total*NDIM   !NGLOB_AB*NPROC*NDIM
+    write(IMAIN,*)
+    write(IMAIN,*) 'total number of time steps in the solver will be: ',NSTEP
+    write(IMAIN,*)
+    ! write information about precision used for floating-point operations
+    if(CUSTOM_REAL == SIZE_REAL) then
+      write(IMAIN,*) 'using single precision for the calculations'
+    else
+      write(IMAIN,*) 'using double precision for the calculations'
+    endif
+    write(IMAIN,*)
+    write(IMAIN,*) 'smallest and largest possible floating-point numbers are: ',tiny(1._CUSTOM_REAL),huge(1._CUSTOM_REAL)
+    write(IMAIN,*)
+  endif
+
+! gets number of surface elements (for movie outputs)
+  allocate( ispec_is_surface_external_mesh(NSPEC_AB), &
+           iglob_is_surface_external_mesh(NGLOB_AB),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array'
+  max_nibool_interfaces_ext_mesh = maxval(nibool_interfaces_ext_mesh)
+  allocate(ibool_interfaces_ext_mesh_dummy(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array'
+  do i = 1, num_interfaces_ext_mesh
+     ibool_interfaces_ext_mesh_dummy(:,:) = ibool_interfaces_ext_mesh(1:max_nibool_interfaces_ext_mesh,:)
+  enddo
+  call sync_all()
+  call detect_surface(NPROC,NGLOB_AB,NSPEC_AB,ibool, &
+                        ispec_is_surface_external_mesh, &
+                        iglob_is_surface_external_mesh, &
+                        nfaces_surface_ext_mesh, &
+                        num_interfaces_ext_mesh, &
+                        max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh, &
+                        my_neighbours_ext_mesh, &
+                        ibool_interfaces_ext_mesh_dummy )
+
+  deallocate(ibool)
+  deallocate(ispec_is_surface_external_mesh)
+  deallocate(iglob_is_surface_external_mesh)
+  deallocate(ibool_interfaces_ext_mesh_dummy)
+
+  ! takes number of faces for top, free surface only
+  if( MOVIE_SURFACE .or. CREATE_SHAKEMAP ) then
+    nfaces_surface_ext_mesh = NSPEC2D_TOP
+  endif
+
+! number of surface faces for all partitions together
+  call sum_all_i(nfaces_surface_ext_mesh,nfaces_surface_glob_ext_mesh)
+
+
+! copy number of elements and points in an include file for the solver
+  if( myrank == 0 ) then
+    call save_header_file(NSPEC_AB,NGLOB_AB,NPROC, &
+               ATTENUATION,ANISOTROPY,NSTEP,DT, &
+               SIMULATION_TYPE,max_static_memory_size,nfaces_surface_glob_ext_mesh)
+  endif
+
+! elapsed time since beginning of mesh generation
+  if(myrank == 0) then
+    tCPU = wtime() - time_start
+    write(IMAIN,*)
+    write(IMAIN,*) 'Elapsed time for mesh generation and buffer creation in seconds = ',tCPU
+    write(IMAIN,*) 'End of mesh generation'
+    write(IMAIN,*)
+  endif
+
+! close main output file
+  if(myrank == 0) then
+    write(IMAIN,*) 'done'
+    write(IMAIN,*)
+    close(IMAIN)
+  endif
+
+! synchronize all the processes to make sure everybody has finished
+  call sync_all()
+
+  end subroutine gd_finalize

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_global.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_global.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_global.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,295 +1,295 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine get_global(nspec,xp,yp,zp,iglob,loc,ifseg,nglob,npointot,UTM_X_MIN,UTM_X_MAX)
-
-! this routine MUST be in double precision to avoid sensitivity
-! to roundoff errors in the coordinates of the points
-
-! non-structured global numbering software provided by Paul F. Fischer
-
-! leave the sorting subroutines in the same source file to allow for inlining
-
-  implicit none
-
-  include "constants.h"
-
-
-  integer npointot
-  integer nspec,nglob
-  integer iglob(npointot),loc(npointot)
-  logical ifseg(npointot)
-  double precision xp(npointot),yp(npointot),zp(npointot)
-  double precision UTM_X_MIN,UTM_X_MAX
-
-  integer ispec,i,j,ier
-  integer ieoff,ilocnum,nseg,ioff,iseg,ig
-
-  integer, dimension(:), allocatable :: ind,ninseg,iwork
-  double precision, dimension(:), allocatable :: work
-
-! geometry tolerance parameter to calculate number of independent grid points
-! small value for double precision and to avoid sensitivity to roundoff
-  double precision SMALLVALTOL
-
-! define geometrical tolerance based upon typical size of the model
-  SMALLVALTOL = 1.d-10 * dabs(UTM_X_MAX - UTM_X_MIN)
-
-! dynamically allocate arrays
-  allocate(ind(npointot), &
-          ninseg(npointot), &
-          iwork(npointot), &
-          work(npointot),stat=ier)
-  if( ier /= 0 ) stop 'error allocating arrays'
-
-! establish initial pointers
-  do ispec=1,nspec
-    ieoff=NGLLCUBE*(ispec-1)
-    do ilocnum=1,NGLLCUBE
-      loc(ilocnum+ieoff)=ilocnum+ieoff
-    enddo
-  enddo
-
-  ifseg(:)=.false.
-
-  nseg=1
-  ifseg(1)=.true.
-  ninseg(1)=npointot
-
-  do j=1,NDIM
-
-! sort within each segment
-    ioff=1
-    do iseg=1,nseg
-      if(j == 1) then
-        call rank(xp(ioff),ind,ninseg(iseg))
-      else if(j == 2) then
-        call rank(yp(ioff),ind,ninseg(iseg))
-      else
-        call rank(zp(ioff),ind,ninseg(iseg))
-      endif
-      call swap_all(loc(ioff),xp(ioff),yp(ioff),zp(ioff),iwork,work,ind,ninseg(iseg))
-      ioff=ioff+ninseg(iseg)
-    enddo
-
-! check for jumps in current coordinate
-! compare the coordinates of the points within a small tolerance
-    if(j == 1) then
-      do i=2,npointot
-        if(dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
-      enddo
-    else if(j == 2) then
-      do i=2,npointot
-        if(dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
-      enddo
-    else
-      do i=2,npointot
-        if(dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
-      enddo
-    endif
-
-! count up number of different segments
-    nseg=0
-    do i=1,npointot
-      if(ifseg(i)) then
-        nseg=nseg+1
-        ninseg(nseg)=1
-      else
-        ninseg(nseg)=ninseg(nseg)+1
-      endif
-    enddo
-  enddo
-
-! assign global node numbers (now sorted lexicographically)
-  ig=0
-  do i=1,npointot
-    if(ifseg(i)) ig=ig+1
-    iglob(loc(i))=ig
-  enddo
-
-  nglob=ig
-
-! deallocate arrays
-  deallocate(ind)
-  deallocate(ninseg)
-  deallocate(iwork)
-  deallocate(work)
-
-  end subroutine get_global
-
-! -----------------------------------
-
-! sorting routines put in same file to allow for inlining
-
-  subroutine rank(A,IND,N)
-!
-! Use Heap Sort (Numerical Recipes)
-!
-  implicit none
-
-  integer n
-  double precision A(n)
-  integer IND(n)
-
-  integer i,j,l,ir,indx
-  double precision q
-
-  do j=1,n
-   IND(j)=j
-  enddo
-
-  if (n == 1) return
-
-  L=n/2+1
-  ir=n
-  100 CONTINUE
-   IF (l>1) THEN
-      l=l-1
-      indx=ind(l)
-      q=a(indx)
-   ELSE
-      indx=ind(ir)
-      q=a(indx)
-      ind(ir)=ind(1)
-      ir=ir-1
-      if (ir == 1) then
-         ind(1)=indx
-         return
-      endif
-   ENDIF
-   i=l
-   j=l+l
-  200    CONTINUE
-   IF (J <= IR) THEN
-      IF (J<IR) THEN
-         IF ( A(IND(j))<A(IND(j+1)) ) j=j+1
-      ENDIF
-      IF (q<A(IND(j))) THEN
-         IND(I)=IND(J)
-         I=J
-         J=J+J
-      ELSE
-         J=IR+1
-      ENDIF
-   goto 200
-   ENDIF
-   IND(I)=INDX
-  goto 100
-
-  end subroutine rank
-
-! ------------------------------------------------------------------
-
-  subroutine swap_all(IA,A,B,C,IW,W,ind,n)
-!
-! swap arrays IA, A, B and C according to addressing in array IND
-!
-  implicit none
-
-  integer n
-
-  integer IND(n)
-  integer IA(n),IW(n)
-  double precision A(n),B(n),C(n),W(n)
-
-  integer i
-
-  IW(:) = IA(:)
-  W(:) = A(:)
-
-  do i=1,n
-    IA(i)=IW(ind(i))
-    A(i)=W(ind(i))
-  enddo
-
-  W(:) = B(:)
-
-  do i=1,n
-    B(i)=W(ind(i))
-  enddo
-
-  W(:) = C(:)
-
-  do i=1,n
-    C(i)=W(ind(i))
-  enddo
-
-end subroutine swap_all
-
-! ------------------------------------------------------------------
-
-
-  subroutine get_global_indirect_addressing(nspec,nglob,ibool)
-
-!
-!- we can create a new indirect addressing to reduce cache misses
-! (put into this subroutine but compiler keeps on complaining that it can't vectorize loops...)
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: nspec,nglob
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-
-! mask to sort ibool
-  integer, dimension(:), allocatable :: mask_ibool
-  integer, dimension(:,:,:,:), allocatable :: copy_ibool_ori
-  integer :: inumber
-  integer:: i,j,k,ispec,ier
-
-! copies original array
-  allocate(copy_ibool_ori(NGLLX,NGLLY,NGLLZ,nspec),stat=ier); if(ier /= 0) stop 'error in allocate'
-  allocate(mask_ibool(nglob),stat=ier); if(ier /= 0) stop 'error in allocate'
-
-  mask_ibool(:) = -1
-  copy_ibool_ori(:,:,:,:) = ibool(:,:,:,:)
-
-! reduces misses
-  inumber = 0
-  do ispec=1,nspec
-    do k=1,NGLLZ
-      do j=1,NGLLY
-        do i=1,NGLLX
-          if(mask_ibool(copy_ibool_ori(i,j,k,ispec)) == -1) then
-! create a new point
-            inumber = inumber + 1
-            ibool(i,j,k,ispec) = inumber
-            mask_ibool(copy_ibool_ori(i,j,k,ispec)) = inumber
-          else
-! use an existing point created previously
-            ibool(i,j,k,ispec) = mask_ibool(copy_ibool_ori(i,j,k,ispec))
-          endif
-        enddo
-      enddo
-    enddo
-  enddo
-
-! cleanup
-  deallocate(copy_ibool_ori,stat=ier); if(ier /= 0) stop 'error in deallocate'
-  deallocate(mask_ibool,stat=ier); if(ier /= 0) stop 'error in deallocate'
-
-end subroutine get_global_indirect_addressing
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine get_global(nspec,xp,yp,zp,iglob,loc,ifseg,nglob,npointot,UTM_X_MIN,UTM_X_MAX)
+
+! this routine MUST be in double precision to avoid sensitivity
+! to roundoff errors in the coordinates of the points
+
+! non-structured global numbering software provided by Paul F. Fischer
+
+! leave the sorting subroutines in the same source file to allow for inlining
+
+  implicit none
+
+  include "constants.h"
+
+
+  integer npointot
+  integer nspec,nglob
+  integer iglob(npointot),loc(npointot)
+  logical ifseg(npointot)
+  double precision xp(npointot),yp(npointot),zp(npointot)
+  double precision UTM_X_MIN,UTM_X_MAX
+
+  integer ispec,i,j,ier
+  integer ieoff,ilocnum,nseg,ioff,iseg,ig
+
+  integer, dimension(:), allocatable :: ind,ninseg,iwork
+  double precision, dimension(:), allocatable :: work
+
+! geometry tolerance parameter to calculate number of independent grid points
+! small value for double precision and to avoid sensitivity to roundoff
+  double precision SMALLVALTOL
+
+! define geometrical tolerance based upon typical size of the model
+  SMALLVALTOL = 1.d-10 * dabs(UTM_X_MAX - UTM_X_MIN)
+
+! dynamically allocate arrays
+  allocate(ind(npointot), &
+          ninseg(npointot), &
+          iwork(npointot), &
+          work(npointot),stat=ier)
+  if( ier /= 0 ) stop 'error allocating arrays'
+
+! establish initial pointers
+  do ispec=1,nspec
+    ieoff=NGLLCUBE*(ispec-1)
+    do ilocnum=1,NGLLCUBE
+      loc(ilocnum+ieoff)=ilocnum+ieoff
+    enddo
+  enddo
+
+  ifseg(:)=.false.
+
+  nseg=1
+  ifseg(1)=.true.
+  ninseg(1)=npointot
+
+  do j=1,NDIM
+
+! sort within each segment
+    ioff=1
+    do iseg=1,nseg
+      if(j == 1) then
+        call rank(xp(ioff),ind,ninseg(iseg))
+      else if(j == 2) then
+        call rank(yp(ioff),ind,ninseg(iseg))
+      else
+        call rank(zp(ioff),ind,ninseg(iseg))
+      endif
+      call swap_all(loc(ioff),xp(ioff),yp(ioff),zp(ioff),iwork,work,ind,ninseg(iseg))
+      ioff=ioff+ninseg(iseg)
+    enddo
+
+! check for jumps in current coordinate
+! compare the coordinates of the points within a small tolerance
+    if(j == 1) then
+      do i=2,npointot
+        if(dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+      enddo
+    else if(j == 2) then
+      do i=2,npointot
+        if(dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+      enddo
+    else
+      do i=2,npointot
+        if(dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+      enddo
+    endif
+
+! count up number of different segments
+    nseg=0
+    do i=1,npointot
+      if(ifseg(i)) then
+        nseg=nseg+1
+        ninseg(nseg)=1
+      else
+        ninseg(nseg)=ninseg(nseg)+1
+      endif
+    enddo
+  enddo
+
+! assign global node numbers (now sorted lexicographically)
+  ig=0
+  do i=1,npointot
+    if(ifseg(i)) ig=ig+1
+    iglob(loc(i))=ig
+  enddo
+
+  nglob=ig
+
+! deallocate arrays
+  deallocate(ind)
+  deallocate(ninseg)
+  deallocate(iwork)
+  deallocate(work)
+
+  end subroutine get_global
+
+! -----------------------------------
+
+! sorting routines put in same file to allow for inlining
+
+  subroutine rank(A,IND,N)
+!
+! Use Heap Sort (Numerical Recipes)
+!
+  implicit none
+
+  integer n
+  double precision A(n)
+  integer IND(n)
+
+  integer i,j,l,ir,indx
+  double precision q
+
+  do j=1,n
+   IND(j)=j
+  enddo
+
+  if (n == 1) return
+
+  L=n/2+1
+  ir=n
+  100 CONTINUE
+   IF (l>1) THEN
+      l=l-1
+      indx=ind(l)
+      q=a(indx)
+   ELSE
+      indx=ind(ir)
+      q=a(indx)
+      ind(ir)=ind(1)
+      ir=ir-1
+      if (ir == 1) then
+         ind(1)=indx
+         return
+      endif
+   ENDIF
+   i=l
+   j=l+l
+  200    CONTINUE
+   IF (J <= IR) THEN
+      IF (J<IR) THEN
+         IF ( A(IND(j))<A(IND(j+1)) ) j=j+1
+      ENDIF
+      IF (q<A(IND(j))) THEN
+         IND(I)=IND(J)
+         I=J
+         J=J+J
+      ELSE
+         J=IR+1
+      ENDIF
+   goto 200
+   ENDIF
+   IND(I)=INDX
+  goto 100
+
+  end subroutine rank
+
+! ------------------------------------------------------------------
+
+  subroutine swap_all(IA,A,B,C,IW,W,ind,n)
+!
+! swap arrays IA, A, B and C according to addressing in array IND
+!
+  implicit none
+
+  integer n
+
+  integer IND(n)
+  integer IA(n),IW(n)
+  double precision A(n),B(n),C(n),W(n)
+
+  integer i
+
+  IW(:) = IA(:)
+  W(:) = A(:)
+
+  do i=1,n
+    IA(i)=IW(ind(i))
+    A(i)=W(ind(i))
+  enddo
+
+  W(:) = B(:)
+
+  do i=1,n
+    B(i)=W(ind(i))
+  enddo
+
+  W(:) = C(:)
+
+  do i=1,n
+    C(i)=W(ind(i))
+  enddo
+
+  end subroutine swap_all
+
+! ------------------------------------------------------------------
+
+
+  subroutine get_global_indirect_addressing(nspec,nglob,ibool)
+
+!
+!- we can create a new indirect addressing to reduce cache misses
+! (put into this subroutine but compiler keeps on complaining that it can't vectorize loops...)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+! mask to sort ibool
+  integer, dimension(:), allocatable :: mask_ibool
+  integer, dimension(:,:,:,:), allocatable :: copy_ibool_ori
+  integer :: inumber
+  integer:: i,j,k,ispec,ier
+
+! copies original array
+  allocate(copy_ibool_ori(NGLLX,NGLLY,NGLLZ,nspec),stat=ier); if(ier /= 0) stop 'error in allocate'
+  allocate(mask_ibool(nglob),stat=ier); if(ier /= 0) stop 'error in allocate'
+
+  mask_ibool(:) = -1
+  copy_ibool_ori(:,:,:,:) = ibool(:,:,:,:)
+
+! reduces misses
+  inumber = 0
+  do ispec=1,nspec
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+          if(mask_ibool(copy_ibool_ori(i,j,k,ispec)) == -1) then
+! create a new point
+            inumber = inumber + 1
+            ibool(i,j,k,ispec) = inumber
+            mask_ibool(copy_ibool_ori(i,j,k,ispec)) = inumber
+          else
+! use an existing point created previously
+            ibool(i,j,k,ispec) = mask_ibool(copy_ibool_ori(i,j,k,ispec))
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+
+! cleanup
+  deallocate(copy_ibool_ori,stat=ier); if(ier /= 0) stop 'error in deallocate'
+  deallocate(mask_ibool,stat=ier); if(ier /= 0) stop 'error in deallocate'
+
+  end subroutine get_global_indirect_addressing

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -69,8 +69,6 @@
   ! use acoustic domains for simulation
   logical,parameter :: USE_PURE_ACOUSTIC_MOD = .false.
 
-
-
   ! initializes element domain flags
   ispec_is_acoustic(:) = .false.
   ispec_is_elastic(:) = .false.
@@ -387,7 +385,7 @@
 
   ! anisotropy
   logical :: ANISOTROPY
-  
+
   ! local parameters
   real(kind=CUSTOM_REAL) :: vp,vs,rho,qmu_atten
   real(kind=CUSTOM_REAL) :: c11,c12,c13,c14,c15,c16,c22,c23,c24,c25, &
@@ -492,7 +490,7 @@
               idomain_id = materials_ext_mesh(6,iflag)
 
            else if ( mat_ext_mesh(2,ispec) == 2 ) then
-              
+
               imaterial_PB = abs(imaterial_id)
               ! material definition undefined, uses definition from tomography model
               ! GLL point location

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/memory_eval.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/memory_eval.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/memory_eval.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -152,10 +152,10 @@
 ! compute the approximate amount of static memory needed to run the mesher
 
  subroutine memory_eval_mesher(myrank,nspec,npointot,nnodes_ext_mesh, &
-              nelmnts_ext_mesh,nmat_ext_mesh,num_interfaces_ext_mesh, &
-              max_interface_size_ext_mesh,nspec2D_xmin,nspec2D_xmax, &
-              nspec2D_ymin,nspec2D_ymax,nspec2D_bottom,nspec2D_top, &
-              static_memory_size_request)
+                              nelmnts_ext_mesh,nmat_ext_mesh,num_interfaces_ext_mesh, &
+                              max_interface_size_ext_mesh,nspec2D_xmin,nspec2D_xmax, &
+                              nspec2D_ymin,nspec2D_ymax,nspec2D_bottom,nspec2D_top, &
+                              static_memory_size_request)
 
   implicit none
 
@@ -166,7 +166,7 @@
            max_interface_size_ext_mesh,nspec2D_xmin,nspec2D_xmax, &
            nspec2D_ymin,nspec2D_ymax,nspec2D_bottom,nspec2D_top
 
-  integer :: static_memory_size_request
+  double precision :: static_memory_size_request
 
   integer :: static_memory_size
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_external_values.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_external_values.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_external_values.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -63,7 +63,7 @@
 
   include "constants.h"
   ! standard include of the MPI library
-  include 'mpif.h'
+  !include 'mpif.h'
 
   integer :: myrank
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_tomography.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_tomography.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_tomography.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -167,7 +167,7 @@
 
  subroutine PREM_routine(xloc,yloc,zloc,ro_prem,vp_prem,vs_prem,idom)
 
-double precision, intent(in) :: xloc,yloc,zloc 
+double precision, intent(in) :: xloc,yloc,zloc
 integer, intent(in)          :: idom
 double precision             :: r0,r,x_prem
 !double precision             :: ro_prem,vp_prem,vs_prem

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,441 +1,455 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-
-! for external mesh
-
-  subroutine save_arrays_solver_ext_mesh(nspec,nglob, &
-                    xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, &
-                    gammaxstore,gammaystore,gammazstore, &
-                    jacobianstore, rho_vp,rho_vs,qmu_attenuation_store, &
-                    rhostore,kappastore,mustore, &
-                    rmass,rmass_acoustic,rmass_solid_poroelastic,rmass_fluid_poroelastic, &
-                    OCEANS,rmass_ocean_load,NGLOB_OCEAN,&
-                    ibool, &
-                    xstore_dummy,ystore_dummy,zstore_dummy, &
-                    abs_boundary_normal,abs_boundary_jacobian2Dw, &
-                    abs_boundary_ijk,abs_boundary_ispec, &
-                    num_abs_boundary_faces, &
-                    free_surface_normal,free_surface_jacobian2Dw, &
-                    free_surface_ijk,free_surface_ispec, &
-                    num_free_surface_faces, &
-                    coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
-                    coupling_ac_el_ijk,coupling_ac_el_ispec, &
-                    num_coupling_ac_el_faces, &
-                    num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
-                    max_interface_size_ext_mesh,ibool_interfaces_ext_mesh, &
-                    prname,SAVE_MESH_FILES, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store, &
-                    c22store,c23store,c24store,c25store,c26store,c33store, &
-                    c34store,c35store,c36store,c44store,c45store,c46store, &
-                    c55store,c56store,c66store, &
-                    ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: nspec,nglob
-
-! jacobian
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xixstore,xiystore,xizstore, &
-            etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore,jacobianstore
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rho_vp,rho_vs
-
-! attenuation
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: qmu_attenuation_store
-
-! material
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rhostore,kappastore,mustore
-  real(kind=CUSTOM_REAL), dimension(nglob) :: rmass,rmass_acoustic, &
-            rmass_solid_poroelastic,rmass_fluid_poroelastic
-! ocean load
-  logical :: OCEANS
-  integer :: NGLOB_OCEAN
-  real(kind=CUSTOM_REAL),dimension(NGLOB_OCEAN) :: rmass_ocean_load
-
-! mesh coordinates
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
-
-! absorbing boundary surface
-  integer :: num_abs_boundary_faces
-  real(kind=CUSTOM_REAL) :: abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces)
-  real(kind=CUSTOM_REAL) :: abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces)
-  integer :: abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces)
-  integer :: abs_boundary_ispec(num_abs_boundary_faces)
-
-! free surface
-  integer :: num_free_surface_faces
-  real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
-  real(kind=CUSTOM_REAL) :: free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces)
-  integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
-  integer :: free_surface_ispec(num_free_surface_faces)
-
-! acoustic-elastic coupling surface
-  integer :: num_coupling_ac_el_faces
-  real(kind=CUSTOM_REAL) :: coupling_ac_el_normal(NDIM,NGLLSQUARE,num_coupling_ac_el_faces)
-  real(kind=CUSTOM_REAL) :: coupling_ac_el_jacobian2Dw(NGLLSQUARE,num_coupling_ac_el_faces)
-  integer :: coupling_ac_el_ijk(3,NGLLSQUARE,num_coupling_ac_el_faces)
-  integer :: coupling_ac_el_ispec(num_coupling_ac_el_faces)
-
-! MPI interfaces
-  integer :: num_interfaces_ext_mesh
-  integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
-  integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
-  integer :: max_interface_size_ext_mesh
-  integer, dimension(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
-
-! file name
-  character(len=256) prname
-  logical :: SAVE_MESH_FILES
-
-! anisotropy
-  logical :: ANISOTROPY
-  integer :: NSPEC_ANISO
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
-            c11store,c12store,c13store,c14store,c15store,c16store, &
-            c22store,c23store,c24store,c25store,c26store,c33store, &
-            c34store,c35store,c36store,c44store,c45store,c46store, &
-            c55store,c56store,c66store
-
-! material domain flags
-  logical, dimension(nspec) :: ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic
-
-! local parameters
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: v_tmp
-  integer,dimension(:),allocatable :: v_tmp_i
-
-  !real(kind=CUSTOM_REAL) :: minimum(1)
-  integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh_dummy
-  integer :: ier,i
-  logical :: ACOUSTIC_SIMULATION,ELASTIC_SIMULATION,POROELASTIC_SIMULATION
-  character(len=256) :: filename
-
-  integer, dimension(:), allocatable :: iglob_tmp
-  integer :: j,inum
-
-! saves mesh file proc***_external_mesh.bin
-  filename = prname(1:len_trim(prname))//'external_mesh.bin'
-  open(unit=IOUT,file=trim(filename),status='unknown',action='write',form='unformatted',iostat=ier)
-  if( ier /= 0 ) stop 'error opening database proc######_external_mesh.bin'
-
-  write(IOUT) nspec
-  write(IOUT) nglob
-
-  write(IOUT) ibool
-
-  write(IOUT) xstore_dummy
-  write(IOUT) ystore_dummy
-  write(IOUT) zstore_dummy
-
-  write(IOUT) xixstore
-  write(IOUT) xiystore
-  write(IOUT) xizstore
-  write(IOUT) etaxstore
-  write(IOUT) etaystore
-  write(IOUT) etazstore
-  write(IOUT) gammaxstore
-  write(IOUT) gammaystore
-  write(IOUT) gammazstore
-  write(IOUT) jacobianstore
-
-  write(IOUT) kappastore
-  write(IOUT) mustore
-
-  write(IOUT) ispec_is_acoustic
-  write(IOUT) ispec_is_elastic
-  write(IOUT) ispec_is_poroelastic
-
-! acoustic
-! all processes will have acoustic_simulation set if any flag is .true. somewhere
-  call any_all_l( ANY(ispec_is_acoustic), ACOUSTIC_SIMULATION )
-  if( ACOUSTIC_SIMULATION ) then
-    write(IOUT) rmass_acoustic
-    write(IOUT) rhostore
-  endif
-
-! elastic
-  call any_all_l( ANY(ispec_is_elastic), ELASTIC_SIMULATION )
-  if( ELASTIC_SIMULATION ) then
-    write(IOUT) rmass
-    if( OCEANS) then
-      write(IOUT) rmass_ocean_load
-    endif
-    !pll Stacey
-    write(IOUT) rho_vp
-    write(IOUT) rho_vs
-
-  endif
-
-! poroelastic
-  call any_all_l( ANY(ispec_is_poroelastic), POROELASTIC_SIMULATION )
-  if( POROELASTIC_SIMULATION ) then
-    write(IOUT) rmass_solid_poroelastic
-    write(IOUT) rmass_fluid_poroelastic
-  endif
-
-! absorbing boundary surface
-  write(IOUT) num_abs_boundary_faces
-  write(IOUT) abs_boundary_ispec
-  write(IOUT) abs_boundary_ijk
-  write(IOUT) abs_boundary_jacobian2Dw
-  write(IOUT) abs_boundary_normal
-
-! free surface
-  write(IOUT) num_free_surface_faces
-  write(IOUT) free_surface_ispec
-  write(IOUT) free_surface_ijk
-  write(IOUT) free_surface_jacobian2Dw
-  write(IOUT) free_surface_normal
-
-! acoustic-elastic coupling surface
-  write(IOUT) num_coupling_ac_el_faces
-  write(IOUT) coupling_ac_el_ispec
-  write(IOUT) coupling_ac_el_ijk
-  write(IOUT) coupling_ac_el_jacobian2Dw
-  write(IOUT) coupling_ac_el_normal
-
-!MPI interfaces
-  write(IOUT) num_interfaces_ext_mesh
-  write(IOUT) maxval(nibool_interfaces_ext_mesh(:))
-  write(IOUT) my_neighbours_ext_mesh
-  write(IOUT) nibool_interfaces_ext_mesh
-
-  allocate(ibool_interfaces_ext_mesh_dummy(maxval(nibool_interfaces_ext_mesh(:)),num_interfaces_ext_mesh),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array'
-
-  do i = 1, num_interfaces_ext_mesh
-     ibool_interfaces_ext_mesh_dummy(:,i) = ibool_interfaces_ext_mesh(1:maxval(nibool_interfaces_ext_mesh(:)),i)
-  enddo
-  write(IOUT) ibool_interfaces_ext_mesh_dummy
-
-! anisotropy
-  if( ANISOTROPY ) then
-    write(IOUT) c11store
-    write(IOUT) c12store
-    write(IOUT) c13store
-    write(IOUT) c14store
-    write(IOUT) c15store
-    write(IOUT) c16store
-    write(IOUT) c22store
-    write(IOUT) c23store
-    write(IOUT) c24store
-    write(IOUT) c25store
-    write(IOUT) c26store
-    write(IOUT) c33store
-    write(IOUT) c34store
-    write(IOUT) c35store
-    write(IOUT) c36store
-    write(IOUT) c44store
-    write(IOUT) c45store
-    write(IOUT) c46store
-    write(IOUT) c55store
-    write(IOUT) c56store
-    write(IOUT) c66store
-  endif
-
-  close(IOUT)
-
-
-! stores arrays in binary files
-  if( SAVE_MESH_FILES ) then
-
-    ! mesh arrays used for example in combine_vol_data.f90
-    !--- x coordinate
-    open(unit=27,file=prname(1:len_trim(prname))//'x.bin',status='unknown',form='unformatted',iostat=ier)
-    if( ier /= 0 ) stop 'error opening file x.bin'
-    write(27) xstore_dummy
-    close(27)
-
-    !--- y coordinate
-    open(unit=27,file=prname(1:len_trim(prname))//'y.bin',status='unknown',form='unformatted',iostat=ier)
-    if( ier /= 0 ) stop 'error opening file y.bin'
-    write(27) ystore_dummy
-    close(27)
-
-    !--- z coordinate
-    open(unit=27,file=prname(1:len_trim(prname))//'z.bin',status='unknown',form='unformatted',iostat=ier)
-    if( ier /= 0 ) stop 'error opening file z.bin'
-    write(27) zstore_dummy
-    close(27)
-
-    ! ibool
-    open(unit=27,file=prname(1:len_trim(prname))//'ibool.bin',status='unknown',form='unformatted',iostat=ier)
-    if( ier /= 0 ) stop 'error opening file ibool.bin'
-    write(27) ibool
-    close(27)
-
-    allocate( v_tmp(NGLLX,NGLLY,NGLLZ,nspec), stat=ier); if( ier /= 0 ) stop 'error allocating array '
-
-    ! vp (for checking the mesh and model)
-    !minimum = minval( abs(rho_vp) )
-    !if( minimum(1) /= 0.0 ) then
-    !  v_tmp = (FOUR_THIRDS * mustore + kappastore) / rho_vp
-    !else
-    !  v_tmp = 0.0
-    !endif
-    v_tmp = 0.0
-    where( rho_vp /= 0._CUSTOM_REAL ) v_tmp = (FOUR_THIRDS * mustore + kappastore) / rho_vp
-    open(unit=27,file=prname(1:len_trim(prname))//'vp.bin',status='unknown',form='unformatted',iostat=ier)
-    if( ier /= 0 ) stop 'error opening file vp.bin'
-    write(27) v_tmp
-    close(27)
-
-    ! VTK file output
-    ! vp values
-    filename = prname(1:len_trim(prname))//'vp'
-    call write_VTK_data_gll_cr(nspec,nglob, &
-                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-                        v_tmp,filename)
-
-
-    ! vs (for checking the mesh and model)
-    !minimum = minval( abs(rho_vs) )
-    !if( minimum(1) /= 0.0 ) then
-    !  v_tmp = mustore / rho_vs
-    !else
-    !  v_tmp = 0.0
-    !endif
-    v_tmp = 0.0
-    where( rho_vs /= 0._CUSTOM_REAL )  v_tmp = mustore / rho_vs
-    open(unit=27,file=prname(1:len_trim(prname))//'vs.bin',status='unknown',form='unformatted',iostat=ier)
-    if( ier /= 0 ) stop 'error opening file vs.bin'
-    write(27) v_tmp
-    close(27)
-
-    ! VTK file output
-    ! vs values
-    filename = prname(1:len_trim(prname))//'vs'
-    call write_VTK_data_gll_cr(nspec,nglob, &
-                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-                        v_tmp,filename)
-
-    ! outputs density model for check
-    v_tmp = 0.0
-    where( rho_vp /= 0._CUSTOM_REAL ) v_tmp = rho_vp**2 / (FOUR_THIRDS * mustore + kappastore)
-    open(unit=27,file=prname(1:len_trim(prname))//'rho.bin',status='unknown',form='unformatted',iostat=ier)
-    if( ier /= 0 ) stop 'error opening file rho.bin'
-    write(27) v_tmp
-    close(27)
-
-    ! VTK file output
-    ! saves attenuation flag assigned on each gll point into a vtk file
-    filename = prname(1:len_trim(prname))//'attenuation'
-    call write_VTK_data_gll_cr(nspec,nglob, &
-                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-                        qmu_attenuation_store,filename)
-
-    ! VTK file output
-    ! acoustic-elastic domains
-    if( ACOUSTIC_SIMULATION .and. ELASTIC_SIMULATION ) then
-      ! saves points on acoustic-elastic coupling interface
-      allocate( iglob_tmp(NGLLSQUARE*num_coupling_ac_el_faces),stat=ier)
-      if( ier /= 0 ) stop 'error allocating array iglob_tmp'
-      inum = 0
-      iglob_tmp(:) = 0
-      do i=1,num_coupling_ac_el_faces
-        do j=1,NGLLSQUARE
-          inum = inum+1
-          iglob_tmp(inum) = ibool(coupling_ac_el_ijk(1,j,i), &
-                                  coupling_ac_el_ijk(2,j,i), &
-                                  coupling_ac_el_ijk(3,j,i), &
-                                  coupling_ac_el_ispec(i) )
-        enddo
-      enddo
-      filename = prname(1:len_trim(prname))//'coupling_acoustic_elastic'
-      call write_VTK_data_points(nglob, &
-                        xstore_dummy,ystore_dummy,zstore_dummy, &
-                        iglob_tmp,NGLLSQUARE*num_coupling_ac_el_faces, &
-                        filename)
-
-      ! saves acoustic/elastic flag
-      allocate(v_tmp_i(nspec),stat=ier)
-      if( ier /= 0 ) stop 'error allocating array v_tmp_i'
-      do i=1,nspec
-        if( ispec_is_acoustic(i) ) then
-          v_tmp_i(i) = 1
-        else if( ispec_is_elastic(i) ) then
-          v_tmp_i(i) = 2
-        else
-          v_tmp_i(i) = 0
-        endif
-      enddo
-      filename = prname(1:len_trim(prname))//'acoustic_elastic_flag'
-      call write_VTK_data_elem_i(nspec,nglob, &
-                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-                        v_tmp_i,filename)
-
-      deallocate(iglob_tmp,v_tmp_i)
-    endif
-
-    ! saves free surface points
-    if( num_free_surface_faces > 0 ) then
-      ! saves free surface interface points
-      allocate( iglob_tmp(NGLLSQUARE*num_free_surface_faces),stat=ier)
-      if( ier /= 0 ) stop 'error allocating array iglob_tmp'
-      inum = 0
-      iglob_tmp(:) = 0
-      do i=1,num_free_surface_faces
-        do j=1,NGLLSQUARE
-          inum = inum+1
-          iglob_tmp(inum) = ibool(free_surface_ijk(1,j,i), &
-                                  free_surface_ijk(2,j,i), &
-                                  free_surface_ijk(3,j,i), &
-                                  free_surface_ispec(i) )
-        enddo
-      enddo
-      filename = prname(1:len_trim(prname))//'free_surface'
-      call write_VTK_data_points(nglob, &
-                        xstore_dummy,ystore_dummy,zstore_dummy, &
-                        iglob_tmp,NGLLSQUARE*num_free_surface_faces, &
-                        filename)
-
-      deallocate(iglob_tmp)
-    endif
-
-
-    !! saves 1. MPI interface
-    !    if( num_interfaces_ext_mesh >= 1 ) then
-    !      filename = prname(1:len_trim(prname))//'MPI_1_points'
-    !      call write_VTK_data_points(nglob, &
-    !                        xstore_dummy,ystore_dummy,zstore_dummy, &
-    !                        ibool_interfaces_ext_mesh_dummy(1:nibool_interfaces_ext_mesh(1),1), &
-    !                        nibool_interfaces_ext_mesh(1), &
-    !                        filename)
-    !    endif
-    !
-
-    deallocate(v_tmp)
-
-  endif ! SAVE_MESH_FILES
-
-! cleanup
-  deallocate(ibool_interfaces_ext_mesh_dummy,stat=ier); if( ier /= 0 ) stop 'error deallocating array'
-
-
-  end subroutine save_arrays_solver_ext_mesh
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+! for external mesh
+
+  subroutine save_arrays_solver_ext_mesh(nspec,nglob, &
+                    xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, &
+                    gammaxstore,gammaystore,gammazstore, &
+                    jacobianstore, rho_vp,rho_vs,qmu_attenuation_store, &
+                    rhostore,kappastore,mustore, &
+                    rmass,rmass_acoustic,rmass_solid_poroelastic,rmass_fluid_poroelastic, &
+                    OCEANS,rmass_ocean_load,NGLOB_OCEAN,&
+                    ibool, &
+                    xstore_dummy,ystore_dummy,zstore_dummy, &
+                    abs_boundary_normal,abs_boundary_jacobian2Dw, &
+                    abs_boundary_ijk,abs_boundary_ispec, &
+                    num_abs_boundary_faces, &
+                    free_surface_normal,free_surface_jacobian2Dw, &
+                    free_surface_ijk,free_surface_ispec, &
+                    num_free_surface_faces, &
+                    coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
+                    coupling_ac_el_ijk,coupling_ac_el_ispec, &
+                    num_coupling_ac_el_faces, &
+                    num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
+                    max_interface_size_ext_mesh,ibool_interfaces_ext_mesh, &
+                    prname,SAVE_MESH_FILES, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store, &
+                    c22store,c23store,c24store,c25store,c26store,c33store, &
+                    c34store,c35store,c36store,c44store,c45store,c46store, &
+                    c55store,c56store,c66store, &
+                    ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob
+
+! jacobian
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xixstore,xiystore,xizstore, &
+            etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore,jacobianstore
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rho_vp,rho_vs
+
+! attenuation
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: qmu_attenuation_store
+
+! material
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rhostore,kappastore,mustore
+  real(kind=CUSTOM_REAL), dimension(nglob) :: rmass,rmass_acoustic, &
+            rmass_solid_poroelastic,rmass_fluid_poroelastic
+! ocean load
+  logical :: OCEANS
+  integer :: NGLOB_OCEAN
+  real(kind=CUSTOM_REAL),dimension(NGLOB_OCEAN) :: rmass_ocean_load
+
+! mesh coordinates
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+! absorbing boundary surface
+  integer :: num_abs_boundary_faces
+  real(kind=CUSTOM_REAL) :: abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces)
+  real(kind=CUSTOM_REAL) :: abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces)
+  integer :: abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces)
+  integer :: abs_boundary_ispec(num_abs_boundary_faces)
+
+! free surface
+  integer :: num_free_surface_faces
+  real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
+  real(kind=CUSTOM_REAL) :: free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces)
+  integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
+  integer :: free_surface_ispec(num_free_surface_faces)
+
+! acoustic-elastic coupling surface
+  integer :: num_coupling_ac_el_faces
+  real(kind=CUSTOM_REAL) :: coupling_ac_el_normal(NDIM,NGLLSQUARE,num_coupling_ac_el_faces)
+  real(kind=CUSTOM_REAL) :: coupling_ac_el_jacobian2Dw(NGLLSQUARE,num_coupling_ac_el_faces)
+  integer :: coupling_ac_el_ijk(3,NGLLSQUARE,num_coupling_ac_el_faces)
+  integer :: coupling_ac_el_ispec(num_coupling_ac_el_faces)
+
+! MPI interfaces
+  integer :: num_interfaces_ext_mesh
+  integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
+  integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+  integer :: max_interface_size_ext_mesh
+  integer, dimension(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+  integer :: max_nibool_interfaces_ext_mesh
+
+! file name
+  character(len=256) prname
+  logical :: SAVE_MESH_FILES
+
+! anisotropy
+  logical :: ANISOTROPY
+  integer :: NSPEC_ANISO
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
+            c11store,c12store,c13store,c14store,c15store,c16store, &
+            c22store,c23store,c24store,c25store,c26store,c33store, &
+            c34store,c35store,c36store,c44store,c45store,c46store, &
+            c55store,c56store,c66store
+
+! material domain flags
+  logical, dimension(nspec) :: ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic
+
+! local parameters
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: v_tmp
+  integer,dimension(:),allocatable :: v_tmp_i
+
+  !real(kind=CUSTOM_REAL) :: minimum(1)
+  integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh_dummy
+  integer :: ier,i
+  logical :: ACOUSTIC_SIMULATION,ELASTIC_SIMULATION,POROELASTIC_SIMULATION
+  character(len=256) :: filename
+
+  integer, dimension(:), allocatable :: iglob_tmp
+  integer :: j,inum
+
+! saves mesh file proc***_external_mesh.bin
+  filename = prname(1:len_trim(prname))//'external_mesh.bin'
+  open(unit=IOUT,file=trim(filename),status='unknown',action='write',form='unformatted',iostat=ier)
+  if( ier /= 0 ) stop 'error opening database proc######_external_mesh.bin'
+
+  write(IOUT) nspec
+  write(IOUT) nglob
+
+  write(IOUT) ibool
+
+  write(IOUT) xstore_dummy
+  write(IOUT) ystore_dummy
+  write(IOUT) zstore_dummy
+
+  write(IOUT) xixstore
+  write(IOUT) xiystore
+  write(IOUT) xizstore
+  write(IOUT) etaxstore
+  write(IOUT) etaystore
+  write(IOUT) etazstore
+  write(IOUT) gammaxstore
+  write(IOUT) gammaystore
+  write(IOUT) gammazstore
+  write(IOUT) jacobianstore
+
+  write(IOUT) kappastore
+  write(IOUT) mustore
+
+  write(IOUT) ispec_is_acoustic
+  write(IOUT) ispec_is_elastic
+  write(IOUT) ispec_is_poroelastic
+
+! acoustic
+! all processes will have acoustic_simulation set if any flag is .true. somewhere
+  call any_all_l( ANY(ispec_is_acoustic), ACOUSTIC_SIMULATION )
+  if( ACOUSTIC_SIMULATION ) then
+    write(IOUT) rmass_acoustic
+    write(IOUT) rhostore
+  endif
+
+! elastic
+  call any_all_l( ANY(ispec_is_elastic), ELASTIC_SIMULATION )
+  if( ELASTIC_SIMULATION ) then
+    write(IOUT) rmass
+
+    if( OCEANS) then
+      write(IOUT) rmass_ocean_load
+    endif
+
+    !pll Stacey
+    write(IOUT) rho_vp
+    write(IOUT) rho_vs
+
+  endif
+
+! poroelastic
+  call any_all_l( ANY(ispec_is_poroelastic), POROELASTIC_SIMULATION )
+  if( POROELASTIC_SIMULATION ) then
+    stop 'not implemented yet: write rmass_solid_poroelastic .. '
+
+    write(IOUT) rmass_solid_poroelastic
+    write(IOUT) rmass_fluid_poroelastic
+  endif
+
+! absorbing boundary surface
+  write(IOUT) num_abs_boundary_faces
+  if( num_abs_boundary_faces > 0 ) then
+    write(IOUT) abs_boundary_ispec
+    write(IOUT) abs_boundary_ijk
+    write(IOUT) abs_boundary_jacobian2Dw
+    write(IOUT) abs_boundary_normal
+  endif
+
+! free surface
+  write(IOUT) num_free_surface_faces
+  if( num_free_surface_faces > 0 ) then
+    write(IOUT) free_surface_ispec
+    write(IOUT) free_surface_ijk
+    write(IOUT) free_surface_jacobian2Dw
+    write(IOUT) free_surface_normal
+  endif
+
+! acoustic-elastic coupling surface
+  write(IOUT) num_coupling_ac_el_faces
+  if( num_coupling_ac_el_faces > 0 ) then
+    write(IOUT) coupling_ac_el_ispec
+    write(IOUT) coupling_ac_el_ijk
+    write(IOUT) coupling_ac_el_jacobian2Dw
+    write(IOUT) coupling_ac_el_normal
+  endif
+
+!MPI interfaces
+  max_nibool_interfaces_ext_mesh = maxval(nibool_interfaces_ext_mesh(:))
+  allocate(ibool_interfaces_ext_mesh_dummy(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array'
+  do i = 1, num_interfaces_ext_mesh
+     ibool_interfaces_ext_mesh_dummy(:,i) = ibool_interfaces_ext_mesh(1:max_nibool_interfaces_ext_mesh,i)
+  enddo
+
+  write(IOUT) num_interfaces_ext_mesh
+  if( num_interfaces_ext_mesh > 0 ) then
+    write(IOUT) max_nibool_interfaces_ext_mesh
+    write(IOUT) my_neighbours_ext_mesh
+    write(IOUT) nibool_interfaces_ext_mesh
+    write(IOUT) ibool_interfaces_ext_mesh_dummy
+  endif
+
+! anisotropy
+  if( ANISOTROPY ) then
+    write(IOUT) c11store
+    write(IOUT) c12store
+    write(IOUT) c13store
+    write(IOUT) c14store
+    write(IOUT) c15store
+    write(IOUT) c16store
+    write(IOUT) c22store
+    write(IOUT) c23store
+    write(IOUT) c24store
+    write(IOUT) c25store
+    write(IOUT) c26store
+    write(IOUT) c33store
+    write(IOUT) c34store
+    write(IOUT) c35store
+    write(IOUT) c36store
+    write(IOUT) c44store
+    write(IOUT) c45store
+    write(IOUT) c46store
+    write(IOUT) c55store
+    write(IOUT) c56store
+    write(IOUT) c66store
+  endif
+
+  close(IOUT)
+
+
+! stores arrays in binary files
+  if( SAVE_MESH_FILES ) then
+
+    ! mesh arrays used for example in combine_vol_data.f90
+    !--- x coordinate
+    open(unit=27,file=prname(1:len_trim(prname))//'x.bin',status='unknown',form='unformatted',iostat=ier)
+    if( ier /= 0 ) stop 'error opening file x.bin'
+    write(27) xstore_dummy
+    close(27)
+
+    !--- y coordinate
+    open(unit=27,file=prname(1:len_trim(prname))//'y.bin',status='unknown',form='unformatted',iostat=ier)
+    if( ier /= 0 ) stop 'error opening file y.bin'
+    write(27) ystore_dummy
+    close(27)
+
+    !--- z coordinate
+    open(unit=27,file=prname(1:len_trim(prname))//'z.bin',status='unknown',form='unformatted',iostat=ier)
+    if( ier /= 0 ) stop 'error opening file z.bin'
+    write(27) zstore_dummy
+    close(27)
+
+    ! ibool
+    open(unit=27,file=prname(1:len_trim(prname))//'ibool.bin',status='unknown',form='unformatted',iostat=ier)
+    if( ier /= 0 ) stop 'error opening file ibool.bin'
+    write(27) ibool
+    close(27)
+
+    allocate( v_tmp(NGLLX,NGLLY,NGLLZ,nspec), stat=ier); if( ier /= 0 ) stop 'error allocating array '
+
+    ! vp (for checking the mesh and model)
+    !minimum = minval( abs(rho_vp) )
+    !if( minimum(1) /= 0.0 ) then
+    !  v_tmp = (FOUR_THIRDS * mustore + kappastore) / rho_vp
+    !else
+    !  v_tmp = 0.0
+    !endif
+    v_tmp = 0.0
+    where( rho_vp /= 0._CUSTOM_REAL ) v_tmp = (FOUR_THIRDS * mustore + kappastore) / rho_vp
+    open(unit=27,file=prname(1:len_trim(prname))//'vp.bin',status='unknown',form='unformatted',iostat=ier)
+    if( ier /= 0 ) stop 'error opening file vp.bin'
+    write(27) v_tmp
+    close(27)
+
+    ! VTK file output
+    ! vp values
+    filename = prname(1:len_trim(prname))//'vp'
+    call write_VTK_data_gll_cr(nspec,nglob, &
+                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+                        v_tmp,filename)
+
+
+    ! vs (for checking the mesh and model)
+    !minimum = minval( abs(rho_vs) )
+    !if( minimum(1) /= 0.0 ) then
+    !  v_tmp = mustore / rho_vs
+    !else
+    !  v_tmp = 0.0
+    !endif
+    v_tmp = 0.0
+    where( rho_vs /= 0._CUSTOM_REAL )  v_tmp = mustore / rho_vs
+    open(unit=27,file=prname(1:len_trim(prname))//'vs.bin',status='unknown',form='unformatted',iostat=ier)
+    if( ier /= 0 ) stop 'error opening file vs.bin'
+    write(27) v_tmp
+    close(27)
+
+    ! VTK file output
+    ! vs values
+    filename = prname(1:len_trim(prname))//'vs'
+    call write_VTK_data_gll_cr(nspec,nglob, &
+                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+                        v_tmp,filename)
+
+    ! outputs density model for check
+    v_tmp = 0.0
+    where( rho_vp /= 0._CUSTOM_REAL ) v_tmp = rho_vp**2 / (FOUR_THIRDS * mustore + kappastore)
+    open(unit=27,file=prname(1:len_trim(prname))//'rho.bin',status='unknown',form='unformatted',iostat=ier)
+    if( ier /= 0 ) stop 'error opening file rho.bin'
+    write(27) v_tmp
+    close(27)
+
+    ! VTK file output
+    ! saves attenuation flag assigned on each gll point into a vtk file
+    filename = prname(1:len_trim(prname))//'attenuation'
+    call write_VTK_data_gll_cr(nspec,nglob, &
+                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+                        qmu_attenuation_store,filename)
+
+    ! VTK file output
+    ! acoustic-elastic domains
+    if( ACOUSTIC_SIMULATION .and. ELASTIC_SIMULATION ) then
+      ! saves points on acoustic-elastic coupling interface
+      allocate( iglob_tmp(NGLLSQUARE*num_coupling_ac_el_faces),stat=ier)
+      if( ier /= 0 ) stop 'error allocating array iglob_tmp'
+      inum = 0
+      iglob_tmp(:) = 0
+      do i=1,num_coupling_ac_el_faces
+        do j=1,NGLLSQUARE
+          inum = inum+1
+          iglob_tmp(inum) = ibool(coupling_ac_el_ijk(1,j,i), &
+                                  coupling_ac_el_ijk(2,j,i), &
+                                  coupling_ac_el_ijk(3,j,i), &
+                                  coupling_ac_el_ispec(i) )
+        enddo
+      enddo
+      filename = prname(1:len_trim(prname))//'coupling_acoustic_elastic'
+      call write_VTK_data_points(nglob, &
+                        xstore_dummy,ystore_dummy,zstore_dummy, &
+                        iglob_tmp,NGLLSQUARE*num_coupling_ac_el_faces, &
+                        filename)
+
+      ! saves acoustic/elastic flag
+      allocate(v_tmp_i(nspec),stat=ier)
+      if( ier /= 0 ) stop 'error allocating array v_tmp_i'
+      do i=1,nspec
+        if( ispec_is_acoustic(i) ) then
+          v_tmp_i(i) = 1
+        else if( ispec_is_elastic(i) ) then
+          v_tmp_i(i) = 2
+        else
+          v_tmp_i(i) = 0
+        endif
+      enddo
+      filename = prname(1:len_trim(prname))//'acoustic_elastic_flag'
+      call write_VTK_data_elem_i(nspec,nglob, &
+                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+                        v_tmp_i,filename)
+
+      deallocate(iglob_tmp,v_tmp_i)
+    endif
+
+    ! saves free surface points
+    if( num_free_surface_faces > 0 ) then
+      ! saves free surface interface points
+      allocate( iglob_tmp(NGLLSQUARE*num_free_surface_faces),stat=ier)
+      if( ier /= 0 ) stop 'error allocating array iglob_tmp'
+      inum = 0
+      iglob_tmp(:) = 0
+      do i=1,num_free_surface_faces
+        do j=1,NGLLSQUARE
+          inum = inum+1
+          iglob_tmp(inum) = ibool(free_surface_ijk(1,j,i), &
+                                  free_surface_ijk(2,j,i), &
+                                  free_surface_ijk(3,j,i), &
+                                  free_surface_ispec(i) )
+        enddo
+      enddo
+      filename = prname(1:len_trim(prname))//'free_surface'
+      call write_VTK_data_points(nglob, &
+                        xstore_dummy,ystore_dummy,zstore_dummy, &
+                        iglob_tmp,NGLLSQUARE*num_free_surface_faces, &
+                        filename)
+
+      deallocate(iglob_tmp)
+    endif
+
+
+    !! saves 1. MPI interface
+    if( num_interfaces_ext_mesh >= 1 ) then
+      filename = prname(1:len_trim(prname))//'MPI_1_points'
+      call write_VTK_data_points(nglob, &
+                        xstore_dummy,ystore_dummy,zstore_dummy, &
+                        ibool_interfaces_ext_mesh_dummy(1:nibool_interfaces_ext_mesh(1),1), &
+                        nibool_interfaces_ext_mesh(1), &
+                        filename)
+    endif
+
+
+    deallocate(v_tmp)
+
+  endif ! SAVE_MESH_FILES
+
+! cleanup
+  deallocate(ibool_interfaces_ext_mesh_dummy,stat=ier); if( ier /= 0 ) stop 'error deallocating array'
+
+
+  end subroutine save_arrays_solver_ext_mesh
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -26,11 +26,11 @@
 
   module vtk
 
-    
+
     !-------------------------------------------------------------
     ! USER PARAMETER
 
-    ! outputs as VTK ASCII file 
+    ! outputs as VTK ASCII file
     logical,parameter :: USE_VTK_OUTPUT = .true.
 
     !-------------------------------------------------------------
@@ -38,7 +38,7 @@
 
     ! global point data
     real,dimension(:),allocatable :: total_dat
-    
+
   end module vtk
 
 !
@@ -61,7 +61,7 @@
   implicit none
 
   include 'constants.h'
-  
+
   ! data must be of dimension: (NGLLX,NGLLY,NGLLZ,NSPEC_AB)
   double precision,dimension(:,:,:,:),allocatable :: data
   ! real array for data
@@ -182,7 +182,7 @@
     mesh_file = trim(outdir) // '/' // trim(filename)//'.vtk'
     open(IOVTK,file=mesh_file(1:len_trim(mesh_file)),status='unknown',iostat=ios)
     if( ios /= 0 ) stop 'error opening vtk output file'
-    
+
     write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
     write(IOVTK,'(a)') 'material model VTK file'
     write(IOVTK,'(a)') 'ASCII'
@@ -192,7 +192,7 @@
     mesh_file = trim(outdir) // '/' // trim(filename)//'.mesh'
     call open_file(trim(mesh_file)//char(0))
   endif
-  
+
   ! counts total number of points (all slices)
   npp = 0
   nee = 0
@@ -354,7 +354,7 @@
     ! close mesh file
     call close_file()
   endif
-  
+
   print *, 'Done writing '//trim(mesh_file)
 
   end program combine_paraview_data_ext_mesh
@@ -490,8 +490,8 @@
 
   ! writes out total number of points
   if (it == 1) then
-    if( USE_VTK_OUTPUT ) then      
-      write(IOVTK, '(a,i12,a)') 'POINTS ', npp, ' float'    
+    if( USE_VTK_OUTPUT ) then
+      write(IOVTK, '(a,i12,a)') 'POINTS ', npp, ' float'
       ! creates array to hold point data
       allocate(total_dat(npp),stat=ier)
       if( ier /= 0 ) stop 'error allocating total dat array'
@@ -543,7 +543,7 @@
       else
         call write_real(x)
         call write_real(y)
-        call write_real(z)      
+        call write_real(z)
         call write_real(dat(NGLLX,1,1,ispec))
       endif
       mask_ibool(iglob2) = .true.
@@ -556,7 +556,7 @@
       if( USE_VTK_OUTPUT ) then
         write(IOVTK,'(3e18.6)') x,y,z
         total_dat(np+numpoin) = dat(NGLLX,NGLLY,1,ispec)
-      else      
+      else
         call write_real(x)
         call write_real(y)
         call write_real(z)
@@ -571,8 +571,8 @@
       z = zstore(iglob4)
       if( USE_VTK_OUTPUT ) then
         write(IOVTK,'(3e18.6)') x,y,z
-        total_dat(np+numpoin) = dat(1,NGLLY,1,ispec)        
-      else      
+        total_dat(np+numpoin) = dat(1,NGLLY,1,ispec)
+      else
         call write_real(x)
         call write_real(y)
         call write_real(z)
@@ -587,8 +587,8 @@
       z = zstore(iglob5)
       if( USE_VTK_OUTPUT ) then
         write(IOVTK,'(3e18.6)') x,y,z
-        total_dat(np+numpoin) = dat(1,1,NGLLZ,ispec)        
-      else      
+        total_dat(np+numpoin) = dat(1,1,NGLLZ,ispec)
+      else
         call write_real(x)
         call write_real(y)
         call write_real(z)
@@ -603,8 +603,8 @@
       z = zstore(iglob6)
       if( USE_VTK_OUTPUT ) then
         write(IOVTK,'(3e18.6)') x,y,z
-        total_dat(np+numpoin) = dat(NGLLX,1,NGLLZ,ispec)        
-      else      
+        total_dat(np+numpoin) = dat(NGLLX,1,NGLLZ,ispec)
+      else
         call write_real(x)
         call write_real(y)
         call write_real(z)
@@ -619,8 +619,8 @@
       z = zstore(iglob7)
       if( USE_VTK_OUTPUT ) then
         write(IOVTK,'(3e18.6)') x,y,z
-        total_dat(np+numpoin) = dat(NGLLX,NGLLY,NGLLZ,ispec)        
-      else      
+        total_dat(np+numpoin) = dat(NGLLX,NGLLY,NGLLZ,ispec)
+      else
         call write_real(x)
         call write_real(y)
         call write_real(z)
@@ -635,8 +635,8 @@
       z = zstore(iglob8)
       if( USE_VTK_OUTPUT ) then
         write(IOVTK,'(3e18.6)') x,y,z
-        total_dat(np+numpoin) = dat(1,NGLLY,NGLLZ,ispec)        
-      else      
+        total_dat(np+numpoin) = dat(1,NGLLY,NGLLZ,ispec)
+      else
         call write_real(x)
         call write_real(y)
         call write_real(z)
@@ -673,13 +673,13 @@
 
   ! writes out total number of points
   if (it == 1) then
-    if( USE_VTK_OUTPUT ) then      
-      write(IOVTK, '(a,i12,a)') 'POINTS ', npp, ' float'    
+    if( USE_VTK_OUTPUT ) then
+      write(IOVTK, '(a,i12,a)') 'POINTS ', npp, ' float'
       ! creates array to hold point data
       allocate(total_dat(npp),stat=ier)
       if( ier /= 0 ) stop 'error allocating total dat array'
       total_dat(:) = 0.0
-    else  
+    else
       call write_integer(npp)
     endif
   endif
@@ -703,7 +703,7 @@
             if( USE_VTK_OUTPUT ) then
               write(IOVTK,'(3e18.6)') x,y,z
               total_dat(np+numpoin) = dat(i,j,k,ispec)
-            else            
+            else
               call write_real(x)
               call write_real(y)
               call write_real(z)
@@ -742,7 +742,7 @@
 
   ! outputs total number of elements for all slices
   if (it == 1) then
-    if( USE_VTK_OUTPUT ) then      
+    if( USE_VTK_OUTPUT ) then
       ! note: indices for vtk start at 0
       write(IOVTK,'(a,i12,i12)') "CELLS ",nee,nee*9
     else
@@ -821,7 +821,7 @@
     n7 = num_ibool(iglob7) -1 + np
     n8 = num_ibool(iglob8) -1 + np
 
-    if( USE_VTK_OUTPUT ) then  
+    if( USE_VTK_OUTPUT ) then
       write(IOVTK,'(9i12)') 8,n1,n2,n3,n4,n5,n6,n7,n8
     else
       call write_integer(n1)
@@ -833,9 +833,9 @@
       call write_integer(n7)
       call write_integer(n8)
     endif
-    
+
   enddo
-  
+
   ! elements written
   nelement = NSPEC_AB
 
@@ -869,10 +869,10 @@
 
   ! outputs total number of elements for all slices
   if (it == 1) then
-    if( USE_VTK_OUTPUT ) then      
+    if( USE_VTK_OUTPUT ) then
       ! note: indices for vtk start at 0
       write(IOVTK,'(a,i12,i12)') "CELLS ",nee,nee*9
-    else  
+    else
       !nee = nelement * num_node
       call write_integer(nee)
     endif
@@ -922,10 +922,10 @@
           n6 = num_ibool(iglob6)+np-1
           n7 = num_ibool(iglob7)+np-1
           n8 = num_ibool(iglob8)+np-1
-  
-          if( USE_VTK_OUTPUT ) then  
+
+          if( USE_VTK_OUTPUT ) then
             write(IOVTK,'(9i12)') 8,n1,n2,n3,n4,n5,n6,n7,n8
-          else          
+          else
             call write_integer(n1)
             call write_integer(n2)
             call write_integer(n3)

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_arrays_source.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_arrays_source.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_arrays_source.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,350 +1,352 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine compute_arrays_source(ispec_selected_source, &
-             xi_source,eta_source,gamma_source,sourcearray, &
-             Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
-             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-             xigll,yigll,zigll,nspec)
-
-  implicit none
-
-  include "constants.h"
-
-  integer ispec_selected_source
-  integer nspec
-
-  double precision xi_source,eta_source,gamma_source
-  double precision Mxx,Myy,Mzz,Mxy,Mxz,Myz
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xix,xiy,xiz,etax,etay,etaz, &
-        gammax,gammay,gammaz
-
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
-
-  double precision xixd,xiyd,xizd,etaxd,etayd,etazd,gammaxd,gammayd,gammazd
-
-! Gauss-Lobatto-Legendre points of integration and weights
-  double precision, dimension(NGLLX) :: xigll
-  double precision, dimension(NGLLY) :: yigll
-  double precision, dimension(NGLLZ) :: zigll
-
-! source arrays
-  double precision, dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearrayd
-  double precision, dimension(NGLLX,NGLLY,NGLLZ) :: G11,G12,G13,G21,G22,G23,G31,G32,G33
-  double precision, dimension(NGLLX) :: hxis,hpxis
-  double precision, dimension(NGLLY) :: hetas,hpetas
-  double precision, dimension(NGLLZ) :: hgammas,hpgammas
-
-  integer k,l,m
-
-! calculate G_ij for general source location
-! the source does not necessarily correspond to a Gauss-Lobatto point
-  do m=1,NGLLZ
-    do l=1,NGLLY
-      do k=1,NGLLX
-
-        xixd    = dble(xix(k,l,m,ispec_selected_source))
-        xiyd    = dble(xiy(k,l,m,ispec_selected_source))
-        xizd    = dble(xiz(k,l,m,ispec_selected_source))
-        etaxd   = dble(etax(k,l,m,ispec_selected_source))
-        etayd   = dble(etay(k,l,m,ispec_selected_source))
-        etazd   = dble(etaz(k,l,m,ispec_selected_source))
-        gammaxd = dble(gammax(k,l,m,ispec_selected_source))
-        gammayd = dble(gammay(k,l,m,ispec_selected_source))
-        gammazd = dble(gammaz(k,l,m,ispec_selected_source))
-
-        G11(k,l,m) = Mxx*xixd+Mxy*xiyd+Mxz*xizd
-        G12(k,l,m) = Mxx*etaxd+Mxy*etayd+Mxz*etazd
-        G13(k,l,m) = Mxx*gammaxd+Mxy*gammayd+Mxz*gammazd
-        G21(k,l,m) = Mxy*xixd+Myy*xiyd+Myz*xizd
-        G22(k,l,m) = Mxy*etaxd+Myy*etayd+Myz*etazd
-        G23(k,l,m) = Mxy*gammaxd+Myy*gammayd+Myz*gammazd
-        G31(k,l,m) = Mxz*xixd+Myz*xiyd+Mzz*xizd
-        G32(k,l,m) = Mxz*etaxd+Myz*etayd+Mzz*etazd
-        G33(k,l,m) = Mxz*gammaxd+Myz*gammayd+Mzz*gammazd
-
-      enddo
-    enddo
-  enddo
-
-! compute Lagrange polynomials at the source location
-  call lagrange_any(xi_source,NGLLX,xigll,hxis,hpxis)
-  call lagrange_any(eta_source,NGLLY,yigll,hetas,hpetas)
-  call lagrange_any(gamma_source,NGLLZ,zigll,hgammas,hpgammas)
-
-! calculate source array
-  do m=1,NGLLZ
-    do l=1,NGLLY
-      do k=1,NGLLX
-        call multiply_arrays_source(sourcearrayd,G11,G12,G13,G21,G22,G23, &
-                  G31,G32,G33,hxis,hpxis,hetas,hpetas,hgammas,hpgammas,k,l,m)
-      enddo
-    enddo
-  enddo
-
-! distinguish between single and double precision for reals
-  if(CUSTOM_REAL == SIZE_REAL) then
-    sourcearray(:,:,:,:) = sngl(sourcearrayd(:,:,:,:))
-  else
-    sourcearray(:,:,:,:) = sourcearrayd(:,:,:,:)
-  endif
-
-  end subroutine compute_arrays_source
-
-!=============================================================================
-
-  subroutine compute_arrays_adjoint_source(myrank, adj_source_file, &
-                    xi_receiver,eta_receiver,gamma_receiver, adj_sourcearray, &
-                    xigll,yigll,zigll, &
-                    it_sub_adj,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC)
-
-  implicit none
-
-  include 'constants.h'
-
-! input
-  integer myrank, NSTEP, it_sub_adj, NTSTEP_BETWEEN_READ_ADJSRC
-
-  double precision xi_receiver, eta_receiver, gamma_receiver
-
-  character(len=*) adj_source_file
-
-! output
-  real(kind=CUSTOM_REAL),dimension(NTSTEP_BETWEEN_READ_ADJSRC,NDIM,NGLLX,NGLLY,NGLLZ) :: adj_sourcearray
-
-! Gauss-Lobatto-Legendre points of integration and weights
-  double precision, dimension(NGLLX) :: xigll
-  double precision, dimension(NGLLY) :: yigll
-  double precision, dimension(NGLLZ) :: zigll
-
-  double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY), &
-        hgammar(NGLLZ), hpgammar(NGLLZ)
-
-  real(kind=CUSTOM_REAL) :: adj_src(NTSTEP_BETWEEN_READ_ADJSRC,NDIM)
-
-  integer icomp, itime, i, j, k, ios, it_start, it_end
-  double precision :: junk
-  ! note: should have same order as orientation in write_seismograms_to_file()
-  character(len=3),dimension(NDIM) :: comp != (/ "BHE", "BHN", "BHZ" /)
-  character(len=256) :: filename
-
-  ! gets channel names
-  do icomp=1,NDIM
-    call write_channel_name(icomp,comp(icomp))
-  enddo
-
-  ! range of the block we need to read
-  it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1
-  it_end   = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1
-
-  !adj_sourcearray(:,:,:,:,:) = 0.
-  adj_src = 0._CUSTOM_REAL
-
-  ! loops over components
-  do icomp = 1, NDIM
-
-    filename = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/../SEM/'//trim(adj_source_file) // '.'// comp(icomp) // '.adj'
-    open(unit=IIN,file=trim(filename),status='old',action='read',iostat = ios)
-    if (ios /= 0) cycle ! cycles to next file
-    !if (ios /= 0) call exit_MPI(myrank, ' file '//trim(filename)//'does not exist')
-
-    ! reads in adjoint source trace
-    !! skip unused blocks
-    do itime = 1, it_start-1
-      read(IIN,*,iostat=ios) junk, junk
-      if( ios /= 0 ) &
-        call exit_MPI(myrank, &
-          'file '//trim(filename)//' has wrong length, please check with your simulation duration (1111)')
-    enddo
-    !! read the block we need
-    do itime = it_start, it_end
-      read(IIN,*,iostat=ios) junk, adj_src(itime-it_start+1,icomp)
-      !!! used to check whether we read the correct block
-      ! if (icomp==1)      print *, junk, adj_src(itime-it_start+1,icomp)
-      if( ios /= 0 ) &
-        call exit_MPI(myrank, &
-          'file '//trim(filename)//' has wrong length, please check with your simulation duration (2222)')
-    enddo
-    close(IIN)
-
-  enddo
-
-  ! lagrange interpolators for receiver location
-  call lagrange_any(xi_receiver,NGLLX,xigll,hxir,hpxir)
-  call lagrange_any(eta_receiver,NGLLY,yigll,hetar,hpetar)
-  call lagrange_any(gamma_receiver,NGLLZ,zigll,hgammar,hpgammar)
-
-  ! interpolates adjoint source onto GLL points within this element
-  do k = 1, NGLLZ
-    do j = 1, NGLLY
-      do i = 1, NGLLX
-        adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
-      enddo
-    enddo
-  enddo
-
-end subroutine compute_arrays_adjoint_source
-
-
-! =======================================================================
-
-! compute array for acoustic source
-  subroutine compute_arrays_source_acoustic(xi_source,eta_source,gamma_source,&
-                        sourcearray,xigll,yigll,zigll,factor_source)
-
-  implicit none
-
-  include "constants.h"
-
-  double precision :: xi_source,eta_source,gamma_source
-  real(kind=CUSTOM_REAL) :: factor_source
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
-
-! Gauss-Lobatto-Legendre points of integration and weights
-  double precision, dimension(NGLLX) :: xigll
-  double precision, dimension(NGLLY) :: yigll
-  double precision, dimension(NGLLZ) :: zigll
-
-! local parameters
-! source arrays
-  double precision, dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearrayd
-  double precision, dimension(NGLLX) :: hxis,hpxis
-  double precision, dimension(NGLLY) :: hetas,hpetas
-  double precision, dimension(NGLLZ) :: hgammas,hpgammas
-  integer :: i,j,k
-
-! initializes
-  sourcearray(:,:,:,:) = 0._CUSTOM_REAL
-  sourcearrayd(:,:,:,:) = 0.d0
-
-! computes Lagrange polynomials at the source location
-  call lagrange_any(xi_source,NGLLX,xigll,hxis,hpxis)
-  call lagrange_any(eta_source,NGLLY,yigll,hetas,hpetas)
-  call lagrange_any(gamma_source,NGLLZ,zigll,hgammas,hpgammas)
-
-! calculates source array for interpolated location
-  do k=1,NGLLZ
-    do j=1,NGLLY
-      do i=1,NGLLX
-        ! identical source array components in x,y,z-direction
-        sourcearrayd(:,i,j,k) = hxis(i)*hetas(j)*hgammas(k)*dble(factor_source)
-      enddo
-    enddo
-  enddo
-
-! distinguish between single and double precision for reals
-  if(CUSTOM_REAL == SIZE_REAL) then
-    sourcearray(:,:,:,:) = sngl(sourcearrayd(:,:,:,:))
-  else
-    sourcearray(:,:,:,:) = sourcearrayd(:,:,:,:)
-  endif
-
-  end subroutine compute_arrays_source_acoustic
-
-
-! testing read in adjoint sources block by block
-
-!!!the original version
-!!!
-!!!subroutine compute_arrays_adjoint_source(myrank, adj_source_file, &
-!!!                    xi_receiver,eta_receiver,gamma_receiver, adj_sourcearray, &
-!!!                    xigll,yigll,zigll,NSTEP)
-!!!
-!!!
-!!!  implicit none
-!!!
-!!!  include 'constants.h'
-!!!
-!!!! input
-!!!  integer myrank, NSTEP
-!!!
-!!!  double precision xi_receiver, eta_receiver, gamma_receiver
-!!!
-!!!  character(len=*) adj_source_file
-!!!
-!!!! output
-!!!  real(kind=CUSTOM_REAL),dimension(NSTEP,NDIM,NGLLX,NGLLY,NGLLZ) :: adj_sourcearray
-!!!
-!!!! Gauss-Lobatto-Legendre points of integration and weights
-!!!  double precision, dimension(NGLLX) :: xigll
-!!!  double precision, dimension(NGLLY) :: yigll
-!!!  double precision, dimension(NGLLZ) :: zigll
-!!!
-!!!  double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY), &
-!!!        hgammar(NGLLZ), hpgammar(NGLLZ)
-!!!
-!!!  real(kind=CUSTOM_REAL) :: adj_src(NSTEP,NDIM)
-!!!
-!!!  integer icomp, itime, i, j, k, ios
-!!!  double precision :: junk
-!!!  ! note: should have same order as orientation in write_seismograms_to_file()
-!!!  character(len=3),dimension(NDIM) :: comp = (/ "BHE", "BHN", "BHZ" /)
-!!!  character(len=256) :: filename
-!!!
-!!!  !adj_sourcearray(:,:,:,:,:) = 0.
-!!!  adj_src = 0._CUSTOM_REAL
-!!!
-!!!  ! loops over components
-!!!  do icomp = 1, NDIM
-!!!
-!!!    filename = 'SEM/'//trim(adj_source_file) // '.'// comp(icomp) // '.adj'
-!!!    open(unit=IIN,file=trim(filename),status='old',action='read',iostat = ios)
-!!!    if (ios /= 0) cycle ! cycles to next file
-!!!    !if (ios /= 0) call exit_MPI(myrank, ' file '//trim(filename)//'does not exist')
-!!!
-!!!    ! reads in adjoint source trace
-!!!    do itime = 1, NSTEP
-!!!
-!!!      ! things become a bit tricky because of the Newark time scheme at
-!!!      ! the very beginning of the time loop. however, when we read in the backward/reconstructed
-!!!      ! wavefields at the end of the first time loop, we can use the adjoint source index from 1 to NSTEP
-!!!      ! (and then access it in reverse NSTEP-it+1  down to 1, for it=1,..NSTEP; see compute_add_sources*.f90).
-!!!      read(IIN,*,iostat=ios) junk, adj_src(itime,icomp)
-!!!      if( ios /= 0 ) &
-!!!        call exit_MPI(myrank, &
-!!!          'file '//trim(filename)//' has wrong length, please check with your simulation duration')
-!!!    enddo
-!!!    close(IIN)
-!!!
-!!!  enddo
-!!!
-!!!  ! lagrange interpolators for receiver location
-!!!  call lagrange_any(xi_receiver,NGLLX,xigll,hxir,hpxir)
-!!!  call lagrange_any(eta_receiver,NGLLY,yigll,hetar,hpetar)
-!!!  call lagrange_any(gamma_receiver,NGLLZ,zigll,hgammar,hpgammar)
-!!!
-!!!  ! interpolates adjoint source onto GLL points within this element
-!!!  do k = 1, NGLLZ
-!!!    do j = 1, NGLLY
-!!!      do i = 1, NGLLX
-!!!        adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
-!!!      enddo
-!!!    enddo
-!!!  enddo
-!!!
-!!!end subroutine compute_arrays_adjoint_source
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine compute_arrays_source(ispec_selected_source, &
+             xi_source,eta_source,gamma_source,sourcearray, &
+             Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
+             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+             xigll,yigll,zigll,nspec)
+
+  implicit none
+
+  include "constants.h"
+
+  integer ispec_selected_source
+  integer nspec
+
+  double precision xi_source,eta_source,gamma_source
+  double precision Mxx,Myy,Mzz,Mxy,Mxz,Myz
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xix,xiy,xiz,etax,etay,etaz, &
+        gammax,gammay,gammaz
+
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
+
+  double precision xixd,xiyd,xizd,etaxd,etayd,etazd,gammaxd,gammayd,gammazd
+
+! Gauss-Lobatto-Legendre points of integration and weights
+  double precision, dimension(NGLLX) :: xigll
+  double precision, dimension(NGLLY) :: yigll
+  double precision, dimension(NGLLZ) :: zigll
+
+! source arrays
+  double precision, dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearrayd
+  double precision, dimension(NGLLX,NGLLY,NGLLZ) :: G11,G12,G13,G21,G22,G23,G31,G32,G33
+  double precision, dimension(NGLLX) :: hxis,hpxis
+  double precision, dimension(NGLLY) :: hetas,hpetas
+  double precision, dimension(NGLLZ) :: hgammas,hpgammas
+
+  integer k,l,m
+
+! calculate G_ij for general source location
+! the source does not necessarily correspond to a Gauss-Lobatto point
+  do m=1,NGLLZ
+    do l=1,NGLLY
+      do k=1,NGLLX
+
+        xixd    = dble(xix(k,l,m,ispec_selected_source))
+        xiyd    = dble(xiy(k,l,m,ispec_selected_source))
+        xizd    = dble(xiz(k,l,m,ispec_selected_source))
+        etaxd   = dble(etax(k,l,m,ispec_selected_source))
+        etayd   = dble(etay(k,l,m,ispec_selected_source))
+        etazd   = dble(etaz(k,l,m,ispec_selected_source))
+        gammaxd = dble(gammax(k,l,m,ispec_selected_source))
+        gammayd = dble(gammay(k,l,m,ispec_selected_source))
+        gammazd = dble(gammaz(k,l,m,ispec_selected_source))
+
+        G11(k,l,m) = Mxx*xixd+Mxy*xiyd+Mxz*xizd
+        G12(k,l,m) = Mxx*etaxd+Mxy*etayd+Mxz*etazd
+        G13(k,l,m) = Mxx*gammaxd+Mxy*gammayd+Mxz*gammazd
+        G21(k,l,m) = Mxy*xixd+Myy*xiyd+Myz*xizd
+        G22(k,l,m) = Mxy*etaxd+Myy*etayd+Myz*etazd
+        G23(k,l,m) = Mxy*gammaxd+Myy*gammayd+Myz*gammazd
+        G31(k,l,m) = Mxz*xixd+Myz*xiyd+Mzz*xizd
+        G32(k,l,m) = Mxz*etaxd+Myz*etayd+Mzz*etazd
+        G33(k,l,m) = Mxz*gammaxd+Myz*gammayd+Mzz*gammazd
+
+      enddo
+    enddo
+  enddo
+
+! compute Lagrange polynomials at the source location
+  call lagrange_any(xi_source,NGLLX,xigll,hxis,hpxis)
+  call lagrange_any(eta_source,NGLLY,yigll,hetas,hpetas)
+  call lagrange_any(gamma_source,NGLLZ,zigll,hgammas,hpgammas)
+
+! calculate source array
+  do m=1,NGLLZ
+    do l=1,NGLLY
+      do k=1,NGLLX
+        call multiply_arrays_source(sourcearrayd,G11,G12,G13,G21,G22,G23, &
+                  G31,G32,G33,hxis,hpxis,hetas,hpetas,hgammas,hpgammas,k,l,m)
+      enddo
+    enddo
+  enddo
+
+! distinguish between single and double precision for reals
+  if(CUSTOM_REAL == SIZE_REAL) then
+    sourcearray(:,:,:,:) = sngl(sourcearrayd(:,:,:,:))
+  else
+    sourcearray(:,:,:,:) = sourcearrayd(:,:,:,:)
+  endif
+
+  end subroutine compute_arrays_source
+
+!=============================================================================
+
+  subroutine compute_arrays_adjoint_source(myrank, adj_source_file, &
+                    xi_receiver,eta_receiver,gamma_receiver, adj_sourcearray, &
+                    xigll,yigll,zigll, &
+                    it_sub_adj,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC)
+
+  implicit none
+
+  include 'constants.h'
+
+! input
+  integer myrank, NSTEP, it_sub_adj, NTSTEP_BETWEEN_READ_ADJSRC
+
+  double precision xi_receiver, eta_receiver, gamma_receiver
+
+  character(len=*) adj_source_file
+
+! output
+  real(kind=CUSTOM_REAL),dimension(NTSTEP_BETWEEN_READ_ADJSRC,NDIM,NGLLX,NGLLY,NGLLZ) :: adj_sourcearray
+
+! Gauss-Lobatto-Legendre points of integration and weights
+  double precision, dimension(NGLLX) :: xigll
+  double precision, dimension(NGLLY) :: yigll
+  double precision, dimension(NGLLZ) :: zigll
+
+  double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY), &
+        hgammar(NGLLZ), hpgammar(NGLLZ)
+
+  real(kind=CUSTOM_REAL) :: adj_src(NTSTEP_BETWEEN_READ_ADJSRC,NDIM)
+
+  integer icomp, itime, i, j, k, ios, it_start, it_end
+  double precision :: junk
+  ! note: should have same order as orientation in write_seismograms_to_file()
+  character(len=3),dimension(NDIM) :: comp != (/ "BHE", "BHN", "BHZ" /)
+  character(len=256) :: filename
+
+  ! gets channel names
+  do icomp=1,NDIM
+    call write_channel_name(icomp,comp(icomp))
+  enddo
+
+  ! range of the block we need to read
+  it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1
+  it_end   = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1
+
+  !adj_sourcearray(:,:,:,:,:) = 0.
+  adj_src = 0._CUSTOM_REAL
+
+  ! loops over components
+  do icomp = 1, NDIM
+
+    filename = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/../SEM/'//trim(adj_source_file)//'.'//comp(icomp)//'.adj'
+    open(unit=IIN,file=trim(filename),status='old',action='read',iostat = ios)
+    ! cycles to next file (this might be more error prone)
+    !if (ios /= 0) cycle
+    ! requires adjoint files to exist (users will have to be more careful in setting up adjoint runs)
+    if (ios /= 0) call exit_MPI(myrank, ' file '//trim(filename)//'does not exist')
+
+    ! reads in adjoint source trace
+    !! skip unused blocks
+    do itime = 1, it_start-1
+      read(IIN,*,iostat=ios) junk, junk
+      if( ios /= 0 ) &
+        call exit_MPI(myrank, &
+          'file '//trim(filename)//' has wrong length, please check with your simulation duration (1111)')
+    enddo
+    !! read the block we need
+    do itime = it_start, it_end
+      read(IIN,*,iostat=ios) junk, adj_src(itime-it_start+1,icomp)
+      !!! used to check whether we read the correct block
+      ! if (icomp==1)      print *, junk, adj_src(itime-it_start+1,icomp)
+      if( ios /= 0 ) &
+        call exit_MPI(myrank, &
+          'file '//trim(filename)//' has wrong length, please check with your simulation duration (2222)')
+    enddo
+    close(IIN)
+
+  enddo
+
+  ! lagrange interpolators for receiver location
+  call lagrange_any(xi_receiver,NGLLX,xigll,hxir,hpxir)
+  call lagrange_any(eta_receiver,NGLLY,yigll,hetar,hpetar)
+  call lagrange_any(gamma_receiver,NGLLZ,zigll,hgammar,hpgammar)
+
+  ! interpolates adjoint source onto GLL points within this element
+  do k = 1, NGLLZ
+    do j = 1, NGLLY
+      do i = 1, NGLLX
+        adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
+      enddo
+    enddo
+  enddo
+
+end subroutine compute_arrays_adjoint_source
+
+
+! =======================================================================
+
+! compute array for acoustic source
+  subroutine compute_arrays_source_acoustic(xi_source,eta_source,gamma_source,&
+                        sourcearray,xigll,yigll,zigll,factor_source)
+
+  implicit none
+
+  include "constants.h"
+
+  double precision :: xi_source,eta_source,gamma_source
+  real(kind=CUSTOM_REAL) :: factor_source
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
+
+! Gauss-Lobatto-Legendre points of integration and weights
+  double precision, dimension(NGLLX) :: xigll
+  double precision, dimension(NGLLY) :: yigll
+  double precision, dimension(NGLLZ) :: zigll
+
+! local parameters
+! source arrays
+  double precision, dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearrayd
+  double precision, dimension(NGLLX) :: hxis,hpxis
+  double precision, dimension(NGLLY) :: hetas,hpetas
+  double precision, dimension(NGLLZ) :: hgammas,hpgammas
+  integer :: i,j,k
+
+! initializes
+  sourcearray(:,:,:,:) = 0._CUSTOM_REAL
+  sourcearrayd(:,:,:,:) = 0.d0
+
+! computes Lagrange polynomials at the source location
+  call lagrange_any(xi_source,NGLLX,xigll,hxis,hpxis)
+  call lagrange_any(eta_source,NGLLY,yigll,hetas,hpetas)
+  call lagrange_any(gamma_source,NGLLZ,zigll,hgammas,hpgammas)
+
+! calculates source array for interpolated location
+  do k=1,NGLLZ
+    do j=1,NGLLY
+      do i=1,NGLLX
+        ! identical source array components in x,y,z-direction
+        sourcearrayd(:,i,j,k) = hxis(i)*hetas(j)*hgammas(k)*dble(factor_source)
+      enddo
+    enddo
+  enddo
+
+! distinguish between single and double precision for reals
+  if(CUSTOM_REAL == SIZE_REAL) then
+    sourcearray(:,:,:,:) = sngl(sourcearrayd(:,:,:,:))
+  else
+    sourcearray(:,:,:,:) = sourcearrayd(:,:,:,:)
+  endif
+
+  end subroutine compute_arrays_source_acoustic
+
+
+! testing read in adjoint sources block by block
+
+!!!the original version
+!!!
+!!!subroutine compute_arrays_adjoint_source(myrank, adj_source_file, &
+!!!                    xi_receiver,eta_receiver,gamma_receiver, adj_sourcearray, &
+!!!                    xigll,yigll,zigll,NSTEP)
+!!!
+!!!
+!!!  implicit none
+!!!
+!!!  include 'constants.h'
+!!!
+!!!! input
+!!!  integer myrank, NSTEP
+!!!
+!!!  double precision xi_receiver, eta_receiver, gamma_receiver
+!!!
+!!!  character(len=*) adj_source_file
+!!!
+!!!! output
+!!!  real(kind=CUSTOM_REAL),dimension(NSTEP,NDIM,NGLLX,NGLLY,NGLLZ) :: adj_sourcearray
+!!!
+!!!! Gauss-Lobatto-Legendre points of integration and weights
+!!!  double precision, dimension(NGLLX) :: xigll
+!!!  double precision, dimension(NGLLY) :: yigll
+!!!  double precision, dimension(NGLLZ) :: zigll
+!!!
+!!!  double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY), &
+!!!        hgammar(NGLLZ), hpgammar(NGLLZ)
+!!!
+!!!  real(kind=CUSTOM_REAL) :: adj_src(NSTEP,NDIM)
+!!!
+!!!  integer icomp, itime, i, j, k, ios
+!!!  double precision :: junk
+!!!  ! note: should have same order as orientation in write_seismograms_to_file()
+!!!  character(len=3),dimension(NDIM) :: comp = (/ "BHE", "BHN", "BHZ" /)
+!!!  character(len=256) :: filename
+!!!
+!!!  !adj_sourcearray(:,:,:,:,:) = 0.
+!!!  adj_src = 0._CUSTOM_REAL
+!!!
+!!!  ! loops over components
+!!!  do icomp = 1, NDIM
+!!!
+!!!    filename = 'SEM/'//trim(adj_source_file) // '.'// comp(icomp) // '.adj'
+!!!    open(unit=IIN,file=trim(filename),status='old',action='read',iostat = ios)
+!!!    if (ios /= 0) cycle ! cycles to next file
+!!!    !if (ios /= 0) call exit_MPI(myrank, ' file '//trim(filename)//'does not exist')
+!!!
+!!!    ! reads in adjoint source trace
+!!!    do itime = 1, NSTEP
+!!!
+!!!      ! things become a bit tricky because of the Newark time scheme at
+!!!      ! the very beginning of the time loop. however, when we read in the backward/reconstructed
+!!!      ! wavefields at the end of the first time loop, we can use the adjoint source index from 1 to NSTEP
+!!!      ! (and then access it in reverse NSTEP-it+1  down to 1, for it=1,..NSTEP; see compute_add_sources*.f90).
+!!!      read(IIN,*,iostat=ios) junk, adj_src(itime,icomp)
+!!!      if( ios /= 0 ) &
+!!!        call exit_MPI(myrank, &
+!!!          'file '//trim(filename)//' has wrong length, please check with your simulation duration')
+!!!    enddo
+!!!    close(IIN)
+!!!
+!!!  enddo
+!!!
+!!!  ! lagrange interpolators for receiver location
+!!!  call lagrange_any(xi_receiver,NGLLX,xigll,hxir,hpxir)
+!!!  call lagrange_any(eta_receiver,NGLLY,yigll,hetar,hpetar)
+!!!  call lagrange_any(gamma_receiver,NGLLZ,zigll,hgammar,hpgammar)
+!!!
+!!!  ! interpolates adjoint source onto GLL points within this element
+!!!  do k = 1, NGLLZ
+!!!    do j = 1, NGLLY
+!!!      do i = 1, NGLLX
+!!!        adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
+!!!      enddo
+!!!    enddo
+!!!  enddo
+!!!
+!!!end subroutine compute_arrays_adjoint_source
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in	2011-10-30 02:25:28 UTC (rev 19129)
@@ -73,6 +73,11 @@
 ! for optimized routines by Deville et al. (2002)
   integer, parameter :: m1 = NGLLX, m2 = NGLLX * NGLLY
 
+!!-----------------------------------------------------------
+!!
+!! seismogram and i/o output
+!!
+!!-----------------------------------------------------------
 ! ouput format of seismograms, ASCII or binary
   logical, parameter :: SEISMOGRAMS_BINARY = .false.
 ! output format of seismograms, Seismic Unix (binary with 240-byte-headers)
@@ -104,6 +109,12 @@
 ! ignore variable name field (junk) at the beginning of each input line
   logical, parameter :: IGNORE_JUNK = .true.,DONT_IGNORE_JUNK = .false.
 
+!!-----------------------------------------------------------
+!!
+!! source/receiver setup
+!!
+!!-----------------------------------------------------------
+
 ! flag to print the details of source location
   logical, parameter :: SHOW_DETAILS_LOCATE_SOURCE = .false.
 
@@ -126,6 +137,12 @@
 ! use directory OUTPUT_FILES/ for seismogram output
   logical,parameter :: USE_OUTPUT_FILES_PATH = .true.
 
+!!-----------------------------------------------------------
+!!
+!! absorption and PML
+!!
+!!-----------------------------------------------------------
+
 ! absorb top surface
 ! (defined in mesh as 'free_surface_file')
   logical,parameter :: ABSORB_FREE_SURFACE = .false.
@@ -136,6 +153,12 @@
 ! (user parameters can be specified in PML_init.f90)
   logical,parameter :: ABSORB_USE_PML = .false.
 
+!!-----------------------------------------------------------
+!!
+!! directory structure
+!!
+!!-----------------------------------------------------------
+
 ! paths for inputs and outputs files
   character(len=256), parameter :: IN_DATA_FILES_PATH = '../in_data_files/'
   character(len=256), parameter :: MF_IN_DATA_FILES_PATH = '../in_data_files/meshfem3D_files/'
@@ -197,6 +220,12 @@
   logical, parameter :: EXTERNAL_MESH_MOVIE_SURFACE = .false.
   logical, parameter :: EXTERNAL_MESH_CREATE_SHAKEMAP = .false.
 
+!!-----------------------------------------------------------
+!!
+!! image outputs
+!!
+!!-----------------------------------------------------------
+
 ! plots VTK cross-section planes instead of model surface
 ! (EXPERIMENTAL feature)
 ! (requires EXTERNAL_MESH_MOVIE_SURFACE set to true)
@@ -218,6 +247,17 @@
 ! this is an absolute value for normalized coordinates in the Earth
   double precision, parameter :: SMALLVAL_TOL = 1.d-10
 
+!!-----------------------------------------------------------
+!!
+!! mesh optimization
+!!
+!!-----------------------------------------------------------
+
+! add mesh coloring for the GPU + MPI implementation
+  logical, parameter :: USE_MESH_COLORING_GPU = .false.
+  integer, parameter :: MAX_NUMBER_OF_COLORS = 10000
+  integer, parameter :: NGNOD_HEXAHEDRA = 8
+
 !------------------------------------------------------
 !----------- do not modify anything below -------------
 !------------------------------------------------------

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/convolve_source_timefunction.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/convolve_source_timefunction.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/convolve_source_timefunction.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,134 +1,134 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  program convolve_source_time_function
-
-!
-! convolve seismograms computed for a Heaviside with given source time function
-!
-
-! we mimic a triangle of half duration equal to half_duration_triangle
-! using a Gaussian having a very close shape, as explained in Figure 4.2
-! of the manual
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: i,j,N_j,number_remove,nlines
-
-  double precision :: alpha,dt,tau_j,source,exponent,t1,t2,displ1,displ2,gamma,height,half_duration_triangle
-
-  logical :: triangle
-
-  double precision, dimension(:), allocatable :: time,sem,sem_fil
-
-! read file with number of lines in input
-  open(unit=33,file='input_convolve_code.txt',status='old',action='read')
-  read(33,*) nlines
-  read(33,*) half_duration_triangle
-  read(33,*) triangle
-  close(33)
-
-! allocate arrays
-  allocate(time(nlines),sem(nlines),sem_fil(nlines))
-
-! read the input seismogram
-  do i = 1,nlines
-    read(5,*) time(i),sem(i)
-  enddo
-
-! define a Gaussian with the right exponent to mimic a triangle of equivalent half duration
-  alpha = SOURCE_DECAY_MIMIC_TRIANGLE/half_duration_triangle
-
-! compute the time step
-  dt = time(2) - time(1)
-
-! number of integers for which the source wavelet is different from zero
-  if(triangle) then
-    N_j = ceiling(half_duration_triangle/dt)
-  else
-    N_j = ceiling(1.5d0*half_duration_triangle/dt)
-  endif
-
-  do i = 1,nlines
-
-    sem_fil(i) = 0.d0
-
-    do j = -N_j,N_j
-
-      if(i > j .and. i-j <= nlines) then
-
-      tau_j = dble(j)*dt
-
-! convolve with a triangle
-    if(triangle) then
-       height = 1.d0 / half_duration_triangle
-       if(abs(tau_j) > half_duration_triangle) then
-         source = 0.d0
-       else if (tau_j < 0.d0) then
-         t1 = - N_j * dt
-         displ1 = 0.d0
-         t2 = 0.d0
-         displ2 = height
-         gamma = (tau_j - t1) / (t2 - t1)
-         source= (1.d0 - gamma) * displ1 + gamma * displ2
-       else
-         t1 = 0.d0
-         displ1 = height
-         t2 = + N_j * dt
-         displ2 = 0.d0
-         gamma = (tau_j - t1) / (t2 - t1)
-         source= (1.d0 - gamma) * displ1 + gamma * displ2
-       endif
-
-      else
-
-! convolve with a Gaussian
-        exponent = alpha**2 * tau_j**2
-        if(exponent < 50.d0) then
-          source = alpha*exp(-exponent)/sqrt(PI)
-        else
-          source = 0.d0
-        endif
-
-      endif
-
-      sem_fil(i) = sem_fil(i) + sem(i-j)*source*dt
-
-      endif
-
-    enddo
-  enddo
-
-! compute number of samples to remove from end of seismograms
-  number_remove = N_j + 1
-  do i=1,nlines - number_remove
-    write(*,*) sngl(time(i)),' ',sngl(sem_fil(i))
-  enddo
-
-  end program convolve_source_time_function
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  program convolve_source_time_function
+
+!
+! convolve seismograms computed for a Heaviside with given source time function
+!
+
+! we mimic a triangle of half duration equal to half_duration_triangle
+! using a Gaussian having a very close shape, as explained in Figure 4.2
+! of the manual
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: i,j,N_j,number_remove,nlines
+
+  double precision :: alpha,dt,tau_j,source,exponent,t1,t2,displ1,displ2,gamma,height,half_duration_triangle
+
+  logical :: triangle
+
+  double precision, dimension(:), allocatable :: time,sem,sem_fil
+
+! read file with number of lines in input
+  open(unit=33,file='input_convolve_code.txt',status='old',action='read')
+  read(33,*) nlines
+  read(33,*) half_duration_triangle
+  read(33,*) triangle
+  close(33)
+
+! allocate arrays
+  allocate(time(nlines),sem(nlines),sem_fil(nlines))
+
+! read the input seismogram
+  do i = 1,nlines
+    read(5,*) time(i),sem(i)
+  enddo
+
+! define a Gaussian with the right exponent to mimic a triangle of equivalent half duration
+  alpha = SOURCE_DECAY_MIMIC_TRIANGLE/half_duration_triangle
+
+! compute the time step
+  dt = time(2) - time(1)
+
+! number of integers for which the source wavelet is different from zero
+  if(triangle) then
+    N_j = ceiling(half_duration_triangle/dt)
+  else
+    N_j = ceiling(1.5d0*half_duration_triangle/dt)
+  endif
+
+  do i = 1,nlines
+
+    sem_fil(i) = 0.d0
+
+    do j = -N_j,N_j
+
+      if(i > j .and. i-j <= nlines) then
+
+      tau_j = dble(j)*dt
+
+! convolve with a triangle
+    if(triangle) then
+       height = 1.d0 / half_duration_triangle
+       if(abs(tau_j) > half_duration_triangle) then
+         source = 0.d0
+       else if (tau_j < 0.d0) then
+         t1 = - N_j * dt
+         displ1 = 0.d0
+         t2 = 0.d0
+         displ2 = height
+         gamma = (tau_j - t1) / (t2 - t1)
+         source= (1.d0 - gamma) * displ1 + gamma * displ2
+       else
+         t1 = 0.d0
+         displ1 = height
+         t2 = + N_j * dt
+         displ2 = 0.d0
+         gamma = (tau_j - t1) / (t2 - t1)
+         source= (1.d0 - gamma) * displ1 + gamma * displ2
+       endif
+
+      else
+
+! convolve with a Gaussian
+        exponent = alpha**2 * tau_j**2
+        if(exponent < 50.d0) then
+          source = alpha*exp(-exponent)/sqrt(PI)
+        else
+          source = 0.d0
+        endif
+
+      endif
+
+      sem_fil(i) = sem_fil(i) + sem(i-j)*source*dt
+
+      endif
+
+    enddo
+  enddo
+
+! compute number of samples to remove from end of seismograms
+  number_remove = N_j + 1
+  do i=1,nlines - number_remove
+    write(*,*) sngl(time(i)),' ',sngl(sem_fil(i))
+  enddo
+
+  end program convolve_source_time_function
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,1039 +1,1039 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!
-!---  create a movie of the vertical component of surface displacement or velocity
-!---  or a ShakeMap(R) (i.e. map of the maximum absolute value of the two horizontal components
-!---  of the velocity vector) in AVS, OpenDX or GMT format
-!
-
-  program create_movie_shakemap
-
-  implicit none
-
-  include "constants.h"
-  include "surface_from_mesher.h"
-
-!-------------------------------------------------------------------------------------------------
-! user parameters
-! threshold in percent of the maximum below which we cut the amplitude
-  logical, parameter :: APPLY_THRESHOLD = .false.
-  real(kind=CUSTOM_REAL), parameter :: THRESHOLD = 1._CUSTOM_REAL / 100._CUSTOM_REAL
-
-! coefficient of power law used for non linear scaling
-  logical, parameter :: NONLINEAR_SCALING = .false.
-  real(kind=CUSTOM_REAL), parameter :: POWER_SCALING = 0.13_CUSTOM_REAL
-
-!-------------------------------------------------------------------------------------------------
-
-  integer it,it1,it2,ivalue,nspectot_AVS_max,ispec
-  integer iformat,nframes,iframe,inumber,inorm,iscaling_shake
-  integer ibool_number,ibool_number1,ibool_number2,ibool_number3,ibool_number4
-
-  logical USE_OPENDX,USE_AVS,USE_GMT,plot_shaking_map
-
-  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: x,y,z,display
-  real(kind=CUSTOM_REAL) xcoord,ycoord,zcoord
-  real(kind=CUSTOM_REAL) vectorx,vectory,vectorz
-
-  double precision min_field_current,max_field_current,max_absol
-
-  character(len=256) outputname
-
-  integer ipoin
-
-  ! GMT
-  double precision lat,long
-
-  ! for sorting routine
-  integer npointot,ilocnum,nglob,i,j,ielm,ieoff,ispecloc
-  integer, dimension(:), allocatable :: iglob,loc,ireorder
-  logical, dimension(:), allocatable :: ifseg,mask_point
-  double precision, dimension(:), allocatable :: xp,yp,zp,xp_save,yp_save,zp_save,field_display
-
-  ! movie files stored by solver
-  real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
-         store_val_x,store_val_y,store_val_z, &
-         store_val_ux,store_val_uy,store_val_uz
-
-  ! parameters read from parameter file
-  integer NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,UTM_PROJECTION_ZONE,SIMULATION_TYPE
-  integer NSOURCES,NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY
-  logical MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
-          USE_HIGHRES_FOR_MOVIES,SUPPRESS_UTM_PROJECTION
-  integer NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO
-  double precision DT
-  double precision HDUR_MOVIE
-  logical ATTENUATION,USE_OLSEN_ATTENUATION, &
-          OCEANS,TOPOGRAPHY
-  logical ABSORBING_CONDITIONS,SAVE_FORWARD
-  logical ANISOTROPY,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
-  character(len=256) OUTPUT_FILES,LOCAL_PATH
-  integer NPROC
-  integer ier
-
-
-!--------------------------------------------
-!!!! NL NL for external meshes
-!--------------------------------------------
-  ! muting source region
-  real(kind=CUSTOM_REAL), parameter :: RADIUS_TO_MUTE = 1000._CUSTOM_REAL
-  logical, parameter :: MUTE_SOURCE = .true.
-  real(kind=CUSTOM_REAL), parameter :: X_SOURCE_EXT_MESH = -9023.021484375
-  real(kind=CUSTOM_REAL), parameter :: Y_SOURCE_EXT_MESH = 6123.611328125
-  real(kind=CUSTOM_REAL), parameter :: Z_SOURCE_EXT_MESH = 17.96331405639648
-!--------------------------------------------
-!!!! NL NL
-
-  ! order of points representing the 2D square element
-  integer,dimension(NGNOD2D_AVS_DX),parameter :: iorder = (/1,3,2,4/)
-
-
-! ************** PROGRAM STARTS HERE **************
-
-  print *
-  print *,'Recombining all movie frames to create a movie'
-  print *
-
-  print *
-  print *,'reading parameter file'
-  print *
-
-  ! read the parameter file
-  call read_parameter_file(NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
-        UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
-        ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
-        OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
-        MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
-        NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
-        SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION, &
-        NTSTEP_BETWEEN_OUTPUT_INFO,SIMULATION_TYPE,SAVE_FORWARD, &
-        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
-
-  ! get the base pathname for output files
-  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
-
-  ! only one global array for movie data, but stored for all surfaces defined
-  ! in file 'surface_from_mesher.h'
-  if(USE_HIGHRES_FOR_MOVIES) then
-     ilocnum = NSPEC_SURFACE_EXT_MESH*NGLLSQUARE
-  else
-     ilocnum = NSPEC_SURFACE_EXT_MESH*NGNOD2D_AVS_DX
-  endif
-  print*,'  moviedata element surfaces: ',NSPEC_SURFACE_EXT_MESH
-  print*,'  moviedata total elements all: ',ilocnum
-  print *
-
-  if(SAVE_DISPLACEMENT) then
-    print *,'Vertical displacement will be shown in movie'
-  else
-    print *,'Vertical velocity will be shown in movie'
-  endif
-  print *
-
-
-  ! user input
-  print *,'1 = create files in OpenDX format'
-  print *,'2 = create files in AVS UCD format'
-  print *,'3 = create files in GMT xyz Ascii long/lat/Uz format'
-  print *,'any other value = exit'
-  print *
-  print *,'enter value:'
-  read(5,*) iformat
-  if(iformat < 1 .or. iformat > 3) stop 'exiting...'
-
-  plot_shaking_map = .false.
-  print *,'movie frames have been saved every ',NTSTEP_BETWEEN_FRAMES,' time steps'
-  print *
-  print *,'enter first time step of movie (e.g. 1, enter -1 for shaking map)'
-  read(5,*) it1
-  if(it1 == 0 ) it1 = 1
-  if(it1 == -1) plot_shaking_map = .true.
-  if(.not. plot_shaking_map) then
-    print *,'enter last time step of movie (e.g. ',NSTEP,')'
-    read(5,*) it2
-    print *
-    print *,'1 = define file names using frame number'
-    print *,'2 = define file names using time step number'
-    print *,'any other value = exit'
-    print *
-    print *,'enter value:'
-    read(5,*) inumber
-    if(inumber<1 .or. inumber>2) stop 'exiting...'
-    print *
-    print *,'looping from ',it1,' to ',it2,' every ',NTSTEP_BETWEEN_FRAMES,' time steps'
-    ! count number of movie frames
-    nframes = 0
-    do it = it1,it2
-      if(mod(it,NTSTEP_BETWEEN_FRAMES) == 0) nframes = nframes + 1
-    enddo
-  else
-    ! only one frame if shaking map
-    nframes = 1
-    it1 = 1
-    it2 = 1
-  endif
-  print *
-  print *,'total number of frames will be ',nframes
-  if(nframes == 0) stop 'null number of frames'
-
-  iscaling_shake = 0
-  if(plot_shaking_map) then
-    print *
-    print *,'norm to display in shaking map:'
-    print *,'1=displacement  2=velocity  3=acceleration'
-    print *
-    read(5,*) inorm
-    if(inorm < 1 .or. inorm > 3) stop 'incorrect value of inorm'
-    print *
-    print *,'apply non-linear scaling to shaking map:'
-    print *,'1=non-linear  2=no scaling'
-    print *
-    read(5,*) iscaling_shake
-    if(iscaling_shake < 1 .or. iscaling_shake > 2) stop 'incorrect value of iscaling_shake'
-  else
-    print *
-    print *,'movie data:'
-    print *,'1= norm of velocity  2=velocity x-comp 3=velocity y-comp 4=velocity z-comp'
-    print *
-    read(5,*) inorm
-    if(inorm < 1 .or. inorm > 4) stop 'incorrect value of inorm'
-  endif
-
-! file format flags
-  if(iformat == 1) then
-    USE_OPENDX = .true.
-    USE_AVS = .false.
-    USE_GMT = .false.
-  else if(iformat == 2) then
-    USE_OPENDX = .false.
-    USE_AVS = .true.
-    USE_GMT = .false.
-  else
-    USE_OPENDX = .false.
-    USE_AVS = .false.
-    USE_GMT = .true.
-  endif
-
-  ! define the total number of elements at the surface
-  if(USE_HIGHRES_FOR_MOVIES) then
-     nspectot_AVS_max = NSPEC_SURFACE_EXT_MESH * (NGLLX-1) * (NGLLY-1)
-  else
-     nspectot_AVS_max = NSPEC_SURFACE_EXT_MESH
-  endif
-
-  ! maximum theoretical number of points at the surface
-  npointot = NGNOD2D_AVS_DX * nspectot_AVS_max
-
-  ! allocate arrays for sorting routine
-  allocate(iglob(npointot),loc(npointot), &
-          ifseg(npointot), &
-          xp(npointot),yp(npointot),zp(npointot), &
-          xp_save(npointot),yp_save(npointot),zp_save(npointot), &
-          field_display(npointot), &
-          mask_point(npointot), &
-          ireorder(npointot),stat=ier)
-  if( ier /= 0 ) stop 'error allocating arrays for sorting routine'
-
-  ! allocates data arrays
-  allocate(store_val_x(ilocnum), &
-          store_val_y(ilocnum), &
-          store_val_z(ilocnum), &
-          store_val_ux(ilocnum), &
-          store_val_uy(ilocnum), &
-          store_val_uz(ilocnum),stat=ier)
-  if( ier /= 0 ) stop 'error allocating arrays for data arrays'
-
-  if(USE_HIGHRES_FOR_MOVIES) then
-    allocate(x(NGLLX,NGLLY), &
-            y(NGLLX,NGLLY), &
-            z(NGLLX,NGLLY), &
-            display(NGLLX,NGLLY),stat=ier)
-    if( ier /= 0 ) stop 'error allocating arrays for highres'
-  endif
-
-  ! user output
-  print *
-  print *,'there are a total of ',nspectot_AVS_max,' elements at the surface'
-  print *
-  print *
-  if(APPLY_THRESHOLD .and. .not. plot_shaking_map) &
-    print *,'Will apply a threshold to amplitude below ',100.*THRESHOLD,' %'
-  if(NONLINEAR_SCALING .and. (.not. plot_shaking_map .or. iscaling_shake == 1)) &
-    print *,'Will apply a non linear scaling with coef ',POWER_SCALING
-
-
-  iframe = 0
-
-! loop on all the time steps in the range entered
-  do it = it1,it2
-
-    ! check if time step corresponds to a movie frame
-    if(mod(it,NTSTEP_BETWEEN_FRAMES) == 0 .or. plot_shaking_map) then
-
-      iframe = iframe + 1
-
-      print *
-      if(plot_shaking_map) then
-        print *,'reading shaking map snapshot'
-      else
-        print *,'reading snapshot time step ',it,' out of ',NSTEP
-      endif
-      print *
-
-      ! read all the elements from the same file
-      if(plot_shaking_map) then
-        write(outputname,"('/shakingdata')")
-      else
-        write(outputname,"('/moviedata',i6.6)") it
-      endif
-      open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(outputname),status='old', &
-            action='read',form='unformatted',iostat=ier)
-      if( ier /= 0 ) then
-        print*,'error: ',trim(OUTPUT_FILES)//trim(outputname)
-        stop 'error opening moviedata file'
-      endif
-
-      read(IOUT) store_val_x
-      read(IOUT) store_val_y
-      read(IOUT) store_val_z
-      read(IOUT) store_val_ux
-      read(IOUT) store_val_uy
-      read(IOUT) store_val_uz
-      close(IOUT)
-
-      ! clear number of elements kept
-      ispec = 0
-
-      ! reset point number
-      ipoin = 0
-
-      do ispecloc = 1, NSPEC_SURFACE_EXT_MESH
-
-        if(USE_HIGHRES_FOR_MOVIES) then
-          ! assign the OpenDX "elements"
-          do j = 1,NGLLY
-            do i = 1,NGLLX
-              ipoin = ipoin + 1
-
-              ! x,y,z coordinates
-              xcoord = store_val_x(ipoin)
-              ycoord = store_val_y(ipoin)
-              zcoord = store_val_z(ipoin)
-
-              ! note:
-              ! for shakemaps: ux = norm displacement, uy = norm velocity, uz = norm acceleration
-              ! for movies: ux = velocity x-component, uy = velocity y-component, uz = velocity z-component
-              vectorx = store_val_ux(ipoin)
-              vectory = store_val_uy(ipoin)
-              vectorz = store_val_uz(ipoin)
-
-              x(i,j) = xcoord
-              y(i,j) = ycoord
-              z(i,j) = zcoord
-
-              ! shakemap
-              if(plot_shaking_map) then
-                !!!! NL NL mute value near source
-                if ( (sqrt(((x(i,j) - (X_SOURCE_EXT_MESH))**2 + &
-                     (y(i,j) - (Y_SOURCE_EXT_MESH))**2 + &
-                     (z(i,j) - (Z_SOURCE_EXT_MESH))**2)) < RADIUS_TO_MUTE) &
-                     .and. MUTE_SOURCE) then
-
-                  display(i,j) = 0.
-                else
-                  ! chooses norm
-                  if(inorm == 1) then
-                    ! norm displacement
-                    display(i,j) = vectorx
-                  else if(inorm == 2) then
-                    ! norm velocity
-                    display(i,j) = vectory
-                  else
-                    ! norm acceleration
-                    display(i,j) = vectorz
-                  endif
-                endif
-              else
-                ! movie
-                if(inorm == 1) then
-                  ! norm of velocity
-                  display(i,j) = sqrt(vectorz**2+vectory**2+vectorx**2)
-                else if( inorm == 2 ) then
-                  ! velocity x-component
-                  display(i,j) = vectorx
-                else if( inorm == 3 ) then
-                  ! velocity y-component
-                  display(i,j) = vectory
-                else if( inorm == 4 ) then
-                  ! velocity z-component
-                  display(i,j) = vectorz
-                endif
-              endif
-
-            enddo
-          enddo
-
-          ! assign the values of the corners of the OpenDX "elements"
-          ispec = ispec + 1
-          ielm = (NGLLX-1)*(NGLLY-1)*(ispec-1)
-
-          do j = 1,NGLLY-1
-            do i = 1,NGLLX-1
-              ieoff = NGNOD2D_AVS_DX*(ielm+(i-1)+(j-1)*(NGLLX-1))
-              do ilocnum = 1,NGNOD2D_AVS_DX
-                !            do k = 1,NGNOD2D_AVS_DX
-
-
-                if(ilocnum == 1) then
-                  xp(ieoff+ilocnum) = dble(x(i,j))
-                  yp(ieoff+ilocnum) = dble(y(i,j))
-                  zp(ieoff+ilocnum) = dble(z(i,j))
-                  field_display(ieoff+ilocnum) = dble(display(i,j))
-                elseif(ilocnum == 2) then
-
-                  ! accounts for different ordering of square points
-                  xp(ieoff+ilocnum) = dble(x(i+1,j+1))
-                  yp(ieoff+ilocnum) = dble(y(i+1,j+1))
-                  zp(ieoff+ilocnum) = dble(z(i+1,j+1))
-                  field_display(ieoff+ilocnum) = dble(display(i+1,j+1))
-
-                  !                xp(ieoff+ilocnum) = dble(x(i+1,j))
-                  !                yp(ieoff+ilocnum) = dble(y(i+1,j))
-                  !                zp(ieoff+ilocnum) = dble(z(i+1,j))
-                  !                field_display(ieoff+ilocnum) = dble(display(i+1,j))
-
-                elseif(ilocnum == 3) then
-
-                  ! accounts for different ordering of square points
-                  xp(ieoff+ilocnum) = dble(x(i+1,j))
-                  yp(ieoff+ilocnum) = dble(y(i+1,j))
-                  zp(ieoff+ilocnum) = dble(z(i+1,j))
-                  field_display(ieoff+ilocnum) = dble(display(i+1,j))
-
-                  !                xp(ieoff+ilocnum) = dble(x(i+1,j+1))
-                  !                yp(ieoff+ilocnum) = dble(y(i+1,j+1))
-                  !                zp(ieoff+ilocnum) = dble(z(i+1,j+1))
-                  !                field_display(ieoff+ilocnum) = dble(display(i+1,j+1))
-                else
-                  xp(ieoff+ilocnum) = dble(x(i,j+1))
-                  yp(ieoff+ilocnum) = dble(y(i,j+1))
-                  zp(ieoff+ilocnum) = dble(z(i,j+1))
-                  field_display(ieoff+ilocnum) = dble(display(i,j+1))
-                endif
-
-              enddo
-
-              !if( j==1 .and. ispec==1) then
-              !print*,'p1',xp(ieoff+1),yp(ieoff+1),zp(ieoff+1)
-              !print*,'p2',xp(ieoff+2),yp(ieoff+2),zp(ieoff+2)
-              !print*,'p3',xp(ieoff+3),yp(ieoff+3),zp(ieoff+3)
-              !print*,'p4',xp(ieoff+4),yp(ieoff+4),zp(ieoff+4)
-              !endif
-
-            enddo
-          enddo
-
-        else
-          ! low-resolution (only spectral element corners)
-          ispec = ispec + 1
-          ieoff = NGNOD2D_AVS_DX*(ispec-1)
-
-          ! four points for each element
-          do i = 1,NGNOD2D_AVS_DX
-
-            ! accounts for different ordering of square points
-            ilocnum = iorder(i)
-
-            ipoin = ipoin + 1
-
-            xcoord = store_val_x(ipoin)
-            ycoord = store_val_y(ipoin)
-            zcoord = store_val_z(ipoin)
-
-            vectorx = store_val_ux(ipoin)
-            vectory = store_val_uy(ipoin)
-            vectorz = store_val_uz(ipoin)
-
-
-            xp(ilocnum+ieoff) = dble(xcoord)
-            yp(ilocnum+ieoff) = dble(ycoord)
-            zp(ilocnum+ieoff) = dble(zcoord)
-
-            ! shakemap
-            if(plot_shaking_map) then
-              !!!! NL NL mute value near source
-              if ( (sqrt(((dble(xcoord) - (X_SOURCE_EXT_MESH))**2 + &
-                     (dble(ycoord) - (Y_SOURCE_EXT_MESH))**2 + &
-                     (dble(zcoord) - (Z_SOURCE_EXT_MESH))**2)) < RADIUS_TO_MUTE) &
-                     .and. MUTE_SOURCE) then
-                  field_display(ilocnum+ieoff) = 0.
-              else
-                if(inorm == 1) then
-                  ! norm of displacement
-                  field_display(ilocnum+ieoff) = dble(vectorx)
-                else if(inorm == 2) then
-                  ! norm of velocity
-                  field_display(ilocnum+ieoff) = dble(vectory)
-                else
-                  ! norm of acceleration
-                  field_display(ilocnum+ieoff) = dble(vectorz)
-                endif
-              endif
-            else
-              ! movie
-              if(inorm == 1) then
-                ! norm of velocity
-                field_display(ilocnum+ieoff) = sqrt(vectorz**2+vectory**2+vectorx**2)
-              else if( inorm == 2 ) then
-                ! velocity x-component
-                field_display(ilocnum+ieoff) = vectorx
-              else if( inorm == 3 ) then
-                ! velocity y-component
-                field_display(ilocnum+ieoff) = vectory
-              else
-                ! velocity z-component
-                field_display(ilocnum+ieoff) = vectorz
-              endif
-              ! takes norm of velocity vector
-              !field_display(ilocnum+ieoff) =sqrt(vectorz**2+vectory**2+vectorx**2)
-            endif
-
-          enddo
-        endif ! USE_HIGHRES_FOR_MOVIES
-      enddo ! NSPEC_SURFACE_EXT_MESH
-
-      ! copy coordinate arrays since the sorting routine does not preserve them
-      xp_save(:) = xp(:)
-      yp_save(:) = yp(:)
-      zp_save(:) = zp(:)
-
-      ! sort the list based upon coordinates to get rid of multiples
-      print *,'sorting list of points'
-      call get_global_AVS(nspectot_AVS_max,xp,yp,zp,iglob,loc,ifseg,nglob,npointot, &
-           dble(minval(store_val_x(:))),dble(maxval(store_val_x(:))))
-
-      ! print total number of points found
-      print *
-      print *,'found a total of ',nglob,' points'
-      print *,'initial number of points (with multiples) was ',npointot
-
-
-      !  normalize and scale vector field
-
-      ! compute min and max of data value to normalize
-      min_field_current = minval(field_display(:))
-      max_field_current = maxval(field_display(:))
-
-      ! print minimum and maximum amplitude in current snapshot
-      print *
-      print *,'minimum amplitude in current snapshot = ',min_field_current
-      print *,'maximum amplitude in current snapshot = ',max_field_current
-      print *
-
-      if(plot_shaking_map) then
-        ! compute min and max of data value to normalize
-        min_field_current = minval(field_display(:))
-        max_field_current = maxval(field_display(:))
-        ! print minimum and maximum amplitude in current snapshot
-        print *
-        print *,'minimum amplitude in current snapshot after removal = ',min_field_current
-        print *,'maximum amplitude in current snapshot after removal = ',max_field_current
-        print *
-      endif
-
-      ! apply scaling in all cases for movies
-      if(.not. plot_shaking_map) then
-
-        ! make sure range is always symmetric and center is in zero
-        ! this assumption works only for fields that can be negative
-        ! would not work for norm of vector for instance
-        ! (we would lose half of the color palette if no negative values)
-        max_absol = max(abs(min_field_current),abs(max_field_current))
-        min_field_current = - max_absol
-        max_field_current = + max_absol
-
-        ! normalize field to [0:1]
-        if( abs(max_field_current - min_field_current) > TINYVAL ) &
-          field_display(:) = (field_display(:) - min_field_current) / (max_field_current - min_field_current)
-
-        ! rescale to [-1,1]
-        field_display(:) = 2.*field_display(:) - 1.
-
-        ! apply threshold to normalized field
-        if(APPLY_THRESHOLD) &
-          where(abs(field_display(:)) <= THRESHOLD) field_display = 0.
-
-        ! apply non linear scaling to normalized field if needed
-        if(NONLINEAR_SCALING) then
-          where(field_display(:) >= 0.)
-            field_display = field_display ** POWER_SCALING
-          elsewhere
-            field_display = - abs(field_display) ** POWER_SCALING
-          endwhere
-        endif
-
-        ! map back to [0,1]
-        field_display(:) = (field_display(:) + 1.) / 2.
-
-        ! map field to [0:255] for AVS color scale
-        field_display(:) = 255. * field_display(:)
-
-
-      ! apply scaling only if selected for shaking map
-      else if(NONLINEAR_SCALING .and. iscaling_shake == 1) then
-
-        ! normalize field to [0:1]
-        if( abs(max_field_current) > TINYVAL ) &
-          field_display(:) = field_display(:) / max_field_current
-
-        ! apply non linear scaling to normalized field
-        field_display = field_display ** POWER_SCALING
-
-        ! map field to [0:255] for AVS color scale
-        field_display(:) = 255. * field_display(:)
-
-      endif
-
-      !--- ****** create AVS file using sorted list ******
-
-      if(.not. plot_shaking_map) then
-        if(inumber == 1) then
-          ivalue = iframe
-        else
-          ivalue = it
-        endif
-      endif
-
-      ! create file name and open file
-      if(plot_shaking_map) then
-
-        if(USE_OPENDX) then
-          write(outputname,"('/DX_shaking_map.dx')")
-          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
-          write(11,*) 'object 1 class array type float rank 1 shape 3 items ',nglob,' data follows'
-        else if(USE_AVS) then
-          write(outputname,"('/AVS_shaking_map.inp')")
-          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
-          write(11,*) nglob,' ',nspectot_AVS_max,' 1 0 0'
-       else if(USE_GMT) then
-          write(outputname,"('/gmt_shaking_map.xyz')")
-          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
-        else
-          stop 'wrong output format selected'
-        endif
-
-      else
-
-        if(USE_OPENDX) then
-          write(outputname,"('/DX_movie_',i6.6,'.dx')") ivalue
-          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
-          write(11,*) 'object 1 class array type float rank 1 shape 3 items ',nglob,' data follows'
-        else if(USE_AVS) then
-          write(outputname,"('/AVS_movie_',i6.6,'.inp')") ivalue
-          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
-          write(11,*) nglob,' ',nspectot_AVS_max,' 1 0 0'
-       else if(USE_GMT) then
-          write(outputname,"('/gmt_movie_',i6.6,'.xyz')") ivalue
-          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
-        else
-          stop 'wrong output format selected'
-        endif
-
-      endif
-
-
-      if(USE_GMT) then
-
-! output list of points
-         mask_point = .false.
-         do ispec=1,nspectot_AVS_max
-            ieoff = NGNOD2D_AVS_DX*(ispec-1)
-! four points for each element
-            do ilocnum = 1,NGNOD2D_AVS_DX
-               ibool_number = iglob(ilocnum+ieoff)
-               if(.not. mask_point(ibool_number)) then
-                  call utm_geo(long,lat,xp_save(ilocnum+ieoff),yp_save(ilocnum+ieoff), &
-                       UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
-                  write(11,*) long,lat,field_display(ilocnum+ieoff)
-               endif
-               mask_point(ibool_number) = .true.
-            enddo
-         enddo
-
-      else
-
-        ! output list of points
-        mask_point = .false.
-        ipoin = 0
-        do ispec=1,nspectot_AVS_max
-          ieoff = NGNOD2D_AVS_DX*(ispec-1)
-          ! four points for each element
-          do ilocnum = 1,NGNOD2D_AVS_DX
-            ibool_number = iglob(ilocnum+ieoff)
-            if(.not. mask_point(ibool_number)) then
-              ipoin = ipoin + 1
-              ireorder(ibool_number) = ipoin
-              if(USE_OPENDX) then
-                write(11,*) xp_save(ilocnum+ieoff),yp_save(ilocnum+ieoff),zp_save(ilocnum+ieoff)
-              else if(USE_AVS) then
-                write(11,'(i9,3f16.6)') ireorder(ibool_number),xp_save(ilocnum+ieoff), &
-                    yp_save(ilocnum+ieoff),zp_save(ilocnum+ieoff)
-              endif
-            endif
-            mask_point(ibool_number) = .true.
-          enddo
-        enddo
-
-        if(USE_OPENDX) &
-          write(11,*) 'object 2 class array type int rank 1 shape 4 items ',nspectot_AVS_max,' data follows'
-
-        ! output list of elements
-        do ispec=1,nspectot_AVS_max
-          ieoff = NGNOD2D_AVS_DX*(ispec-1)
-          ! four points for each element
-          ibool_number1 = iglob(ieoff + 1)
-          ibool_number2 = iglob(ieoff + 2)
-          ibool_number3 = iglob(ieoff + 3)
-          ibool_number4 = iglob(ieoff + 4)
-          if(USE_OPENDX) then
-            ! point order in OpenDX is 1,4,2,3 *not* 1,2,3,4 as in AVS
-            write(11,"(i10,1x,i10,1x,i10,1x,i10)") ireorder(ibool_number1)-1, &
-              ireorder(ibool_number4)-1,ireorder(ibool_number2)-1,ireorder(ibool_number3)-1
-          else
-            write(11,"(i10,' 1 quad ',i10,1x,i10,1x,i10,1x,i10)") ispec,ireorder(ibool_number1), &
-              ireorder(ibool_number4),ireorder(ibool_number2),ireorder(ibool_number3)
-          endif
-        enddo
-
-        if(USE_OPENDX) then
-          write(11,*) 'attribute "element type" string "quads"'
-          write(11,*) 'attribute "ref" string "positions"'
-          write(11,*) 'object 3 class array type float rank 0 items ',nglob,' data follows'
-        else
-          ! dummy text for labels
-          write(11,*) '1 1'
-          write(11,*) 'a, b'
-        endif
-
-        ! output data values
-        mask_point = .false.
-        do ispec=1,nspectot_AVS_max
-          ieoff = NGNOD2D_AVS_DX*(ispec-1)
-          ! four points for each element
-          do ilocnum = 1,NGNOD2D_AVS_DX
-            ibool_number = iglob(ilocnum+ieoff)
-            if(.not. mask_point(ibool_number)) then
-              if(USE_OPENDX) then
-                if(plot_shaking_map) then
-                  write(11,*) sngl(field_display(ilocnum+ieoff))
-                else
-                  write(11,"(f7.2)") field_display(ilocnum+ieoff)
-                endif
-              else
-                if(plot_shaking_map) then
-                  write(11,*) ireorder(ibool_number),field_display(ilocnum+ieoff)
-                else
-                  write(11,"(i10,1x,f7.2)") ireorder(ibool_number),field_display(ilocnum+ieoff)
-                endif
-              endif
-            endif
-            mask_point(ibool_number) = .true.
-          enddo
-        enddo
-
-        ! define OpenDX field
-        if(USE_OPENDX) then
-          write(11,*) 'attribute "dep" string "positions"'
-          write(11,*) 'object "irregular positions irregular connections" class field'
-          write(11,*) 'component "positions" value 1'
-          write(11,*) 'component "connections" value 2'
-          write(11,*) 'component "data" value 3'
-          write(11,*) 'end'
-        endif
-
-      ! end of test for GMT format
-      endif
-
-      close(11)
-
-    ! end of loop and test on all the time steps for all the movie images
-   endif
-enddo ! it
-
-  print *
-  print *,'done creating movie or shaking map'
-  print *
-  if(USE_OPENDX) print *,'DX files are stored in ', trim(OUTPUT_FILES), '/DX_*.dx'
-  if(USE_AVS) print *,'AVS files are stored in ', trim(OUTPUT_FILES), '/AVS_*.inp'
-  if(USE_GMT) print *,'GMT files are stored in ', trim(OUTPUT_FILES), '/gmt_*.xyz'
-  print *
-
-
-  deallocate(store_val_x)
-  deallocate(store_val_y)
-  deallocate(store_val_z)
-  deallocate(store_val_ux)
-  deallocate(store_val_uy)
-  deallocate(store_val_uz)
-
-  ! deallocate arrays for sorting routine
-  deallocate(iglob,loc)
-  deallocate(ifseg)
-  deallocate(xp,yp,zp)
-  deallocate(xp_save,yp_save,zp_save)
-  deallocate(field_display)
-  deallocate(mask_point)
-  deallocate(ireorder)
-
-  if(USE_HIGHRES_FOR_MOVIES) then
-    deallocate(x)
-    deallocate(y)
-    deallocate(z)
-    deallocate(display)
-  endif
-
-  end program create_movie_shakemap
-
-!
-!=====================================================================
-!
-
-  subroutine get_global_AVS(nspec,xp,yp,zp,iglob,loc,ifseg,nglob,npointot,UTM_X_MIN,UTM_X_MAX)
-
-! this routine MUST be in double precision to avoid sensitivity
-! to roundoff errors in the coordinates of the points
-
-! leave sorting subroutines in same source file to allow for inlining
-
-  implicit none
-
-  include "constants.h"
-
-! geometry tolerance parameter to calculate number of independent grid points
-! small value for double precision and to avoid sensitivity to roundoff
-  double precision SMALLVALTOL
-
-  integer npointot
-  integer iglob(npointot),loc(npointot)
-  logical ifseg(npointot)
-  double precision xp(npointot),yp(npointot),zp(npointot)
-  integer nspec,nglob
-
-  integer ispec,i,j,ier
-  integer ieoff,ilocnum,nseg,ioff,iseg,ig
-
-  integer, dimension(:), allocatable :: ind,ninseg,iwork
-  double precision, dimension(:), allocatable :: work
-
-  double precision UTM_X_MIN,UTM_X_MAX
-
-! define geometrical tolerance based upon typical size of the model
-    SMALLVALTOL = 1.d-10 * dabs(UTM_X_MAX - UTM_X_MIN)
-    print *, 'UTM_X_MAX', UTM_X_MAX
-    print *, 'UTM_X_MIN', UTM_X_MIN
-    print *, 'SMALLVALTOL', SMALLVALTOL
-
-! dynamically allocate arrays
-  allocate(ind(npointot), &
-          ninseg(npointot), &
-          iwork(npointot), &
-          work(npointot),stat=ier)
-  if( ier /= 0 ) stop 'error allocating arrays ind etc.'
-
-! establish initial pointers
-  do ispec=1,nspec
-    ieoff=NGNOD2D_AVS_DX*(ispec-1)
-    do ilocnum=1,NGNOD2D_AVS_DX
-      loc(ilocnum+ieoff)=ilocnum+ieoff
-    enddo
-  enddo
-
-  ifseg(:)=.false.
-
-  nseg=1
-  ifseg(1)=.true.
-  ninseg(1)=npointot
-
-  do j=1,NDIM
-
-! sort within each segment
-  ioff=1
-  do iseg=1,nseg
-    if(j == 1) then
-      call rank(xp(ioff),ind,ninseg(iseg))
-    else if(j == 2) then
-      call rank(yp(ioff),ind,ninseg(iseg))
-    else
-      call rank(zp(ioff),ind,ninseg(iseg))
-    endif
-    call swap_all(loc(ioff),xp(ioff),yp(ioff),zp(ioff),iwork,work,ind,ninseg(iseg))
-    ioff=ioff+ninseg(iseg)
-  enddo
-
-! check for jumps in current coordinate
-! compare the coordinates of the points within a small tolerance
-  if(j == 1) then
-    do i=2,npointot
-      if(dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
-    enddo
-  else if(j == 2) then
-    do i=2,npointot
-      if(dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
-    enddo
-  else
-    do i=2,npointot
-      if(dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
-    enddo
-  endif
-
-! count up number of different segments
-  nseg=0
-  do i=1,npointot
-    if(ifseg(i)) then
-      nseg=nseg+1
-      ninseg(nseg)=1
-    else
-      ninseg(nseg)=ninseg(nseg)+1
-    endif
-  enddo
-  enddo
-
-! assign global node numbers (now sorted lexicographically)
-  ig=0
-  do i=1,npointot
-    if(ifseg(i)) ig=ig+1
-    iglob(loc(i))=ig
-  enddo
-
-  nglob=ig
-
-! deallocate arrays
-  deallocate(ind)
-  deallocate(ninseg)
-  deallocate(iwork)
-  deallocate(work)
-
-  end subroutine get_global_AVS
-
-! -----------------------------------
-
-! sorting routines put in same file to allow for inlining
-
-  subroutine rank(A,IND,N)
-!
-! Use Heap Sort (Numerical Recipes)
-!
-  implicit none
-
-  integer n
-  double precision A(n)
-  integer IND(n)
-
-  integer i,j,l,ir,indx
-  double precision q
-
-  do j=1,n
-   IND(j)=j
-  enddo
-
-  if (n == 1) return
-
-  L=n/2+1
-  ir=n
-  100 CONTINUE
-   IF (l>1) THEN
-      l=l-1
-      indx=ind(l)
-      q=a(indx)
-   ELSE
-      indx=ind(ir)
-      q=a(indx)
-      ind(ir)=ind(1)
-      ir=ir-1
-      if (ir == 1) then
-         ind(1)=indx
-         return
-      endif
-   ENDIF
-   i=l
-   j=l+l
-  200    CONTINUE
-   IF (J <= IR) THEN
-      IF (J<IR) THEN
-         IF ( A(IND(j))<A(IND(j+1)) ) j=j+1
-      ENDIF
-      IF (q<A(IND(j))) THEN
-         IND(I)=IND(J)
-         I=J
-         J=J+J
-      ELSE
-         J=IR+1
-      ENDIF
-   goto 200
-   ENDIF
-   IND(I)=INDX
-  goto 100
-  end subroutine rank
-
-! ------------------------------------------------------------------
-
-  subroutine swap_all(IA,A,B,C,IW,W,ind,n)
-!
-! swap arrays IA, A, B and C according to addressing in array IND
-!
-  implicit none
-
-  integer n
-
-  integer IND(n)
-  integer IA(n),IW(n)
-  double precision A(n),B(n),C(n),W(n)
-
-  integer i
-
-  IW(:) = IA(:)
-  W(:) = A(:)
-
-  do i=1,n
-    IA(i)=IW(ind(i))
-    A(i)=W(ind(i))
-  enddo
-
-  W(:) = B(:)
-
-  do i=1,n
-    B(i)=W(ind(i))
-  enddo
-
-  W(:) = C(:)
-
-  do i=1,n
-    C(i)=W(ind(i))
-  enddo
-
-  end subroutine swap_all
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!
+!---  create a movie of the vertical component of surface displacement or velocity
+!---  or a ShakeMap(R) (i.e. map of the maximum absolute value of the two horizontal components
+!---  of the velocity vector) in AVS, OpenDX or GMT format
+!
+
+  program create_movie_shakemap
+
+  implicit none
+
+  include "constants.h"
+  include "surface_from_mesher.h"
+
+!-------------------------------------------------------------------------------------------------
+! user parameters
+! threshold in percent of the maximum below which we cut the amplitude
+  logical, parameter :: APPLY_THRESHOLD = .false.
+  real(kind=CUSTOM_REAL), parameter :: THRESHOLD = 1._CUSTOM_REAL / 100._CUSTOM_REAL
+
+! coefficient of power law used for non linear scaling
+  logical, parameter :: NONLINEAR_SCALING = .false.
+  real(kind=CUSTOM_REAL), parameter :: POWER_SCALING = 0.13_CUSTOM_REAL
+
+!-------------------------------------------------------------------------------------------------
+
+  integer it,it1,it2,ivalue,nspectot_AVS_max,ispec
+  integer iformat,nframes,iframe,inumber,inorm,iscaling_shake
+  integer ibool_number,ibool_number1,ibool_number2,ibool_number3,ibool_number4
+
+  logical USE_OPENDX,USE_AVS,USE_GMT,plot_shaking_map
+
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: x,y,z,display
+  real(kind=CUSTOM_REAL) xcoord,ycoord,zcoord
+  real(kind=CUSTOM_REAL) vectorx,vectory,vectorz
+
+  double precision min_field_current,max_field_current,max_absol
+
+  character(len=256) outputname
+
+  integer ipoin
+
+  ! GMT
+  double precision lat,long
+
+  ! for sorting routine
+  integer npointot,ilocnum,nglob,i,j,ielm,ieoff,ispecloc
+  integer, dimension(:), allocatable :: iglob,loc,ireorder
+  logical, dimension(:), allocatable :: ifseg,mask_point
+  double precision, dimension(:), allocatable :: xp,yp,zp,xp_save,yp_save,zp_save,field_display
+
+  ! movie files stored by solver
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
+         store_val_x,store_val_y,store_val_z, &
+         store_val_ux,store_val_uy,store_val_uz
+
+  ! parameters read from parameter file
+  integer NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,UTM_PROJECTION_ZONE,SIMULATION_TYPE
+  integer NSOURCES,NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY
+  logical MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+          USE_HIGHRES_FOR_MOVIES,SUPPRESS_UTM_PROJECTION
+  integer NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO
+  double precision DT
+  double precision HDUR_MOVIE
+  logical ATTENUATION,USE_OLSEN_ATTENUATION, &
+          OCEANS,TOPOGRAPHY
+  logical ABSORBING_CONDITIONS,SAVE_FORWARD
+  logical ANISOTROPY,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
+  character(len=256) OUTPUT_FILES,LOCAL_PATH
+  integer NPROC
+  integer ier
+
+
+!--------------------------------------------
+!!!! NL NL for external meshes
+!--------------------------------------------
+  ! muting source region
+  real(kind=CUSTOM_REAL), parameter :: RADIUS_TO_MUTE = 1000._CUSTOM_REAL
+  logical, parameter :: MUTE_SOURCE = .true.
+  real(kind=CUSTOM_REAL), parameter :: X_SOURCE_EXT_MESH = -9023.021484375
+  real(kind=CUSTOM_REAL), parameter :: Y_SOURCE_EXT_MESH = 6123.611328125
+  real(kind=CUSTOM_REAL), parameter :: Z_SOURCE_EXT_MESH = 17.96331405639648
+!--------------------------------------------
+!!!! NL NL
+
+  ! order of points representing the 2D square element
+  integer,dimension(NGNOD2D_AVS_DX),parameter :: iorder = (/1,3,2,4/)
+
+
+! ************** PROGRAM STARTS HERE **************
+
+  print *
+  print *,'Recombining all movie frames to create a movie'
+  print *
+
+  print *
+  print *,'reading parameter file'
+  print *
+
+  ! read the parameter file
+  call read_parameter_file(NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
+        UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+        ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
+        OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
+        MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+        NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
+        SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION, &
+        NTSTEP_BETWEEN_OUTPUT_INFO,SIMULATION_TYPE,SAVE_FORWARD, &
+        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
+
+  ! get the base pathname for output files
+  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
+
+  ! only one global array for movie data, but stored for all surfaces defined
+  ! in file 'surface_from_mesher.h'
+  if(USE_HIGHRES_FOR_MOVIES) then
+     ilocnum = NSPEC_SURFACE_EXT_MESH*NGLLSQUARE
+  else
+     ilocnum = NSPEC_SURFACE_EXT_MESH*NGNOD2D_AVS_DX
+  endif
+  print*,'  moviedata element surfaces: ',NSPEC_SURFACE_EXT_MESH
+  print*,'  moviedata total elements all: ',ilocnum
+  print *
+
+  if(SAVE_DISPLACEMENT) then
+    print *,'Vertical displacement will be shown in movie'
+  else
+    print *,'Vertical velocity will be shown in movie'
+  endif
+  print *
+
+
+  ! user input
+  print *,'1 = create files in OpenDX format'
+  print *,'2 = create files in AVS UCD format'
+  print *,'3 = create files in GMT xyz Ascii long/lat/Uz format'
+  print *,'any other value = exit'
+  print *
+  print *,'enter value:'
+  read(5,*) iformat
+  if(iformat < 1 .or. iformat > 3) stop 'exiting...'
+
+  plot_shaking_map = .false.
+  print *,'movie frames have been saved every ',NTSTEP_BETWEEN_FRAMES,' time steps'
+  print *
+  print *,'enter first time step of movie (e.g. 1, enter -1 for shaking map)'
+  read(5,*) it1
+  if(it1 == 0 ) it1 = 1
+  if(it1 == -1) plot_shaking_map = .true.
+  if(.not. plot_shaking_map) then
+    print *,'enter last time step of movie (e.g. ',NSTEP,')'
+    read(5,*) it2
+    print *
+    print *,'1 = define file names using frame number'
+    print *,'2 = define file names using time step number'
+    print *,'any other value = exit'
+    print *
+    print *,'enter value:'
+    read(5,*) inumber
+    if(inumber<1 .or. inumber>2) stop 'exiting...'
+    print *
+    print *,'looping from ',it1,' to ',it2,' every ',NTSTEP_BETWEEN_FRAMES,' time steps'
+    ! count number of movie frames
+    nframes = 0
+    do it = it1,it2
+      if(mod(it,NTSTEP_BETWEEN_FRAMES) == 0) nframes = nframes + 1
+    enddo
+  else
+    ! only one frame if shaking map
+    nframes = 1
+    it1 = 1
+    it2 = 1
+  endif
+  print *
+  print *,'total number of frames will be ',nframes
+  if(nframes == 0) stop 'null number of frames'
+
+  iscaling_shake = 0
+  if(plot_shaking_map) then
+    print *
+    print *,'norm to display in shaking map:'
+    print *,'1=displacement  2=velocity  3=acceleration'
+    print *
+    read(5,*) inorm
+    if(inorm < 1 .or. inorm > 3) stop 'incorrect value of inorm'
+    print *
+    print *,'apply non-linear scaling to shaking map:'
+    print *,'1=non-linear  2=no scaling'
+    print *
+    read(5,*) iscaling_shake
+    if(iscaling_shake < 1 .or. iscaling_shake > 2) stop 'incorrect value of iscaling_shake'
+  else
+    print *
+    print *,'movie data:'
+    print *,'1= norm of velocity  2=velocity x-comp 3=velocity y-comp 4=velocity z-comp'
+    print *
+    read(5,*) inorm
+    if(inorm < 1 .or. inorm > 4) stop 'incorrect value of inorm'
+  endif
+
+! file format flags
+  if(iformat == 1) then
+    USE_OPENDX = .true.
+    USE_AVS = .false.
+    USE_GMT = .false.
+  else if(iformat == 2) then
+    USE_OPENDX = .false.
+    USE_AVS = .true.
+    USE_GMT = .false.
+  else
+    USE_OPENDX = .false.
+    USE_AVS = .false.
+    USE_GMT = .true.
+  endif
+
+  ! define the total number of elements at the surface
+  if(USE_HIGHRES_FOR_MOVIES) then
+     nspectot_AVS_max = NSPEC_SURFACE_EXT_MESH * (NGLLX-1) * (NGLLY-1)
+  else
+     nspectot_AVS_max = NSPEC_SURFACE_EXT_MESH
+  endif
+
+  ! maximum theoretical number of points at the surface
+  npointot = NGNOD2D_AVS_DX * nspectot_AVS_max
+
+  ! allocate arrays for sorting routine
+  allocate(iglob(npointot),loc(npointot), &
+          ifseg(npointot), &
+          xp(npointot),yp(npointot),zp(npointot), &
+          xp_save(npointot),yp_save(npointot),zp_save(npointot), &
+          field_display(npointot), &
+          mask_point(npointot), &
+          ireorder(npointot),stat=ier)
+  if( ier /= 0 ) stop 'error allocating arrays for sorting routine'
+
+  ! allocates data arrays
+  allocate(store_val_x(ilocnum), &
+          store_val_y(ilocnum), &
+          store_val_z(ilocnum), &
+          store_val_ux(ilocnum), &
+          store_val_uy(ilocnum), &
+          store_val_uz(ilocnum),stat=ier)
+  if( ier /= 0 ) stop 'error allocating arrays for data arrays'
+
+  if(USE_HIGHRES_FOR_MOVIES) then
+    allocate(x(NGLLX,NGLLY), &
+            y(NGLLX,NGLLY), &
+            z(NGLLX,NGLLY), &
+            display(NGLLX,NGLLY),stat=ier)
+    if( ier /= 0 ) stop 'error allocating arrays for highres'
+  endif
+
+  ! user output
+  print *
+  print *,'there are a total of ',nspectot_AVS_max,' elements at the surface'
+  print *
+  print *
+  if(APPLY_THRESHOLD .and. .not. plot_shaking_map) &
+    print *,'Will apply a threshold to amplitude below ',100.*THRESHOLD,' %'
+  if(NONLINEAR_SCALING .and. (.not. plot_shaking_map .or. iscaling_shake == 1)) &
+    print *,'Will apply a non linear scaling with coef ',POWER_SCALING
+
+
+  iframe = 0
+
+! loop on all the time steps in the range entered
+  do it = it1,it2
+
+    ! check if time step corresponds to a movie frame
+    if(mod(it,NTSTEP_BETWEEN_FRAMES) == 0 .or. plot_shaking_map) then
+
+      iframe = iframe + 1
+
+      print *
+      if(plot_shaking_map) then
+        print *,'reading shaking map snapshot'
+      else
+        print *,'reading snapshot time step ',it,' out of ',NSTEP
+      endif
+      print *
+
+      ! read all the elements from the same file
+      if(plot_shaking_map) then
+        write(outputname,"('/shakingdata')")
+      else
+        write(outputname,"('/moviedata',i6.6)") it
+      endif
+      open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(outputname),status='old', &
+            action='read',form='unformatted',iostat=ier)
+      if( ier /= 0 ) then
+        print*,'error: ',trim(OUTPUT_FILES)//trim(outputname)
+        stop 'error opening moviedata file'
+      endif
+
+      read(IOUT) store_val_x
+      read(IOUT) store_val_y
+      read(IOUT) store_val_z
+      read(IOUT) store_val_ux
+      read(IOUT) store_val_uy
+      read(IOUT) store_val_uz
+      close(IOUT)
+
+      ! clear number of elements kept
+      ispec = 0
+
+      ! reset point number
+      ipoin = 0
+
+      do ispecloc = 1, NSPEC_SURFACE_EXT_MESH
+
+        if(USE_HIGHRES_FOR_MOVIES) then
+          ! assign the OpenDX "elements"
+          do j = 1,NGLLY
+            do i = 1,NGLLX
+              ipoin = ipoin + 1
+
+              ! x,y,z coordinates
+              xcoord = store_val_x(ipoin)
+              ycoord = store_val_y(ipoin)
+              zcoord = store_val_z(ipoin)
+
+              ! note:
+              ! for shakemaps: ux = norm displacement, uy = norm velocity, uz = norm acceleration
+              ! for movies: ux = velocity x-component, uy = velocity y-component, uz = velocity z-component
+              vectorx = store_val_ux(ipoin)
+              vectory = store_val_uy(ipoin)
+              vectorz = store_val_uz(ipoin)
+
+              x(i,j) = xcoord
+              y(i,j) = ycoord
+              z(i,j) = zcoord
+
+              ! shakemap
+              if(plot_shaking_map) then
+                !!!! NL NL mute value near source
+                if ( (sqrt(((x(i,j) - (X_SOURCE_EXT_MESH))**2 + &
+                     (y(i,j) - (Y_SOURCE_EXT_MESH))**2 + &
+                     (z(i,j) - (Z_SOURCE_EXT_MESH))**2)) < RADIUS_TO_MUTE) &
+                     .and. MUTE_SOURCE) then
+
+                  display(i,j) = 0.
+                else
+                  ! chooses norm
+                  if(inorm == 1) then
+                    ! norm displacement
+                    display(i,j) = vectorx
+                  else if(inorm == 2) then
+                    ! norm velocity
+                    display(i,j) = vectory
+                  else
+                    ! norm acceleration
+                    display(i,j) = vectorz
+                  endif
+                endif
+              else
+                ! movie
+                if(inorm == 1) then
+                  ! norm of velocity
+                  display(i,j) = sqrt(vectorz**2+vectory**2+vectorx**2)
+                else if( inorm == 2 ) then
+                  ! velocity x-component
+                  display(i,j) = vectorx
+                else if( inorm == 3 ) then
+                  ! velocity y-component
+                  display(i,j) = vectory
+                else if( inorm == 4 ) then
+                  ! velocity z-component
+                  display(i,j) = vectorz
+                endif
+              endif
+
+            enddo
+          enddo
+
+          ! assign the values of the corners of the OpenDX "elements"
+          ispec = ispec + 1
+          ielm = (NGLLX-1)*(NGLLY-1)*(ispec-1)
+
+          do j = 1,NGLLY-1
+            do i = 1,NGLLX-1
+              ieoff = NGNOD2D_AVS_DX*(ielm+(i-1)+(j-1)*(NGLLX-1))
+              do ilocnum = 1,NGNOD2D_AVS_DX
+                !            do k = 1,NGNOD2D_AVS_DX
+
+
+                if(ilocnum == 1) then
+                  xp(ieoff+ilocnum) = dble(x(i,j))
+                  yp(ieoff+ilocnum) = dble(y(i,j))
+                  zp(ieoff+ilocnum) = dble(z(i,j))
+                  field_display(ieoff+ilocnum) = dble(display(i,j))
+                elseif(ilocnum == 2) then
+
+                  ! accounts for different ordering of square points
+                  xp(ieoff+ilocnum) = dble(x(i+1,j+1))
+                  yp(ieoff+ilocnum) = dble(y(i+1,j+1))
+                  zp(ieoff+ilocnum) = dble(z(i+1,j+1))
+                  field_display(ieoff+ilocnum) = dble(display(i+1,j+1))
+
+                  !                xp(ieoff+ilocnum) = dble(x(i+1,j))
+                  !                yp(ieoff+ilocnum) = dble(y(i+1,j))
+                  !                zp(ieoff+ilocnum) = dble(z(i+1,j))
+                  !                field_display(ieoff+ilocnum) = dble(display(i+1,j))
+
+                elseif(ilocnum == 3) then
+
+                  ! accounts for different ordering of square points
+                  xp(ieoff+ilocnum) = dble(x(i+1,j))
+                  yp(ieoff+ilocnum) = dble(y(i+1,j))
+                  zp(ieoff+ilocnum) = dble(z(i+1,j))
+                  field_display(ieoff+ilocnum) = dble(display(i+1,j))
+
+                  !                xp(ieoff+ilocnum) = dble(x(i+1,j+1))
+                  !                yp(ieoff+ilocnum) = dble(y(i+1,j+1))
+                  !                zp(ieoff+ilocnum) = dble(z(i+1,j+1))
+                  !                field_display(ieoff+ilocnum) = dble(display(i+1,j+1))
+                else
+                  xp(ieoff+ilocnum) = dble(x(i,j+1))
+                  yp(ieoff+ilocnum) = dble(y(i,j+1))
+                  zp(ieoff+ilocnum) = dble(z(i,j+1))
+                  field_display(ieoff+ilocnum) = dble(display(i,j+1))
+                endif
+
+              enddo
+
+              !if( j==1 .and. ispec==1) then
+              !print*,'p1',xp(ieoff+1),yp(ieoff+1),zp(ieoff+1)
+              !print*,'p2',xp(ieoff+2),yp(ieoff+2),zp(ieoff+2)
+              !print*,'p3',xp(ieoff+3),yp(ieoff+3),zp(ieoff+3)
+              !print*,'p4',xp(ieoff+4),yp(ieoff+4),zp(ieoff+4)
+              !endif
+
+            enddo
+          enddo
+
+        else
+          ! low-resolution (only spectral element corners)
+          ispec = ispec + 1
+          ieoff = NGNOD2D_AVS_DX*(ispec-1)
+
+          ! four points for each element
+          do i = 1,NGNOD2D_AVS_DX
+
+            ! accounts for different ordering of square points
+            ilocnum = iorder(i)
+
+            ipoin = ipoin + 1
+
+            xcoord = store_val_x(ipoin)
+            ycoord = store_val_y(ipoin)
+            zcoord = store_val_z(ipoin)
+
+            vectorx = store_val_ux(ipoin)
+            vectory = store_val_uy(ipoin)
+            vectorz = store_val_uz(ipoin)
+
+
+            xp(ilocnum+ieoff) = dble(xcoord)
+            yp(ilocnum+ieoff) = dble(ycoord)
+            zp(ilocnum+ieoff) = dble(zcoord)
+
+            ! shakemap
+            if(plot_shaking_map) then
+              !!!! NL NL mute value near source
+              if ( (sqrt(((dble(xcoord) - (X_SOURCE_EXT_MESH))**2 + &
+                     (dble(ycoord) - (Y_SOURCE_EXT_MESH))**2 + &
+                     (dble(zcoord) - (Z_SOURCE_EXT_MESH))**2)) < RADIUS_TO_MUTE) &
+                     .and. MUTE_SOURCE) then
+                  field_display(ilocnum+ieoff) = 0.
+              else
+                if(inorm == 1) then
+                  ! norm of displacement
+                  field_display(ilocnum+ieoff) = dble(vectorx)
+                else if(inorm == 2) then
+                  ! norm of velocity
+                  field_display(ilocnum+ieoff) = dble(vectory)
+                else
+                  ! norm of acceleration
+                  field_display(ilocnum+ieoff) = dble(vectorz)
+                endif
+              endif
+            else
+              ! movie
+              if(inorm == 1) then
+                ! norm of velocity
+                field_display(ilocnum+ieoff) = sqrt(vectorz**2+vectory**2+vectorx**2)
+              else if( inorm == 2 ) then
+                ! velocity x-component
+                field_display(ilocnum+ieoff) = vectorx
+              else if( inorm == 3 ) then
+                ! velocity y-component
+                field_display(ilocnum+ieoff) = vectory
+              else
+                ! velocity z-component
+                field_display(ilocnum+ieoff) = vectorz
+              endif
+              ! takes norm of velocity vector
+              !field_display(ilocnum+ieoff) =sqrt(vectorz**2+vectory**2+vectorx**2)
+            endif
+
+          enddo
+        endif ! USE_HIGHRES_FOR_MOVIES
+      enddo ! NSPEC_SURFACE_EXT_MESH
+
+      ! copy coordinate arrays since the sorting routine does not preserve them
+      xp_save(:) = xp(:)
+      yp_save(:) = yp(:)
+      zp_save(:) = zp(:)
+
+      ! sort the list based upon coordinates to get rid of multiples
+      print *,'sorting list of points'
+      call get_global_AVS(nspectot_AVS_max,xp,yp,zp,iglob,loc,ifseg,nglob,npointot, &
+           dble(minval(store_val_x(:))),dble(maxval(store_val_x(:))))
+
+      ! print total number of points found
+      print *
+      print *,'found a total of ',nglob,' points'
+      print *,'initial number of points (with multiples) was ',npointot
+
+
+      !  normalize and scale vector field
+
+      ! compute min and max of data value to normalize
+      min_field_current = minval(field_display(:))
+      max_field_current = maxval(field_display(:))
+
+      ! print minimum and maximum amplitude in current snapshot
+      print *
+      print *,'minimum amplitude in current snapshot = ',min_field_current
+      print *,'maximum amplitude in current snapshot = ',max_field_current
+      print *
+
+      if(plot_shaking_map) then
+        ! compute min and max of data value to normalize
+        min_field_current = minval(field_display(:))
+        max_field_current = maxval(field_display(:))
+        ! print minimum and maximum amplitude in current snapshot
+        print *
+        print *,'minimum amplitude in current snapshot after removal = ',min_field_current
+        print *,'maximum amplitude in current snapshot after removal = ',max_field_current
+        print *
+      endif
+
+      ! apply scaling in all cases for movies
+      if(.not. plot_shaking_map) then
+
+        ! make sure range is always symmetric and center is in zero
+        ! this assumption works only for fields that can be negative
+        ! would not work for norm of vector for instance
+        ! (we would lose half of the color palette if no negative values)
+        max_absol = max(abs(min_field_current),abs(max_field_current))
+        min_field_current = - max_absol
+        max_field_current = + max_absol
+
+        ! normalize field to [0:1]
+        if( abs(max_field_current - min_field_current) > TINYVAL ) &
+          field_display(:) = (field_display(:) - min_field_current) / (max_field_current - min_field_current)
+
+        ! rescale to [-1,1]
+        field_display(:) = 2.*field_display(:) - 1.
+
+        ! apply threshold to normalized field
+        if(APPLY_THRESHOLD) &
+          where(abs(field_display(:)) <= THRESHOLD) field_display = 0.
+
+        ! apply non linear scaling to normalized field if needed
+        if(NONLINEAR_SCALING) then
+          where(field_display(:) >= 0.)
+            field_display = field_display ** POWER_SCALING
+          elsewhere
+            field_display = - abs(field_display) ** POWER_SCALING
+          endwhere
+        endif
+
+        ! map back to [0,1]
+        field_display(:) = (field_display(:) + 1.) / 2.
+
+        ! map field to [0:255] for AVS color scale
+        field_display(:) = 255. * field_display(:)
+
+
+      ! apply scaling only if selected for shaking map
+      else if(NONLINEAR_SCALING .and. iscaling_shake == 1) then
+
+        ! normalize field to [0:1]
+        if( abs(max_field_current) > TINYVAL ) &
+          field_display(:) = field_display(:) / max_field_current
+
+        ! apply non linear scaling to normalized field
+        field_display = field_display ** POWER_SCALING
+
+        ! map field to [0:255] for AVS color scale
+        field_display(:) = 255. * field_display(:)
+
+      endif
+
+      !--- ****** create AVS file using sorted list ******
+
+      if(.not. plot_shaking_map) then
+        if(inumber == 1) then
+          ivalue = iframe
+        else
+          ivalue = it
+        endif
+      endif
+
+      ! create file name and open file
+      if(plot_shaking_map) then
+
+        if(USE_OPENDX) then
+          write(outputname,"('/DX_shaking_map.dx')")
+          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+          write(11,*) 'object 1 class array type float rank 1 shape 3 items ',nglob,' data follows'
+        else if(USE_AVS) then
+          write(outputname,"('/AVS_shaking_map.inp')")
+          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+          write(11,*) nglob,' ',nspectot_AVS_max,' 1 0 0'
+       else if(USE_GMT) then
+          write(outputname,"('/gmt_shaking_map.xyz')")
+          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+        else
+          stop 'wrong output format selected'
+        endif
+
+      else
+
+        if(USE_OPENDX) then
+          write(outputname,"('/DX_movie_',i6.6,'.dx')") ivalue
+          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+          write(11,*) 'object 1 class array type float rank 1 shape 3 items ',nglob,' data follows'
+        else if(USE_AVS) then
+          write(outputname,"('/AVS_movie_',i6.6,'.inp')") ivalue
+          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+          write(11,*) nglob,' ',nspectot_AVS_max,' 1 0 0'
+       else if(USE_GMT) then
+          write(outputname,"('/gmt_movie_',i6.6,'.xyz')") ivalue
+          open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+        else
+          stop 'wrong output format selected'
+        endif
+
+      endif
+
+
+      if(USE_GMT) then
+
+! output list of points
+         mask_point = .false.
+         do ispec=1,nspectot_AVS_max
+            ieoff = NGNOD2D_AVS_DX*(ispec-1)
+! four points for each element
+            do ilocnum = 1,NGNOD2D_AVS_DX
+               ibool_number = iglob(ilocnum+ieoff)
+               if(.not. mask_point(ibool_number)) then
+                  call utm_geo(long,lat,xp_save(ilocnum+ieoff),yp_save(ilocnum+ieoff), &
+                       UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
+                  write(11,*) long,lat,field_display(ilocnum+ieoff)
+               endif
+               mask_point(ibool_number) = .true.
+            enddo
+         enddo
+
+      else
+
+        ! output list of points
+        mask_point = .false.
+        ipoin = 0
+        do ispec=1,nspectot_AVS_max
+          ieoff = NGNOD2D_AVS_DX*(ispec-1)
+          ! four points for each element
+          do ilocnum = 1,NGNOD2D_AVS_DX
+            ibool_number = iglob(ilocnum+ieoff)
+            if(.not. mask_point(ibool_number)) then
+              ipoin = ipoin + 1
+              ireorder(ibool_number) = ipoin
+              if(USE_OPENDX) then
+                write(11,*) xp_save(ilocnum+ieoff),yp_save(ilocnum+ieoff),zp_save(ilocnum+ieoff)
+              else if(USE_AVS) then
+                write(11,'(i9,3f16.6)') ireorder(ibool_number),xp_save(ilocnum+ieoff), &
+                    yp_save(ilocnum+ieoff),zp_save(ilocnum+ieoff)
+              endif
+            endif
+            mask_point(ibool_number) = .true.
+          enddo
+        enddo
+
+        if(USE_OPENDX) &
+          write(11,*) 'object 2 class array type int rank 1 shape 4 items ',nspectot_AVS_max,' data follows'
+
+        ! output list of elements
+        do ispec=1,nspectot_AVS_max
+          ieoff = NGNOD2D_AVS_DX*(ispec-1)
+          ! four points for each element
+          ibool_number1 = iglob(ieoff + 1)
+          ibool_number2 = iglob(ieoff + 2)
+          ibool_number3 = iglob(ieoff + 3)
+          ibool_number4 = iglob(ieoff + 4)
+          if(USE_OPENDX) then
+            ! point order in OpenDX is 1,4,2,3 *not* 1,2,3,4 as in AVS
+            write(11,"(i10,1x,i10,1x,i10,1x,i10)") ireorder(ibool_number1)-1, &
+              ireorder(ibool_number4)-1,ireorder(ibool_number2)-1,ireorder(ibool_number3)-1
+          else
+            write(11,"(i10,' 1 quad ',i10,1x,i10,1x,i10,1x,i10)") ispec,ireorder(ibool_number1), &
+              ireorder(ibool_number4),ireorder(ibool_number2),ireorder(ibool_number3)
+          endif
+        enddo
+
+        if(USE_OPENDX) then
+          write(11,*) 'attribute "element type" string "quads"'
+          write(11,*) 'attribute "ref" string "positions"'
+          write(11,*) 'object 3 class array type float rank 0 items ',nglob,' data follows'
+        else
+          ! dummy text for labels
+          write(11,*) '1 1'
+          write(11,*) 'a, b'
+        endif
+
+        ! output data values
+        mask_point = .false.
+        do ispec=1,nspectot_AVS_max
+          ieoff = NGNOD2D_AVS_DX*(ispec-1)
+          ! four points for each element
+          do ilocnum = 1,NGNOD2D_AVS_DX
+            ibool_number = iglob(ilocnum+ieoff)
+            if(.not. mask_point(ibool_number)) then
+              if(USE_OPENDX) then
+                if(plot_shaking_map) then
+                  write(11,*) sngl(field_display(ilocnum+ieoff))
+                else
+                  write(11,"(f7.2)") field_display(ilocnum+ieoff)
+                endif
+              else
+                if(plot_shaking_map) then
+                  write(11,*) ireorder(ibool_number),field_display(ilocnum+ieoff)
+                else
+                  write(11,"(i10,1x,f7.2)") ireorder(ibool_number),field_display(ilocnum+ieoff)
+                endif
+              endif
+            endif
+            mask_point(ibool_number) = .true.
+          enddo
+        enddo
+
+        ! define OpenDX field
+        if(USE_OPENDX) then
+          write(11,*) 'attribute "dep" string "positions"'
+          write(11,*) 'object "irregular positions irregular connections" class field'
+          write(11,*) 'component "positions" value 1'
+          write(11,*) 'component "connections" value 2'
+          write(11,*) 'component "data" value 3'
+          write(11,*) 'end'
+        endif
+
+      ! end of test for GMT format
+      endif
+
+      close(11)
+
+    ! end of loop and test on all the time steps for all the movie images
+   endif
+enddo ! it
+
+  print *
+  print *,'done creating movie or shaking map'
+  print *
+  if(USE_OPENDX) print *,'DX files are stored in ', trim(OUTPUT_FILES), '/DX_*.dx'
+  if(USE_AVS) print *,'AVS files are stored in ', trim(OUTPUT_FILES), '/AVS_*.inp'
+  if(USE_GMT) print *,'GMT files are stored in ', trim(OUTPUT_FILES), '/gmt_*.xyz'
+  print *
+
+
+  deallocate(store_val_x)
+  deallocate(store_val_y)
+  deallocate(store_val_z)
+  deallocate(store_val_ux)
+  deallocate(store_val_uy)
+  deallocate(store_val_uz)
+
+  ! deallocate arrays for sorting routine
+  deallocate(iglob,loc)
+  deallocate(ifseg)
+  deallocate(xp,yp,zp)
+  deallocate(xp_save,yp_save,zp_save)
+  deallocate(field_display)
+  deallocate(mask_point)
+  deallocate(ireorder)
+
+  if(USE_HIGHRES_FOR_MOVIES) then
+    deallocate(x)
+    deallocate(y)
+    deallocate(z)
+    deallocate(display)
+  endif
+
+  end program create_movie_shakemap
+
+!
+!=====================================================================
+!
+
+  subroutine get_global_AVS(nspec,xp,yp,zp,iglob,loc,ifseg,nglob,npointot,UTM_X_MIN,UTM_X_MAX)
+
+! this routine MUST be in double precision to avoid sensitivity
+! to roundoff errors in the coordinates of the points
+
+! leave sorting subroutines in same source file to allow for inlining
+
+  implicit none
+
+  include "constants.h"
+
+! geometry tolerance parameter to calculate number of independent grid points
+! small value for double precision and to avoid sensitivity to roundoff
+  double precision SMALLVALTOL
+
+  integer npointot
+  integer iglob(npointot),loc(npointot)
+  logical ifseg(npointot)
+  double precision xp(npointot),yp(npointot),zp(npointot)
+  integer nspec,nglob
+
+  integer ispec,i,j,ier
+  integer ieoff,ilocnum,nseg,ioff,iseg,ig
+
+  integer, dimension(:), allocatable :: ind,ninseg,iwork
+  double precision, dimension(:), allocatable :: work
+
+  double precision UTM_X_MIN,UTM_X_MAX
+
+! define geometrical tolerance based upon typical size of the model
+    SMALLVALTOL = 1.d-10 * dabs(UTM_X_MAX - UTM_X_MIN)
+    print *, 'UTM_X_MAX', UTM_X_MAX
+    print *, 'UTM_X_MIN', UTM_X_MIN
+    print *, 'SMALLVALTOL', SMALLVALTOL
+
+! dynamically allocate arrays
+  allocate(ind(npointot), &
+          ninseg(npointot), &
+          iwork(npointot), &
+          work(npointot),stat=ier)
+  if( ier /= 0 ) stop 'error allocating arrays ind etc.'
+
+! establish initial pointers
+  do ispec=1,nspec
+    ieoff=NGNOD2D_AVS_DX*(ispec-1)
+    do ilocnum=1,NGNOD2D_AVS_DX
+      loc(ilocnum+ieoff)=ilocnum+ieoff
+    enddo
+  enddo
+
+  ifseg(:)=.false.
+
+  nseg=1
+  ifseg(1)=.true.
+  ninseg(1)=npointot
+
+  do j=1,NDIM
+
+! sort within each segment
+  ioff=1
+  do iseg=1,nseg
+    if(j == 1) then
+      call rank(xp(ioff),ind,ninseg(iseg))
+    else if(j == 2) then
+      call rank(yp(ioff),ind,ninseg(iseg))
+    else
+      call rank(zp(ioff),ind,ninseg(iseg))
+    endif
+    call swap_all(loc(ioff),xp(ioff),yp(ioff),zp(ioff),iwork,work,ind,ninseg(iseg))
+    ioff=ioff+ninseg(iseg)
+  enddo
+
+! check for jumps in current coordinate
+! compare the coordinates of the points within a small tolerance
+  if(j == 1) then
+    do i=2,npointot
+      if(dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+    enddo
+  else if(j == 2) then
+    do i=2,npointot
+      if(dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+    enddo
+  else
+    do i=2,npointot
+      if(dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+    enddo
+  endif
+
+! count up number of different segments
+  nseg=0
+  do i=1,npointot
+    if(ifseg(i)) then
+      nseg=nseg+1
+      ninseg(nseg)=1
+    else
+      ninseg(nseg)=ninseg(nseg)+1
+    endif
+  enddo
+  enddo
+
+! assign global node numbers (now sorted lexicographically)
+  ig=0
+  do i=1,npointot
+    if(ifseg(i)) ig=ig+1
+    iglob(loc(i))=ig
+  enddo
+
+  nglob=ig
+
+! deallocate arrays
+  deallocate(ind)
+  deallocate(ninseg)
+  deallocate(iwork)
+  deallocate(work)
+
+  end subroutine get_global_AVS
+
+! -----------------------------------
+
+! sorting routines put in same file to allow for inlining
+
+  subroutine rank(A,IND,N)
+!
+! Use Heap Sort (Numerical Recipes)
+!
+  implicit none
+
+  integer n
+  double precision A(n)
+  integer IND(n)
+
+  integer i,j,l,ir,indx
+  double precision q
+
+  do j=1,n
+   IND(j)=j
+  enddo
+
+  if (n == 1) return
+
+  L=n/2+1
+  ir=n
+  100 CONTINUE
+   IF (l>1) THEN
+      l=l-1
+      indx=ind(l)
+      q=a(indx)
+   ELSE
+      indx=ind(ir)
+      q=a(indx)
+      ind(ir)=ind(1)
+      ir=ir-1
+      if (ir == 1) then
+         ind(1)=indx
+         return
+      endif
+   ENDIF
+   i=l
+   j=l+l
+  200    CONTINUE
+   IF (J <= IR) THEN
+      IF (J<IR) THEN
+         IF ( A(IND(j))<A(IND(j+1)) ) j=j+1
+      ENDIF
+      IF (q<A(IND(j))) THEN
+         IND(I)=IND(J)
+         I=J
+         J=J+J
+      ELSE
+         J=IR+1
+      ENDIF
+   goto 200
+   ENDIF
+   IND(I)=INDX
+  goto 100
+  end subroutine rank
+
+! ------------------------------------------------------------------
+
+  subroutine swap_all(IA,A,B,C,IW,W,ind,n)
+!
+! swap arrays IA, A, B and C according to addressing in array IND
+!
+  implicit none
+
+  integer n
+
+  integer IND(n)
+  integer IA(n),IW(n)
+  double precision A(n),B(n),C(n),W(n)
+
+  integer i
+
+  IW(:) = IA(:)
+  W(:) = A(:)
+
+  do i=1,n
+    IA(i)=IW(ind(i))
+    A(i)=W(ind(i))
+  enddo
+
+  W(:) = B(:)
+
+  do i=1,n
+    B(i)=W(ind(i))
+  enddo
+
+  W(:) = C(:)
+
+  do i=1,n
+    C(i)=W(ind(i))
+  enddo
+
+  end subroutine swap_all
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_name_database.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_name_database.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_name_database.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,48 +1,48 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine create_name_database(prname,iproc,LOCAL_PATH)
-
-! create the name of the database for the mesher and the solver
-
-  implicit none
-
-  integer iproc
-
-! name of the database file
-  character(len=256) prname,procname,LOCAL_PATH,clean_LOCAL_PATH
-
-! create the name for the database of the current slide and region
-  write(procname,"('/proc',i6.6,'_')") iproc
-
-! suppress white spaces if any
-  clean_LOCAL_PATH = adjustl(LOCAL_PATH)
-
-! create full name with path
-  prname = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
-
-  end subroutine create_name_database
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine create_name_database(prname,iproc,LOCAL_PATH)
+
+! create the name of the database for the mesher and the solver
+
+  implicit none
+
+  integer iproc
+
+! name of the database file
+  character(len=256) prname,procname,LOCAL_PATH,clean_LOCAL_PATH
+
+! create the name for the database of the current slide and region
+  write(procname,"('/proc',i6.6,'_')") iproc
+
+! suppress white spaces if any
+  clean_LOCAL_PATH = adjustl(LOCAL_PATH)
+
+! create full name with path
+  prname = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
+
+  end subroutine create_name_database
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_serial_name_database.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_serial_name_database.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_serial_name_database.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,88 +1,88 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine create_serial_name_database(prname,iproc,LOCAL_PATH,NPROC,OUTPUT_FILES)
-
-! create name of the database for serial codes (AVS_DX and codes to check buffers)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iproc,NPROC
-
-! name of the database file
-  character(len=256) prname,procname,LOCAL_PATH,clean_LOCAL_PATH,serial_prefix,OUTPUT_FILES
-
-  integer iprocloop,nproc_max_loop
-  integer, dimension(:), allocatable :: num_active_proc
-  integer ier
-  nproc_max_loop = NPROC-1
-
-! create the name for the database of the current slide and region
-  write(procname,"('/proc',i6.6,'_')") iproc
-
-! on a Beowulf-type machine, path on frontend can be different from local paths
-  if(.not. LOCAL_PATH_IS_ALSO_GLOBAL) then
-
-! allocate array for active processors
-    allocate(num_active_proc(0:nproc_max_loop),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array num_active_proc'
-
-! read filtered file with name of active machines
-    open(unit=48,file=trim(OUTPUT_FILES)//'/filtered_machines.txt',status='old',action='read')
-    do iprocloop = 0,nproc_max_loop
-      read(48,*) num_active_proc(iprocloop)
-    enddo
-    close(48)
-
-! create the serial prefix pointing to the correct machine
-    write(serial_prefix,"('/auto/scratch_n',i6.6,'/')") num_active_proc(iproc)
-
-! suppress everything until the last "/" to define the base name of local path
-! this is system dependent since it assumes the disks are mounted
-! as on our Beowulf (Unix and NFS)
-    clean_LOCAL_PATH = LOCAL_PATH(index(LOCAL_PATH,'/',.true.)+1:len_trim(LOCAL_PATH))
-
-! create full name with path
-    prname = serial_prefix(1:len_trim(serial_prefix)) // clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
-
-! deallocate array
-    deallocate(num_active_proc)
-
-! on shared-memory machines, global path is the same as local path
-  else
-
-! suppress white spaces if any
-    clean_LOCAL_PATH = adjustl(LOCAL_PATH)
-
-! create full name with path
-    prname = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
-
-  endif
-
-  end subroutine create_serial_name_database
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine create_serial_name_database(prname,iproc,LOCAL_PATH,NPROC,OUTPUT_FILES)
+
+! create name of the database for serial codes (AVS_DX and codes to check buffers)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iproc,NPROC
+
+! name of the database file
+  character(len=256) prname,procname,LOCAL_PATH,clean_LOCAL_PATH,serial_prefix,OUTPUT_FILES
+
+  integer iprocloop,nproc_max_loop
+  integer, dimension(:), allocatable :: num_active_proc
+  integer ier
+  nproc_max_loop = NPROC-1
+
+! create the name for the database of the current slide and region
+  write(procname,"('/proc',i6.6,'_')") iproc
+
+! on a Beowulf-type machine, path on frontend can be different from local paths
+  if(.not. LOCAL_PATH_IS_ALSO_GLOBAL) then
+
+! allocate array for active processors
+    allocate(num_active_proc(0:nproc_max_loop),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array num_active_proc'
+
+! read filtered file with name of active machines
+    open(unit=48,file=trim(OUTPUT_FILES)//'/filtered_machines.txt',status='old',action='read')
+    do iprocloop = 0,nproc_max_loop
+      read(48,*) num_active_proc(iprocloop)
+    enddo
+    close(48)
+
+! create the serial prefix pointing to the correct machine
+    write(serial_prefix,"('/auto/scratch_n',i6.6,'/')") num_active_proc(iproc)
+
+! suppress everything until the last "/" to define the base name of local path
+! this is system dependent since it assumes the disks are mounted
+! as on our Beowulf (Unix and NFS)
+    clean_LOCAL_PATH = LOCAL_PATH(index(LOCAL_PATH,'/',.true.)+1:len_trim(LOCAL_PATH))
+
+! create full name with path
+    prname = serial_prefix(1:len_trim(serial_prefix)) // clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
+
+! deallocate array
+    deallocate(num_active_proc)
+
+! on shared-memory machines, global path is the same as local path
+  else
+
+! suppress white spaces if any
+    clean_LOCAL_PATH = adjustl(LOCAL_PATH)
+
+! create full name with path
+    prname = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
+
+  endif
+
+  end subroutine create_serial_name_database
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/define_derivation_matrices.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/define_derivation_matrices.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/define_derivation_matrices.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,158 +1,158 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
-         hprime_xx,hprime_yy,hprime_zz, &
-         hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
-         wgllwgll_xy,wgllwgll_xz,wgllwgll_yz)
-
-  implicit none
-
-  include "constants.h"
-
-! Gauss-Lobatto-Legendre points of integration and weights
-  double precision, dimension(NGLLX) :: xigll,wxgll
-  double precision, dimension(NGLLY) :: yigll,wygll
-  double precision, dimension(NGLLZ) :: zigll,wzgll
-
-! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
-  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! function for calculating derivatives of Lagrange polynomials
-  double precision, external :: lagrange_deriv_GLL
-
-  integer i,j,k,i1,i2,j1,j2,k1,k2
-
-! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
-
-! set up coordinates of the Gauss-Lobatto-Legendre points
-  call zwgljd(xigll,wxgll,NGLLX,GAUSSALPHA,GAUSSBETA)
-  call zwgljd(yigll,wygll,NGLLY,GAUSSALPHA,GAUSSBETA)
-  call zwgljd(zigll,wzgll,NGLLZ,GAUSSALPHA,GAUSSBETA)
-
-! if number of points is odd, the middle abscissa is exactly ZERO
-  if(mod(NGLLX,2) /= 0) xigll((NGLLX-1)/2+1) = ZERO
-  if(mod(NGLLY,2) /= 0) yigll((NGLLY-1)/2+1) = ZERO
-  if(mod(NGLLZ,2) /= 0) zigll((NGLLZ-1)/2+1) = ZERO
-
-! distinguish between single and double precision for reals
-  if(CUSTOM_REAL == SIZE_REAL) then
-
-! calculate derivatives of the Lagrange polynomials
-! and precalculate some products in double precision
-! hprime(i,j) = h'_j(xigll_i) by definition of the derivation matrix
-  do i1=1,NGLLX
-    do i2=1,NGLLX
-      hprime_xx(i2,i1) = sngl(lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX))
-      hprimewgll_xx(i2,i1) = sngl(lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)*wxgll(i2))
-    enddo
-  enddo
-
-  do j1=1,NGLLY
-    do j2=1,NGLLY
-      hprime_yy(j2,j1) = sngl(lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY))
-      hprimewgll_yy(j2,j1) = sngl(lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)*wygll(j2))
-    enddo
-  enddo
-
-  do k1=1,NGLLZ
-    do k2=1,NGLLZ
-      hprime_zz(k2,k1) = sngl(lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ))
-      hprimewgll_zz(k2,k1) = sngl(lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)*wzgll(k2))
-    enddo
-  enddo
-
-  do i=1,NGLLX
-    do j=1,NGLLY
-      wgllwgll_xy(i,j) = sngl(wxgll(i)*wygll(j))
-    enddo
-  enddo
-
-  do i=1,NGLLX
-    do k=1,NGLLZ
-      wgllwgll_xz(i,k) = sngl(wxgll(i)*wzgll(k))
-    enddo
-  enddo
-
-  do j=1,NGLLY
-    do k=1,NGLLZ
-      wgllwgll_yz(j,k) = sngl(wygll(j)*wzgll(k))
-    enddo
-  enddo
-
-  else  ! double precision version
-
-! calculate derivatives of the Lagrange polynomials
-! and precalculate some products in double precision
-! hprime(i,j) = h'_j(xigll_i) by definition of the derivation matrix
-  do i1=1,NGLLX
-    do i2=1,NGLLX
-      hprime_xx(i2,i1) = lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)
-      hprimewgll_xx(i2,i1) = lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)*wxgll(i2)
-    enddo
-  enddo
-
-  do j1=1,NGLLY
-    do j2=1,NGLLY
-      hprime_yy(j2,j1) = lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)
-      hprimewgll_yy(j2,j1) = lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)*wygll(j2)
-    enddo
-  enddo
-
-  do k1=1,NGLLZ
-    do k2=1,NGLLZ
-      hprime_zz(k2,k1) = lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)
-      hprimewgll_zz(k2,k1) = lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)*wzgll(k2)
-    enddo
-  enddo
-
-  do i=1,NGLLX
-    do j=1,NGLLY
-      wgllwgll_xy(i,j) = wxgll(i)*wygll(j)
-    enddo
-  enddo
-
-  do i=1,NGLLX
-    do k=1,NGLLZ
-      wgllwgll_xz(i,k) = wxgll(i)*wzgll(k)
-    enddo
-  enddo
-
-  do j=1,NGLLY
-    do k=1,NGLLZ
-      wgllwgll_yz(j,k) = wygll(j)*wzgll(k)
-    enddo
-  enddo
-
-  endif
-
-  end subroutine define_derivation_matrices
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
+         hprime_xx,hprime_yy,hprime_zz, &
+         hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+         wgllwgll_xy,wgllwgll_xz,wgllwgll_yz)
+
+  implicit none
+
+  include "constants.h"
+
+! Gauss-Lobatto-Legendre points of integration and weights
+  double precision, dimension(NGLLX) :: xigll,wxgll
+  double precision, dimension(NGLLY) :: yigll,wygll
+  double precision, dimension(NGLLZ) :: zigll,wzgll
+
+! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+! function for calculating derivatives of Lagrange polynomials
+  double precision, external :: lagrange_deriv_GLL
+
+  integer i,j,k,i1,i2,j1,j2,k1,k2
+
+! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+! set up coordinates of the Gauss-Lobatto-Legendre points
+  call zwgljd(xigll,wxgll,NGLLX,GAUSSALPHA,GAUSSBETA)
+  call zwgljd(yigll,wygll,NGLLY,GAUSSALPHA,GAUSSBETA)
+  call zwgljd(zigll,wzgll,NGLLZ,GAUSSALPHA,GAUSSBETA)
+
+! if number of points is odd, the middle abscissa is exactly ZERO
+  if(mod(NGLLX,2) /= 0) xigll((NGLLX-1)/2+1) = ZERO
+  if(mod(NGLLY,2) /= 0) yigll((NGLLY-1)/2+1) = ZERO
+  if(mod(NGLLZ,2) /= 0) zigll((NGLLZ-1)/2+1) = ZERO
+
+! distinguish between single and double precision for reals
+  if(CUSTOM_REAL == SIZE_REAL) then
+
+! calculate derivatives of the Lagrange polynomials
+! and precalculate some products in double precision
+! hprime(i,j) = h'_j(xigll_i) by definition of the derivation matrix
+  do i1=1,NGLLX
+    do i2=1,NGLLX
+      hprime_xx(i2,i1) = sngl(lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX))
+      hprimewgll_xx(i2,i1) = sngl(lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)*wxgll(i2))
+    enddo
+  enddo
+
+  do j1=1,NGLLY
+    do j2=1,NGLLY
+      hprime_yy(j2,j1) = sngl(lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY))
+      hprimewgll_yy(j2,j1) = sngl(lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)*wygll(j2))
+    enddo
+  enddo
+
+  do k1=1,NGLLZ
+    do k2=1,NGLLZ
+      hprime_zz(k2,k1) = sngl(lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ))
+      hprimewgll_zz(k2,k1) = sngl(lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)*wzgll(k2))
+    enddo
+  enddo
+
+  do i=1,NGLLX
+    do j=1,NGLLY
+      wgllwgll_xy(i,j) = sngl(wxgll(i)*wygll(j))
+    enddo
+  enddo
+
+  do i=1,NGLLX
+    do k=1,NGLLZ
+      wgllwgll_xz(i,k) = sngl(wxgll(i)*wzgll(k))
+    enddo
+  enddo
+
+  do j=1,NGLLY
+    do k=1,NGLLZ
+      wgllwgll_yz(j,k) = sngl(wygll(j)*wzgll(k))
+    enddo
+  enddo
+
+  else  ! double precision version
+
+! calculate derivatives of the Lagrange polynomials
+! and precalculate some products in double precision
+! hprime(i,j) = h'_j(xigll_i) by definition of the derivation matrix
+  do i1=1,NGLLX
+    do i2=1,NGLLX
+      hprime_xx(i2,i1) = lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)
+      hprimewgll_xx(i2,i1) = lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)*wxgll(i2)
+    enddo
+  enddo
+
+  do j1=1,NGLLY
+    do j2=1,NGLLY
+      hprime_yy(j2,j1) = lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)
+      hprimewgll_yy(j2,j1) = lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)*wygll(j2)
+    enddo
+  enddo
+
+  do k1=1,NGLLZ
+    do k2=1,NGLLZ
+      hprime_zz(k2,k1) = lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)
+      hprimewgll_zz(k2,k1) = lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)*wzgll(k2)
+    enddo
+  enddo
+
+  do i=1,NGLLX
+    do j=1,NGLLY
+      wgllwgll_xy(i,j) = wxgll(i)*wygll(j)
+    enddo
+  enddo
+
+  do i=1,NGLLX
+    do k=1,NGLLZ
+      wgllwgll_xz(i,k) = wxgll(i)*wzgll(k)
+    enddo
+  enddo
+
+  do j=1,NGLLY
+    do k=1,NGLLZ
+      wgllwgll_yz(j,k) = wygll(j)*wzgll(k)
+    enddo
+  enddo
+
+  endif
+
+  end subroutine define_derivation_matrices
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/exit_mpi.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/exit_mpi.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/exit_mpi.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,83 +1,83 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! end the simulation and exit MPI
-
-  subroutine exit_MPI(myrank,error_msg)
-
-  implicit none
-
-  include "constants.h"
-
-! identifier for error message file
-  integer, parameter :: IERROR = 30
-
-  integer myrank
-  character(len=*) error_msg
-
-  character(len=80) outputname
-  character(len=256) OUTPUT_FILES
-
-! write error message to screen
-  write(*,*) error_msg(1:len(error_msg))
-  write(*,*) 'Error detected, aborting MPI... proc ',myrank
-
-! write error message to file
-  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
-  write(outputname,"('/error_message',i6.6,'.txt')") myrank
-  open(unit=IERROR,file=trim(OUTPUT_FILES)//outputname,status='unknown')
-  write(IERROR,*) error_msg(1:len(error_msg))
-  write(IERROR,*) 'Error detected, aborting MPI... proc ',myrank
-  close(IERROR)
-
-! close output file
-  if(myrank == 0 .and. IMAIN /= ISTANDARD_OUTPUT) close(IMAIN)
-
-  call stop_all()
-
-  end subroutine exit_MPI
-
-!
-!----
-!
-
-! version without rank number printed in the error message
-  subroutine exit_MPI_without_rank(error_msg)
-
-  implicit none
-
-  include "constants.h"
-
-  character(len=*) error_msg
-
-! write error message to screen
-  write(*,*) error_msg(1:len(error_msg))
-  write(*,*) 'Error detected, aborting MPI...'
-
-  call stop_all()
-
-  end subroutine exit_MPI_without_rank
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! end the simulation and exit MPI
+
+  subroutine exit_MPI(myrank,error_msg)
+
+  implicit none
+
+  include "constants.h"
+
+! identifier for error message file
+  integer, parameter :: IERROR = 30
+
+  integer myrank
+  character(len=*) error_msg
+
+  character(len=80) outputname
+  character(len=256) OUTPUT_FILES
+
+! write error message to screen
+  write(*,*) error_msg(1:len(error_msg))
+  write(*,*) 'Error detected, aborting MPI... proc ',myrank
+
+! write error message to file
+  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
+  write(outputname,"('/error_message',i6.6,'.txt')") myrank
+  open(unit=IERROR,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+  write(IERROR,*) error_msg(1:len(error_msg))
+  write(IERROR,*) 'Error detected, aborting MPI... proc ',myrank
+  close(IERROR)
+
+! close output file
+  if(myrank == 0 .and. IMAIN /= ISTANDARD_OUTPUT) close(IMAIN)
+
+  call stop_all()
+
+  end subroutine exit_MPI
+
+!
+!----
+!
+
+! version without rank number printed in the error message
+  subroutine exit_MPI_without_rank(error_msg)
+
+  implicit none
+
+  include "constants.h"
+
+  character(len=*) error_msg
+
+! write error message to screen
+  write(*,*) error_msg(1:len(error_msg))
+  write(*,*) 'Error detected, aborting MPI...'
+
+  call stop_all()
+
+  end subroutine exit_MPI_without_rank
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_attenuation_model.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_attenuation_model.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_attenuation_model.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,1524 +1,1524 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-
-  subroutine get_attenuation_model_olsen( vs_val, Q_mu )
-
-! uses scaling rule similar to Olsen et al. (2003) to determine attenuation medium
-!
-! returns: selected (sediment) Q_mu
-!
-! refers to:
-!   K. B. Olsen, S. M. Day and C. R. Bradley, 2003.
-!   Estimation of Q for Long-Period (>2 sec) Waves in the Los Angeles Basin
-!   BSSA, 93, 2, p. 627-638
-
-  implicit none
-
-  include "constants.h"
-
-  real(kind=CUSTOM_REAL) :: vs_val
-  double precision :: Q_mu
-
-  !local parameters
-  integer :: int_Q_mu
-
-  ! two variations of scaling rule handling
-  logical,parameter :: USE_SIMPLE_OLSEN = .false.
-  logical,parameter :: USE_DISCRETE_OLSEN = .true.
-
-  ! uses rule Q_mu = constant * v_s
-  ! v_s in m/s
-  Q_mu = OLSEN_ATTENUATION_RATIO * vs_val
-
-  ! uses a simple, 2-constant model mentioned in Olsen et al. (2003)
-  if( USE_SIMPLE_OLSEN ) then
-    ! vs (in m/s)
-    if( vs_val < 2000.0_CUSTOM_REAL ) then
-      Q_mu = 0.02 * vs_val
-    else
-      Q_mu = 0.1 * vs_val
-    endif
-  endif
-
-  ! uses discrete values in sediment range
-  if( USE_DISCRETE_OLSEN ) then
-    int_Q_mu = 10 * nint(Q_mu / 10.)
-
-    if(int_Q_mu < 40) int_Q_mu = 40
-    if(int_Q_mu > 150) int_Q_mu = 150
-
-    if(int_Q_mu == 40) then
-      Q_mu = 40.0d0
-    else if(int_Q_mu == 50) then
-      Q_mu = 50.0d0
-    else if(int_Q_mu == 60) then
-      Q_mu = 60.0d0
-    else if(int_Q_mu == 70) then
-      Q_mu = 70.0d0
-    else if(int_Q_mu == 80) then
-      Q_mu = 80.0d0
-    else if(int_Q_mu == 90) then
-      Q_mu = 90.0d0
-    else if(int_Q_mu == 100) then
-      Q_mu = 100.0d0
-    else if(int_Q_mu == 110) then
-      Q_mu = 110.0d0
-    else if(int_Q_mu == 120) then
-      Q_mu = 120.0d0
-    else if(int_Q_mu == 130) then
-      Q_mu = 130.0d0
-    else if(int_Q_mu == 140) then
-      Q_mu = 140.0d0
-    else if(int_Q_mu == 150) then
-      Q_mu = 150.0d0
-    else
-      stop 'incorrect attenuation coefficient'
-    endif
-  endif
-
-  ! limits Q_mu value range
-  if( Q_mu < 1.0d0 ) Q_mu = 1.0d0
-  if( Q_mu > ATTENUATION_COMP_MAXIMUM ) Q_mu = ATTENUATION_COMP_MAXIMUM
-
-
-  end subroutine get_attenuation_model_olsen
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine get_attenuation_model(myrank,nspec,USE_OLSEN_ATTENUATION, &
-                          mustore,rho_vs,qmu_attenuation_store, &
-                          ispec_is_elastic,min_resolved_period,prname)
-
-! precalculates attenuation arrays and stores arrays into files
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: myrank,nspec
-  logical :: USE_OLSEN_ATTENUATION
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: mustore
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rho_vs
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: qmu_attenuation_store
-
-  logical, dimension(nspec) :: ispec_is_elastic
-  real(kind=CUSTOM_REAL) :: min_resolved_period
-  character(len=256) :: prname
-
-  ! local parameters
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: one_minus_sum_beta
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: factor_common
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: scale_factor
-  double precision, dimension(N_SLS) :: tau_sigma_dble,beta_dble
-  double precision factor_scale_dble,one_minus_sum_beta_dble
-  double precision :: Q_mu
-  double precision :: f_c_source
-  double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: tau_sigma
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: tauinv
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: beta
-  real(kind=CUSTOM_REAL):: vs_val
-  integer :: i,j,k,ispec,ier
-  double precision :: qmin,qmax,qmin_all,qmax_all
-
-  ! initializes arrays
-  allocate(one_minus_sum_beta(NGLLX,NGLLY,NGLLZ,nspec), &
-          factor_common(N_SLS,NGLLX,NGLLY,NGLLZ,nspec), &
-          scale_factor(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocation attenuation arrays')
-
-  one_minus_sum_beta(:,:,:,:) = 1._CUSTOM_REAL
-  factor_common(:,:,:,:,:) = 1._CUSTOM_REAL
-  scale_factor(:,:,:,:) = 1._CUSTOM_REAL
-
-
-  ! gets stress relaxation times tau_sigma, i.e.
-  ! precalculates tau_sigma depending on period band (constant for all Q_mu), and
-  ! determines central frequency f_c_source of attenuation period band
-  call get_attenuation_constants(min_resolved_period,tau_sigma_dble,&
-              f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
-  ! user output
-  if( myrank == 0 ) then
-    write(IMAIN,*)
-    write(IMAIN,*) "attenuation: "
-    write(IMAIN,*) "  reference period (s)   : ",sngl(1.0/ATTENUATION_f0_REFERENCE), &
-                  " frequency: ",sngl(ATTENUATION_f0_REFERENCE)
-    write(IMAIN,*) "  period band min/max (s): ",sngl(MIN_ATTENUATION_PERIOD),sngl(MAX_ATTENUATION_PERIOD)
-    write(IMAIN,*) "  central period (s)     : ",sngl(1.0/f_c_source), &
-                  " frequency: ",sngl(f_c_source)
-  endif
-
-  ! determines inverse of tau_sigma
-  if(CUSTOM_REAL == SIZE_REAL) then
-    tau_sigma(:) = sngl(tau_sigma_dble(:))
-  else
-    tau_sigma(:) = tau_sigma_dble(:)
-  endif
-  ! precalculates the negative inverse of tau_sigma
-  tauinv(:) = - 1._CUSTOM_REAL / tau_sigma(:)
-
-  ! precalculates factors for shear modulus scaling according to attenuation model
-  qmin = HUGEVAL
-  qmax = 0.0
-  do ispec = 1,nspec
-
-    ! skips non elastic elements
-    if( ispec_is_elastic(ispec) .eqv. .false. ) cycle
-
-    ! determines attenuation factors for each GLL point
-    do k=1,NGLLZ
-      do j=1,NGLLY
-        do i=1,NGLLX
-
-          ! shear moduli attenuation
-          ! gets Q_mu value
-          if(USE_OLSEN_ATTENUATION) then
-            ! use scaling rule similar to Olsen et al. (2003)
-            vs_val = mustore(i,j,k,ispec) / rho_vs(i,j,k,ispec)
-            call get_attenuation_model_olsen( vs_val, Q_mu )
-          else
-            ! takes Q set in (CUBIT) mesh
-            Q_mu = qmu_attenuation_store(i,j,k,ispec)
-
-            ! attenuation zero
-            if( Q_mu <= 1.e-5 ) cycle
-
-            ! limits Q
-            if( Q_mu < 1.0d0 ) Q_mu = 1.0d0
-            if( Q_mu > ATTENUATION_COMP_MAXIMUM ) Q_mu = ATTENUATION_COMP_MAXIMUM
-
-          endif
-          ! statistics
-          if( Q_mu < qmin ) qmin = Q_mu
-          if( Q_mu > qmax ) qmax = Q_mu
-
-          ! gets beta, on_minus_sum_beta and factor_scale
-          ! based on calculation of strain relaxation times tau_eps
-          call get_attenuation_factors(myrank,Q_mu,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD, &
-                            f_c_source,tau_sigma_dble, &
-                            beta_dble,one_minus_sum_beta_dble,factor_scale_dble)
-
-          ! stores factor for unrelaxed parameter
-          one_minus_sum_beta(i,j,k,ispec) = one_minus_sum_beta_dble
-
-          ! stores factor for runge-kutta scheme
-          ! using factor for modulus defect Delta M_i = - M_relaxed
-          ! see e.g. Savage et al. (BSSA, 2010): eq. 11
-          !     precomputes factor: 2 ( 1 - tau_eps_i / tau_sigma_i ) / tau_sigma_i
-          beta(:) = beta_dble(:)
-          factor_common(:,i,j,k,ispec) = 2._CUSTOM_REAL * beta(:) * tauinv(:)
-
-          ! stores scale factor for mu moduli
-          scale_factor(i,j,k,ispec) = factor_scale_dble
-
-        enddo
-      enddo
-    enddo
-  enddo
-
-  ! stores attenuation arrays into files
-  open(unit=27, file=prname(1:len_trim(prname))//'attenuation.bin', &
-        status='unknown',action='write',form='unformatted',iostat=ier)
-  if( ier /= 0 ) then
-    print*,'error: could not open ',prname(1:len_trim(prname))//'attenuation.bin'
-    call exit_mpi(myrank,'error opening attenuation.bin file')
-  endif
-  write(27) nspec
-  write(27) one_minus_sum_beta
-  write(27) factor_common
-  write(27) scale_factor
-  close(27)
-
-  deallocate(one_minus_sum_beta,factor_common,scale_factor)
-
-  ! statistics
-  call min_all_dp(qmin,qmin_all)
-  call max_all_dp(qmax,qmax_all)
-  ! user output
-  if( myrank == 0 ) then
-    write(IMAIN,*) "  Q_mu min/max           : ",sngl(qmin_all),sngl(qmax_all)
-    write(IMAIN,*)
-  endif
-
-  end subroutine get_attenuation_model
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-  subroutine get_attenuation_memory_values(tau_s, deltat, alphaval,betaval,gammaval)
-
-! returns: runge-kutta scheme terms alphaval, betaval and gammaval
-
-  implicit none
-
-  include 'constants.h'
-
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: tau_s, alphaval, betaval,gammaval
-  real(kind=CUSTOM_REAL) :: deltat
-
-  ! local parameter
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: tauinv
-
-  ! inverse of tau_s
-  tauinv(:) = - 1._CUSTOM_REAL / tau_s(:)
-
-  ! runge-kutta coefficients
-  ! see e.g.: Savage et al. (BSSA, 2010): eq. (11)
-  alphaval(:) = 1.0 + deltat*tauinv(:) + deltat**2 * tauinv(:)**2 / 2._CUSTOM_REAL &
-                    + deltat**3 * tauinv(:)**3 / 6._CUSTOM_REAL &
-                    + deltat**4 * tauinv(:)**4 / 24._CUSTOM_REAL
-  betaval(:) = deltat / 2._CUSTOM_REAL + deltat**2 * tauinv(:) / 3._CUSTOM_REAL &
-                   + deltat**3 * tauinv(:)**2 / 8._CUSTOM_REAL &
-                   + deltat**4 * tauinv(:)**3 / 24._CUSTOM_REAL
-  gammaval(:) = deltat / 2._CUSTOM_REAL + deltat**2 * tauinv(:) / 6._CUSTOM_REAL &
-                    + deltat**3 * tauinv(:)**2 / 24._CUSTOM_REAL
-
-  end subroutine get_attenuation_memory_values
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine get_attenuation_constants(min_resolved_period,tau_sigma, &
-                              f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
-! returns: period band constants tau_sigma and center frequency f_c_source
-
-  implicit none
-
-  include "constants.h"
-
-  real(kind=CUSTOM_REAL) :: min_resolved_period
-  double precision, dimension(N_SLS) :: tau_sigma
-  double precision :: f_c_source
-  double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
-
-  ! local parameters
-  real(kind=CUSTOM_REAL)  :: min_period
-
-  ! determines min/max periods for attenuation band based on minimum resolved period of mesh
-  min_period = 0.99 * min_resolved_period ! uses a small margin
-  ! debug for comparison with fix values from above
-  !min_resolved_period = 0.943
-  call get_attenuation_periods(min_period,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
-  !  sets up stress relaxation times tau_sigma,
-  ! equally spaced based on number of standard linear solids and period band
-  call get_attenuation_tau_sigma(tau_sigma,N_SLS,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
-  ! sets up central frequency
-  ! logarithmic mean of frequency interval of absorption band
-  call get_attenuation_source_freq(f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
-  ! debug
-  !f_c_source = 0.141421d0
-  !tau_sigma(1) =  7.957747154594766669788441504352d0
-  !tau_sigma(2) =  1.125395395196382652969191440206d0
-  !tau_sigma(3) =  0.159154943091895345608222100964d0
-
-  end subroutine get_attenuation_constants
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine get_attenuation_factors(myrank,Q_mu,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD, &
-                              f_c_source,tau_sigma, &
-                              beta,one_minus_sum_beta,factor_scale)
-
-! returns: attenuation mechanisms beta,one_minus_sum_beta,factor_scale
-
-! variable frequency range
-! variable period range
-! variable central logarithmic frequency
-
-! in the future when more memory is available on computers
-! it would be more accurate to use four mechanisms instead of three
-
-
-  implicit none
-
-  include "constants.h"
-
-  integer:: myrank
-  double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
-  double precision :: f_c_source,Q_mu
-  double precision, dimension(N_SLS) :: tau_sigma
-  double precision, dimension(N_SLS) :: beta
-  double precision :: one_minus_sum_beta
-  double precision :: factor_scale
-  ! local parameters
-  double precision, dimension(N_SLS) :: tau_eps
-
-  ! determines tau_eps for Q_mu
-  call get_attenuation_tau_eps(Q_mu,tau_sigma,tau_eps, &
-                              MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
-  ! determines one_minus_sum_beta
-  call get_attenuation_property_values(tau_sigma,tau_eps,beta,one_minus_sum_beta)
-
-  ! determines the "scale factor"
-  call get_attenuation_scale_factor(myrank,f_c_source,tau_eps,tau_sigma,Q_mu,factor_scale)
-
-  end subroutine get_attenuation_factors
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-  subroutine get_attenuation_property_values(tau_s, tau_eps, beta, one_minus_sum_beta)
-
-! coefficients useful for calculation between relaxed and unrelaxed moduli
-!
-! returns: coefficients beta, one_minus_sum_beta
-
-  implicit none
-
-  include "constants.h"
-
-  double precision,dimension(N_SLS),intent(in) :: tau_s, tau_eps
-  double precision,dimension(N_SLS),intent(out) :: beta
-  double precision,intent(out):: one_minus_sum_beta
-
-  ! local parameters
-  double precision,dimension(N_SLS) :: tauinv
-  integer :: i
-
-  ! inverse of stress relaxation times
-  tauinv(:) = -1.0d0 / tau_s(:)
-
-  ! see e.g. Komatitsch & Tromp 1999, eq. (7)
-
-  ! coefficients beta
-  beta(:) = 1.0d0 - tau_eps(:) / tau_s(:)
-
-  ! sum of coefficients beta
-  one_minus_sum_beta = 1.0d0
-  do i = 1,N_SLS
-     one_minus_sum_beta = one_minus_sum_beta - beta(i)
-  enddo
-
-  end subroutine get_attenuation_property_values
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine get_attenuation_scale_factor(myrank, f_c_source, tau_eps, tau_sigma, Q_mu, scale_factor)
-
-! returns: physical dispersion scaling factor scale_factor
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: myrank
-  double precision :: scale_factor, Q_mu, f_c_source
-  ! strain and stress relaxation times
-  double precision, dimension(N_SLS) :: tau_eps, tau_sigma
-
-  ! local parameters
-  double precision w_c_source
-  double precision factor_scale_mu0, factor_scale_mu
-  double precision a_val, b_val
-  double precision big_omega
-  integer i
-
-
-  !--- compute central angular frequency of source (non dimensionalized)
-  w_c_source = TWO_PI * f_c_source
-
-
-  !--- quantity by which to scale mu_0 to get mu
-  ! this formula can be found for instance in
-  ! Liu, H. P., Anderson, D. L. and Kanamori, H., Velocity dispersion due to
-  ! anelasticity: implications for seismology and mantle composition,
-  ! Geophys. J. R. Astron. Soc., vol. 47, pp. 41-58 (1976)
-  ! and in Aki, K. and Richards, P. G., Quantitative seismology, theory and methods,
-  ! W. H. Freeman, (1980), second edition, sections 5.5 and 5.5.2, eq. (5.81) p. 170
-  factor_scale_mu0 = ONE + TWO * log(f_c_source / ATTENUATION_f0_REFERENCE ) / (PI * Q_mu)
-
-  !--- compute a, b and Omega parameters
-  ! see e.g.:
-  !   Liu et al. (1976): eq. 25
-  !   using complex modulus Mc = M_R / ( A - i B )
-  !   or
-  !   Savage et al. (BSSA, 2010): eq. (5) and (6)
-  !   complex modulus: M(t) = M_1(t) + i M_2(t)
-  a_val = ONE
-  b_val = ZERO
-  do i = 1,N_SLS
-    ! real part M_1 of complex modulus
-    a_val = a_val - w_c_source * w_c_source * tau_eps(i) * &
-      (tau_eps(i) - tau_sigma(i)) / (1.d0 + w_c_source * w_c_source * tau_eps(i) * tau_eps(i))
-    ! imaginary part M_2 of complex modulus
-    b_val = b_val + w_c_source * (tau_eps(i) - tau_sigma(i)) / &
-      (1.d0 + w_c_source * w_c_source * tau_eps(i) * tau_eps(i))
-  enddo
-
-  ! see e.g. Liu et al. (1976): Omega used in equation (20)
-  big_omega = a_val * ( sqrt(1.d0 + b_val*b_val/(a_val*a_val)) - 1.d0 )
-
-  !--- quantity by which to scale mu to get mu_relaxed
-  factor_scale_mu = b_val * b_val / (TWO * big_omega)
-
-  !--- total factor by which to scale mu0
-  scale_factor = factor_scale_mu * factor_scale_mu0
-
-  !--- check that the correction factor is close to one
-  if(scale_factor < 0.7 .or. scale_factor > 1.3) then
-     write(*,*) "error : in get_attenuation_scale_factor() "
-     write(*,*) "  scale factor: ", scale_factor, " should be between 0.7 and 1.3"
-     write(*,*) "  please check your reference frequency ATTENUATION_f0_REFERENCE in constants.h"
-     call exit_MPI(myrank,'incorrect correction factor in attenuation model')
-  endif
-
-  end subroutine get_attenuation_scale_factor
-
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-!compare: auto_ner.f90, GLOBE package
-
-  subroutine get_attenuation_periods(min_resolved_period,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
-! determines min/max periods for attenuation based upon mininum resolved period of mesh
-
-  implicit none
-
-  include "constants.h"
-
-  real(kind=CUSTOM_REAL),intent(in) :: min_resolved_period
-  double precision,intent(out) :: MIN_ATTENUATION_PERIOD, MAX_ATTENUATION_PERIOD
-
-  ! local parameters
-  double precision :: THETA(5)
-
-  ! checks number of standard linear solids
-  if(N_SLS < 2 .OR. N_SLS > 5) then
-     stop 'N_SLS must be greater than 1 or less than 6'
-  endif
-
-  ! THETA defines the width of the Attenation Range in Decades
-  !   The number defined here were determined by minimizing
-  !   the "flatness" of the absoption spectrum.  Each THETA
-  !   is defined for a particular N_SLS (constants.h)
-  !   THETA(2) is for N_SLS = 2
-  THETA(1)           =   0.00d0
-  THETA(2)           =   0.75d0
-  THETA(3)           =   1.75d0
-  THETA(4)           =   2.25d0
-  THETA(5)           =   2.85d0
-
-  ! Compute Min Attenuation Period
-  !
-  ! The Minimum attenuation period = (Grid Spacing in km) / V_min
-  !  Grid spacing in km     = Width of an element in km * spacing for GLL point * points per wavelength
-
-  MIN_ATTENUATION_PERIOD = min_resolved_period
-
-
-  ! Compute Max Attenuation Period
-  !
-  ! The max attenuation period for 3 SLS is optimally
-  !   1.75 decades from the min attenuation period, see THETA above
-  !
-  ! this uses: theta = log( T_max / T_min ) to calculate T_max for a given T_min
-
-  MAX_ATTENUATION_PERIOD = MIN_ATTENUATION_PERIOD * 10.0d0**THETA(N_SLS)
-
-  end subroutine get_attenuation_periods
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine get_attenuation_tau_sigma(tau_s, nsls, min_period, max_period)
-
-! Determines stress relaxation times tau_sigma
-! Sets the Tau_sigma (tau_s) to be equally spaced in log10 frequency
-
-  implicit none
-
-  integer :: nsls
-  double precision,intent(in) :: min_period, max_period
-  double precision,intent(out) :: tau_s(nsls)
-  ! local parameters
-  double precision :: f1, f2
-  double precision :: exp1, exp2
-  double precision :: dexp
-  integer :: i
-  double precision, parameter :: PI = 3.14159265358979d0
-
-  ! min/max frequencies
-  f1 = 1.0d0 / max_period
-  f2 = 1.0d0 / min_period
-
-  ! logarithms
-  exp1 = log10(f1)
-  exp2 = log10(f2)
-
-  ! equally spaced in log10 frequency
-  dexp = (exp2-exp1) / ((nsls*1.0d0) - 1)
-  do i = 1,nsls
-     tau_s(i) = 1.0 / (PI * 2.0d0 * 10**(exp1 + (i - 1)* 1.0d0 *dexp))
-  enddo
-
-  end subroutine get_attenuation_tau_sigma
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-  subroutine get_attenuation_source_freq(f_c_source,min_period,max_period)
-
-! Determines the Source Frequency
-
-  implicit none
-
-  double precision,intent(out) :: f_c_source
-  double precision,intent(in) :: min_period, max_period
-
-  ! local parameters
-  double precision f1, f2,T_c_source
-
-  ! min/max frequencies
-  f1 = 1.0d0 / max_period
-  f2 = 1.0d0 / min_period
-
-  T_c_source =  1.0e+03 * 10.0d0**(0.5 * (log10(f1) + log10(f2)))
-  ! central frequency
-  f_c_source = T_c_source / 1000.0d0
-
-  end subroutine get_attenuation_source_freq
-
-!--------------------------------------------------------------------------------------------------
-!
-!  This portion of the SPECFEM3D Code was written by:
-!  Brian Savage while at
-!     California Institute of Technology
-!     Department of Terrestrial Magnetism / Carnegie Institute of Washington
-!     Univeristy of Rhode Island
-!
-!  <savage at uri.edu>.
-!  <savage13 at gps.caltech.edu>
-!  <savage13 at dtm.ciw.edu>
-!
-!   It is based upon formulation in the following references:
-!
-!   Dahlen and Tromp, 1998
-!      Theoretical Global Seismology
-!
-!   Liu et al. 1976
-!      Velocity dispersion due to anelasticity: implications for seismology and mantle composition
-!      Geophys, J. R. asts. Soc, Vol 47, pp. 41-58
-!
-!   The methodology can be found in:
-!       Savage, B, D. Komatitsch and J. Tromp, 2010.
-!       Effects of 3D Attenuation on Seismic Wave Amplitude and Phase Measurements
-!       BSSA, 100, 3, p. 1241-1251.
-!
-! modifications:
-!  - minor modifications by Daniel Peter, november 2010
-!--------------------------------------------------------------------------------------------------
-
-  subroutine get_attenuation_tau_eps(Qmu_in,tau_s,tau_eps,min_period,max_period)
-
-! includes min_period, max_period, and N_SLS
-!
-! returns: determines strain relaxation times tau_eps
-
-  implicit none
-
-  include 'constants.h'
-
-! model_attenuation_variables
-!...
-
-  double precision :: Qmu_in
-  double precision, dimension(N_SLS) :: tau_s, tau_eps
-  double precision :: min_period,max_period
-
-  ! local parameters
-  integer :: rw
-  ! model_attenuation_storage_var
-  type model_attenuation_storage_var
-    sequence
-    double precision, dimension(:,:), pointer :: tau_eps_storage
-    double precision, dimension(:), pointer :: Qmu_storage
-    integer Q_resolution
-    integer Q_max
-  end type model_attenuation_storage_var
-  type (model_attenuation_storage_var) AM_S
-  ! attenuation_simplex_variables
-  type attenuation_simplex_variables
-    sequence
-    double precision Q  ! Q     = Desired Value of Attenuation or Q
-    double precision iQ ! iQ    = 1/Q
-    double precision, dimension(:), pointer ::  f
-    ! f = Frequencies at which to evaluate the solution
-    double precision, dimension(:), pointer :: tau_s
-    ! tau_s = Tau_sigma defined by the frequency range and
-    !             number of standard linear solids
-    integer nf          ! nf    = Number of Frequencies
-    integer nsls        ! nsls  = Number of Standard Linear Solids
-  end type attenuation_simplex_variables
-  type(attenuation_simplex_variables) AS_V
-
-  ! READ
-  rw = 1
-  call model_attenuation_storage(Qmu_in, tau_eps, rw, AM_S)
-  if(rw > 0) return
-
-  call attenuation_invert_by_simplex(min_period, max_period, N_SLS, Qmu_in, tau_s, tau_eps, AS_V)
-
-  ! WRITE
-  rw = -1
-  call model_attenuation_storage(Qmu_in, tau_eps, rw, AM_S)
-
-  end subroutine get_attenuation_tau_eps
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-  subroutine model_attenuation_storage(Qmu, tau_eps, rw, AM_S)
-
-  implicit none
-  include 'constants.h'
-
-! model_attenuation_storage_var
-  type model_attenuation_storage_var
-    sequence
-    double precision, dimension(:,:), pointer :: tau_eps_storage
-    double precision, dimension(:), pointer :: Qmu_storage
-    integer Q_resolution
-    integer Q_max
-  end type model_attenuation_storage_var
-
-  type (model_attenuation_storage_var) AM_S
-! model_attenuation_storage_var
-
-  double precision Qmu, Qmu_new
-  double precision, dimension(N_SLS) :: tau_eps
-  integer rw
-
-  integer Qtmp
-  integer, save :: first_time_called = 1
-  double precision, parameter :: ZERO_TOL = 1.e-5
-  integer ier
-
-  if(first_time_called == 1) then
-     first_time_called       = 0
-     AM_S%Q_resolution = 10**ATTENUATION_COMP_RESOLUTION
-     AM_S%Q_max        = ATTENUATION_COMP_MAXIMUM
-     Qtmp         = AM_S%Q_resolution * AM_S%Q_max
-
-     allocate(AM_S%tau_eps_storage(N_SLS, Qtmp), &
-             AM_S%Qmu_storage(Qtmp),stat=ier)
-     if( ier /= 0 ) stop 'error allocating arrays for attenuation storage'
-     AM_S%Qmu_storage(:) = -1
-  endif
-
-  if(Qmu < 0.0d0 .OR. Qmu > AM_S%Q_max) then
-     write(IMAIN,*) 'Error attenuation_storage()'
-     write(IMAIN,*) 'Attenuation Value out of Range: ', Qmu
-     write(IMAIN,*) 'Attenuation Value out of Range: Min, Max ', 0, AM_S%Q_max
-     call exit_MPI(0, 'Attenuation Value out of Range')
-  endif
-
-  if(rw > 0 .AND. Qmu <= ZERO_TOL) then
-     Qmu = 0.0d0;
-     tau_eps(:) = 0.0d0;
-     return
-  endif
-  ! Generate index for Storage Array
-  ! and Recast Qmu using this index
-  ! Accroding to Brian, use float
-  !Qtmp = Qmu * Q_resolution
-  !Qmu = Qtmp / Q_resolution;
-
-  ! by default: resolution is Q_resolution = 10
-  ! converts Qmu to an array integer index:
-  ! e.g. Qmu = 150.31 -> Qtmp = 150.31 * 10 = int( 1503.10 ) = 1503
-  Qtmp    = Qmu * dble(AM_S%Q_resolution)
-
-  ! rounds to corresponding double value:
-  ! e.g. Qmu_new = dble( 1503 ) / dble(10) = 150.30
-  ! but Qmu_new is not used any further...
-  Qmu_new = dble(Qtmp) / dble(AM_S%Q_resolution)
-
-  if(rw > 0) then
-     ! READ
-     if(AM_S%Qmu_storage(Qtmp) > 0) then
-        ! READ SUCCESSFUL
-        tau_eps(:)   = AM_S%tau_eps_storage(:, Qtmp)
-        Qmu        = AM_S%Qmu_storage(Qtmp)
-        rw = 1
-     else
-        ! READ NOT SUCCESSFUL
-        rw = -1
-     endif
-  else
-     ! WRITE SUCCESSFUL
-     AM_S%tau_eps_storage(:,Qtmp)    = tau_eps(:)
-     AM_S%Qmu_storage(Qtmp)        = Qmu
-     rw = 1
-  endif
-
-  end subroutine model_attenuation_storage
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine attenuation_invert_by_simplex(t2, t1, n, Q_real, tau_s, tau_eps, AS_V)
-
-  implicit none
-
-  ! attenuation_simplex_variables
-  type attenuation_simplex_variables
-    sequence
-    double precision Q  ! Q     = Desired Value of Attenuation or Q
-    double precision iQ ! iQ    = 1/Q
-    double precision, dimension(:), pointer ::  f
-    ! f = Frequencies at which to evaluate the solution
-    double precision, dimension(:), pointer :: tau_s
-    ! tau_s = Tau_sigma defined by the frequency range and
-    !             number of standard linear solids
-    integer nf          ! nf    = Number of Frequencies
-    integer nsls        ! nsls  = Number of Standard Linear Solids
-  end type attenuation_simplex_variables
-  type(attenuation_simplex_variables) AS_V
-  ! attenuation_simplex_variables
-
-  ! Input / Output
-  double precision  t1, t2
-  double precision  Q_real
-!  double precision  omega_not
-  integer  n
-  double precision, dimension(n)   :: tau_s, tau_eps
-
-  ! Internal
-  integer i, iterations, err,prnt
-  double precision f1, f2, exp1,exp2, min_value !, dexp
-  integer, parameter :: nf = 100
-  double precision, dimension(nf) :: f
-  double precision, parameter :: PI = 3.14159265358979d0
-  double precision, external :: attenuation_eval
-
-  ! Values to be passed into the simplex minimization routine
-  iterations = -1
-  min_value  = -1.0e-4
-  err        = 0
-  prnt       = 0
-
-  !allocate(f(nf))
-
-  ! Determine the min and max frequencies
-  f1 = 1.0d0 / t1
-  f2 = 1.0d0 / t2
-
-  ! Determine the exponents of the frequencies
-  exp1 = log10(f1)
-  exp2 = log10(f2)
-
-!  if(f2 < f1 .OR. Q_real < 0.0d0 .OR. n < 1) then
-!     call exit_MPI(0, 'frequencies flipped or Q less than zero or N_SLS < 0')
-!  endif
-
-  ! Determine the Source frequency
-!  omega_not =  1.0e+03 * 10.0d0**(0.5 * (log10(f1) + log10(f2)))
-
-
-  ! Determine the Frequencies at which to compare solutions
-  !   The frequencies should be equally spaced in log10 frequency
-  do i = 1,nf
-     f(i) = exp1 + ((i-1)*1.0d0 * (exp2-exp1) / ((nf-1)*1.0d0))
-  enddo
-
-  ! Set the Tau_sigma (tau_s) to be equally spaced in log10 frequency
-!  dexp = (exp2-exp1) / ((n*1.0d0) - 1)
-!  do i = 1,n
-!     tau_s(i) = 1.0 / (PI * 2.0d0 * 10**(exp1 + (i - 1)* 1.0d0 *dexp))
-!  enddo
-
-
-  ! Shove the paramters into the module
-  call attenuation_simplex_setup(nf,n,f,Q_real,tau_s,AS_V)
-
-  ! Set the Tau_epsilon (tau_eps) to an initial value at omega*tau = 1
-  ! tan_delta = 1/Q = (tau_eps - tau_s)/(2 * sqrt(tau e*tau_s))
-  !    if we assume tau_eps =~ tau_s
-  !    we get the equation below
-  do i = 1,n
-     tau_eps(i) = tau_s(i) + (tau_s(i) * 2.0d0/Q_real)
-  enddo
-
-  ! Run a simplex search to determine the optimum values of tau_eps
-  call fminsearch(attenuation_eval, tau_eps, n, iterations, min_value, prnt, err,AS_V)
-  if(err > 0) then
-     write(*,*)'Search did not converge for an attenuation of ', Q_real
-     write(*,*)'    Iterations: ', iterations
-     write(*,*)'    Min Value:  ', min_value
-     write(*,*)'    Aborting program'
-     call exit_MPI(0,'attenuation_simplex: Search for Strain relaxation times did not converge')
-  endif
-
-  !deallocate(f)
-
-  call attenuation_simplex_finish(AS_V)
-
-  end subroutine attenuation_invert_by_simplex
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-  subroutine attenuation_simplex_setup(nf_in,nsls_in,f_in,Q_in,tau_s_in,AS_V)
-
-!   - Inserts necessary parameters into the module attenuation_simplex_variables
-!   - See module for explaination
-
-  implicit none
-
-  ! attenuation_simplex_variables
-  type attenuation_simplex_variables
-    sequence
-    double precision Q  ! Q     = Desired Value of Attenuation or Q
-    double precision iQ ! iQ    = 1/Q
-    double precision, dimension(:), pointer ::  f
-    ! f = Frequencies at which to evaluate the solution
-    double precision, dimension(:), pointer :: tau_s
-    ! tau_s = Tau_sigma defined by the frequency range and
-    !             number of standard linear solids
-    integer nf          ! nf    = Number of Frequencies
-    integer nsls        ! nsls  = Number of Standard Linear Solids
-  end type attenuation_simplex_variables
-  type(attenuation_simplex_variables) AS_V
-  ! attenuation_simplex_variables
-
-  integer nf_in, nsls_in
-  double precision Q_in
-  double precision, dimension(nf_in)   :: f_in
-  double precision, dimension(nsls_in) :: tau_s_in
-  integer ier
-
-  allocate(AS_V%f(nf_in), &
-          AS_V%tau_s(nsls_in),stat=ier)
-  if( ier /= 0 ) stop 'error allocating arrays for attenuation simplex'
-
-  AS_V%nf    = nf_in
-  AS_V%nsls  = nsls_in
-  AS_V%f     = f_in
-  AS_V%Q     = Q_in
-  AS_V%iQ    = 1.0d0/AS_V%Q
-  AS_V%tau_s = tau_s_in
-
-  end subroutine attenuation_simplex_setup
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-  double precision function attenuation_eval(Xin,AS_V)
-
-!    - Computes the misfit from a set of relaxation paramters
-!          given a set of frequencies and target attenuation
-!    - Evaluates only at the given frequencies
-!    - Evaluation is done with an L2 norm
-!
-!    Input
-!      Xin = Tau_epsilon, Strain Relaxation Time
-!                Note: Tau_sigma the Stress Relaxation Time is loaded
-!                      with attenuation_simplex_setup and stored in
-!                      attenuation_simplex_variables
-!
-!    Xi = Sum_i^N sqrt [ (1/Qc_i - 1/Qt_i)^2 / 1/Qt_i^2 ]
-!
-!     where Qc_i is the computed attenuation at a specific frequency
-!           Qt_i is the desired attenuaiton at that frequency
-!
-!    Uses attenuation_simplex_variables to store constant values
-!
-!    See atteunation_simplex_setup
-!
-
-  implicit none
-
-  ! attenuation_simplex_variables
-  type attenuation_simplex_variables
-    sequence
-    double precision Q  ! Q     = Desired Value of Attenuation or Q
-    double precision iQ ! iQ    = 1/Q
-    double precision, dimension(:), pointer ::  f
-    ! f = Frequencies at which to evaluate the solution
-    double precision, dimension(:), pointer :: tau_s
-    ! tau_s = Tau_sigma defined by the frequency range and
-    !             number of standard linear solids
-    integer nf          ! nf    = Number of Frequencies
-    integer nsls        ! nsls  = Number of Standard Linear Solids
-  end type attenuation_simplex_variables
-  type(attenuation_simplex_variables) AS_V
-  ! attenuation_simplex_variables
-
-   ! Input
-  double precision, dimension(AS_V%nsls) :: Xin
-  double precision, dimension(AS_V%nsls) :: tau_eps
-
-  double precision, dimension(AS_V%nf)   :: A, B, tan_delta
-
-  integer i
-  double precision xi, iQ2
-
-  tau_eps = Xin
-
-  call attenuation_maxwell(AS_V%nf,AS_V%nsls,AS_V%f,AS_V%tau_s,tau_eps,B,A)
-
-  tan_delta = B / A
-
-  attenuation_eval = 0.0d0
-  iQ2 = AS_V%iQ**2
-  do i = 1,AS_V%nf
-     xi = sqrt(( ( (tan_delta(i) - AS_V%iQ) ** 2 ) / iQ2 ))
-     attenuation_eval = attenuation_eval + xi
-  enddo
-
-  end function attenuation_eval
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine attenuation_maxwell(nf,nsls,f,tau_s,tau_eps,B,A)
-
-!   - Computes the Moduli (Maxwell Solid) for a series of
-!         Standard Linear Solids
-!   - Computes M1 and M2 parameters after Dahlen and Tromp pp.203
-!         here called B and A after Liu et al. 1976
-!   - Another formulation uses Kelvin-Voigt Solids and computes
-!         Compliences J1 and J2 after Dahlen and Tromp pp.203
-!
-!   Input
-!     nf    = Number of Frequencies
-!     nsls  = Number of Standard Linear Solids
-!     f     = Frequencies (in log10 of frequencies)
-!                dimension(nf)
-!     tau_s = Tau_sigma  Stress relaxation time (see References)
-!                dimension(nsls)
-!     tau_eps = Tau_epislon Strain relaxation time (see References)
-!                dimension(nsls)!
-!   Output
-!     B     = Real Moduli      ( M2 Dahlen and Tromp pp.203 )
-!                dimension(nf)
-!     A     = Imaginary Moduli ( M1 Dahlen and Tromp pp.203 )
-!                dimension(nf)
-!
-!   Dahlen and Tromp, 1998
-!      Theoretical Global Seismology
-!
-!   Liu et al. 1976
-!      Velocity dispersion due to anelasticity: implications for seismology and mantle composition
-!      Geophys, J. R. asts. Soc, Vol 47, pp. 41-58
-
-  implicit none
-
-  ! Input
-  integer nf, nsls
-  double precision, dimension(nf)   :: f
-  double precision, dimension(nsls) :: tau_s, tau_eps
-  ! Output
-  double precision, dimension(nf)   :: A,B
-
-  integer i,j
-  double precision w, pi, demon
-
-  PI = 3.14159265358979d0
-
-  A(:) = 1.0d0 -  nsls*1.0d0
-  B(:) = 0.0d0
-  do i = 1,nf
-     w = 2.0d0 * PI * 10**f(i)
-     do j = 1,nsls
-        !        write(*,*)j,tau_s(j),tau_eps(j)
-        demon = 1.0d0 + w**2 * tau_s(j)**2
-        A(i) = A(i) + ((1.0d0 + (w**2 * tau_eps(j) * tau_s(j)))/ demon)
-        B(i) = B(i) + ((w * (tau_eps(j) - tau_s(j))) / demon)
-     end do
-      !     write(*,*)A(i),B(i),10**f(i)
-  enddo
-
-  end subroutine attenuation_maxwell
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-  subroutine fminsearch(funk, x, n, itercount, tolf, prnt, err, AS_V)
-
-! subroutine fminsearch
-!   - Computes the minimization of funk(x(n)) using the simplex method
-!   - This subroutine is copied from Matlab fminsearch.m
-!         and modified to suit my nefarious needs
-!   Input
-!     funk = double precision function with one input parameter
-!                double precision function the_funk(x)
-!     x    = Input/Output
-!               variables to be minimized
-!               dimension(n)
-!            Input:  Initial Value
-!            Output: Mimimized Value
-!     n    = number of variables
-!     itercount = Input/Output
-!                 Input:  maximum number of iterations
-!                         if < 0 default is used (200 * n)
-!                 Output: total number of iterations on output
-!     tolf      = Input/Output
-!                 Input:  minimium tolerance of the function funk(x)
-!                 Output: minimium value of funk(x)(i.e. "a" solution)
-!     prnt      = Input
-!                 3 => report every iteration
-!                 4 => report every iteration, total simplex
-!     err       = Output
-!                 0 => Normal exeecution, converged within desired range
-!                 1 => Function Evaluation exceeded limit
-!                 2 => Iterations exceeded limit
-!
-!     See Matlab fminsearch
-
-  implicit none
-
-! attenuation_simplex_variables
-  type attenuation_simplex_variables
-    sequence
-    double precision Q  ! Q     = Desired Value of Attenuation or Q
-    double precision iQ ! iQ    = 1/Q
-    double precision, dimension(:), pointer ::  f
-    ! f = Frequencies at which to evaluate the solution
-    double precision, dimension(:), pointer :: tau_s
-    ! tau_s = Tau_sigma defined by the frequency range and
-    !             number of standard linear solids
-    integer nf          ! nf    = Number of Frequencies
-    integer nsls        ! nsls  = Number of Standard Linear Solids
-  end type attenuation_simplex_variables
-
-  type(attenuation_simplex_variables) AS_V
-! attenuation_simplex_variables
-
-  ! Input
-  double precision, external :: funk
-
-  integer n
-  double precision x(n) ! Also Output
-  integer itercount, prnt, err
-  double precision tolf
-
-  !Internal
-  integer i,j, how
-  integer, parameter :: none             = 0
-  integer, parameter :: initial          = 1
-  integer, parameter :: expand           = 2
-  integer, parameter :: reflect          = 3
-  integer, parameter :: contract_outside = 4
-  integer, parameter :: contract_inside  = 5
-  integer, parameter :: shrink           = 6
-
-  integer maxiter, maxfun
-  integer func_evals
-  double precision tolx
-
-  double precision rho, chi, psi, sigma
-  double precision xin(n), y(n), v(n,n+1), fv(n+1)
-  double precision vtmp(n,n+1)
-  double precision usual_delta, zero_term_delta
-  double precision xbar(n), xr(n), fxr, xe(n), fxe, xc(n), fxc, fxcc, xcc(n)
-  integer place(n+1)
-
-  double precision max_size_simplex, max_value
-
-  rho   = 1.0d0
-  chi   = 2.0d0
-  psi   = 0.5d0
-  sigma = 0.5d0
-
-
-  if(itercount > 0) then
-     maxiter = itercount
-  else
-     maxiter = 200 * n
-  endif
-  itercount = 0
-  maxfun  = 200 * n
-
-  if(tolf > 0.0d0) then
-     tolx = 1.0e-4
-  else
-     tolx = 1.0e-4
-     tolf = 1.0e-4
-  endif
-
-  err = 0
-
-  xin    = x
-  v(:,:) = 0.0d0
-  fv(:)  = 0.0d0
-
-  v(:,1) = xin
-  x      = xin
-
-  fv(1) = funk(xin,AS_V)
-
-  usual_delta = 0.05
-  zero_term_delta = 0.00025
-
-  do j = 1,n
-     y = xin
-     if(y(j) /= 0.0d0) then
-        y(j) = (1.0d0 + usual_delta) * y(j)
-     else
-        y(j) = zero_term_delta
-     endif
-     v(:,j+1) = y
-     x(:) = y
-     fv(j+1) = funk(x,AS_V)
-  enddo
-
-  call qsort_local(fv,n+1,place)
-
-  do i = 1,n+1
-     vtmp(:,i) = v(:,place(i))
-  enddo
-  v = vtmp
-
-  how = initial
-  itercount = 1
-  func_evals = n+1
-  if(prnt == 3) then
-     write(*,*)'Iterations   Funk Evals   Value How'
-     write(*,*)itercount, func_evals, fv(1), how
-  endif
-  if(prnt == 4) then
-     write(*,*)'How: ',how
-     write(*,*)'V: ', v
-     write(*,*)'fv: ',fv
-     write(*,*)'evals: ',func_evals
-  endif
-
-  do while (func_evals < maxfun .AND. itercount < maxiter)
-
-     if(max_size_simplex(v,n) <= tolx .AND. &
-          max_value(fv,n+1) <= tolf) then
-        goto 666
-     endif
-     how = none
-
-     ! xbar = average of the n (NOT n+1) best points
-     !     xbar = sum(v(:,1:n), 2)/n
-     xbar(:) = 0.0d0
-     do i = 1,n
-        do j = 1,n
-           xbar(i) = xbar(i) + v(i,j)
-        enddo
-        xbar(i) = xbar(i) / (n*1.0d0)
-     enddo
-     xr = (1 + rho)*xbar - rho*v(:,n+1)
-     x(:) = xr
-     fxr = funk(x,AS_V)
-     func_evals = func_evals + 1
-     if (fxr < fv(1)) then
-        ! Calculate the expansion point
-        xe = (1 + rho*chi)*xbar - rho*chi*v(:,n+1)
-        x = xe
-        fxe = funk(x,AS_V)
-        func_evals = func_evals+1
-        if (fxe < fxr) then
-           v(:,n+1) = xe
-           fv(n+1) = fxe
-           how = expand
-        else
-           v(:,n+1) = xr
-           fv(n+1) = fxr
-           how = reflect
-        endif
-     else ! fv(:,1) <= fxr
-        if (fxr < fv(n)) then
-           v(:,n+1) = xr
-           fv(n+1) = fxr
-           how = reflect
-        else ! fxr >= fv(:,n)
-           ! Perform contraction
-           if (fxr < fv(n+1)) then
-              ! Perform an outside contraction
-              xc = (1 + psi*rho)*xbar - psi*rho*v(:,n+1)
-              x(:) = xc
-              fxc = funk(x,AS_V)
-              func_evals = func_evals+1
-
-              if (fxc <= fxr) then
-                 v(:,n+1) = xc
-                 fv(n+1) = fxc
-                 how = contract_outside
-              else
-                 ! perform a shrink
-                 how = shrink
-              endif
-           else
-              ! Perform an inside contraction
-              xcc = (1-psi)*xbar + psi*v(:,n+1)
-              x(:) = xcc
-              fxcc = funk(x,AS_V)
-              func_evals = func_evals+1
-
-              if (fxcc < fv(n+1)) then
-                 v(:,n+1) = xcc
-                 fv(n+1) = fxcc
-                 how = contract_inside
-              else
-                 ! perform a shrink
-                 how = shrink
-              endif
-           endif
-           if (how == shrink) then
-              do j=2,n+1
-                 v(:,j)=v(:,1)+sigma*(v(:,j) - v(:,1))
-                 x(:) = v(:,j)
-                 fv(j) = funk(x,AS_V)
-              enddo
-              func_evals = func_evals + n
-           endif
-        endif
-     endif
-
-     call qsort_local(fv,n+1,place)
-     do i = 1,n+1
-        vtmp(:,i) = v(:,place(i))
-     enddo
-     v = vtmp
-
-     itercount = itercount + 1
-     if (prnt == 3) then
-        write(*,*)itercount, func_evals, fv(1), how
-     elseif (prnt == 4) then
-        write(*,*)
-        write(*,*)'How: ',how
-        write(*,*)'v: ',v
-        write(*,*)'fv: ',fv
-        write(*,*)'evals: ',func_evals
-     endif
-  enddo
-
-  if(func_evals > maxfun) then
-     write(*,*)'function evaluations exceeded prescribed limit', maxfun
-     err = 1
-  endif
-  if(itercount > maxiter) then
-     write(*,*)'iterations exceeded prescribed limit', maxiter
-     err = 2
-  endif
-
-666 continue
-  x = v(:,1)
-  tolf = fv(1)
-
-  end subroutine fminsearch
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  double precision function max_value(fv,n)
-
-!    - Finds the maximim value of the difference of between the first
-!          value and the remaining values of a vector
-!    Input
-!      fv = Input
-!             Vector
-!             dimension(n)
-!      n  = Input
-!             Length of fv
-!
-!      Returns:
-!         Xi = max( || fv(1)- fv(i) || ) for i=2:n
-!
-
-  implicit none
-  integer n
-  double precision fv(n)
-
-  integer i
-  double precision m, z
-
-  m = 0.0d0
-  do i = 2,n
-     z = abs(fv(1) - fv(i))
-     if(z > m) then
-        m = z
-     endif
-  enddo
-
-  max_value = m
-
-  end function max_value
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  double precision function max_size_simplex(v,n)
-
-!   - Determines the maximum distance between two point in a simplex
-!   Input
-!     v  = Input
-!            Simplex Verticies
-!            dimension(n, n+1)
-!     n  = Pseudo Length of n
-!
-!     Returns:
-!       Xi = max( max( || v(:,1) - v(:,i) || ) ) for i=2:n+1
-!
-
-  implicit none
-  integer n
-  double precision v(n,n+1)
-
-  integer i,j
-  double precision m, z
-
-  m = 0.0d0
-  do i = 1,n
-     do j = 2,n+1
-        z = abs(v(i,j) - v(i,1))
-        if(z > m) then
-           m = z
-        endif
-     enddo
-  enddo
-
-  max_size_simplex = m
-
-  end function max_size_simplex
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-  subroutine qsort_local(X,n,I)
-
-!    - Implementation of a Bubble Sort Routine
-!    Input
-!      X = Input/Output
-!         Vector to be sorted
-!         dimension(n)
-!      n = Input
-!         Length of X
-!      I = Output
-!         Sorted Indicies of vecotr X
-!
-!      Example:
-!         X = [ 4 3 1 2 ] on Input
-!         I = [ 1 2 3 4 ] Computed Internally (in order)
-!
-!         X = [ 1 2 3 4 ] on Output
-!         I = [ 3 4 2 1 ] on Output
-!
-
-  implicit none
-
-  integer n
-  double precision X(n)
-  integer I(n)
-
-  integer j,k
-  double precision rtmp
-  integer itmp
-
-  do j = 1,n
-     I(j) = j
-  enddo
-
-  do j = 1,n
-     do k = 1,n-j
-        if(X(k+1) < X(k)) then
-           rtmp   = X(k)
-           X(k)   = X(k+1)
-           X(k+1) = rtmp
-
-           itmp   = I(k)
-           I(k)   = I(k+1)
-           I(k+1) = itmp
-        endif
-     enddo
-  enddo
-
-  end subroutine qsort_local
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine attenuation_simplex_finish(AS_V)
-
-  implicit none
-
-  ! attenuation_simplex_variables
-  type attenuation_simplex_variables
-    sequence
-    double precision Q  ! Q     = Desired Value of Attenuation or Q
-    double precision iQ ! iQ    = 1/Q
-    double precision, dimension(:), pointer ::  f
-    ! f = Frequencies at which to evaluate the solution
-    double precision, dimension(:), pointer :: tau_s
-    ! tau_s = Tau_sigma defined by the frequency range and
-    !             number of standard linear solids
-    integer nf          ! nf    = Number of Frequencies
-    integer nsls        ! nsls  = Number of Standard Linear Solids
-  end type attenuation_simplex_variables
-  type(attenuation_simplex_variables) AS_V
-  ! attenuation_simplex_variables
-
-  deallocate(AS_V%f)
-  deallocate(AS_V%tau_s)
-
-  end subroutine attenuation_simplex_finish
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+  subroutine get_attenuation_model_olsen( vs_val, Q_mu )
+
+! uses scaling rule similar to Olsen et al. (2003) to determine attenuation medium
+!
+! returns: selected (sediment) Q_mu
+!
+! refers to:
+!   K. B. Olsen, S. M. Day and C. R. Bradley, 2003.
+!   Estimation of Q for Long-Period (>2 sec) Waves in the Los Angeles Basin
+!   BSSA, 93, 2, p. 627-638
+
+  implicit none
+
+  include "constants.h"
+
+  real(kind=CUSTOM_REAL) :: vs_val
+  double precision :: Q_mu
+
+  !local parameters
+  integer :: int_Q_mu
+
+  ! two variations of scaling rule handling
+  logical,parameter :: USE_SIMPLE_OLSEN = .false.
+  logical,parameter :: USE_DISCRETE_OLSEN = .true.
+
+  ! uses rule Q_mu = constant * v_s
+  ! v_s in m/s
+  Q_mu = OLSEN_ATTENUATION_RATIO * vs_val
+
+  ! uses a simple, 2-constant model mentioned in Olsen et al. (2003)
+  if( USE_SIMPLE_OLSEN ) then
+    ! vs (in m/s)
+    if( vs_val < 2000.0_CUSTOM_REAL ) then
+      Q_mu = 0.02 * vs_val
+    else
+      Q_mu = 0.1 * vs_val
+    endif
+  endif
+
+  ! uses discrete values in sediment range
+  if( USE_DISCRETE_OLSEN ) then
+    int_Q_mu = 10 * nint(Q_mu / 10.)
+
+    if(int_Q_mu < 40) int_Q_mu = 40
+    if(int_Q_mu > 150) int_Q_mu = 150
+
+    if(int_Q_mu == 40) then
+      Q_mu = 40.0d0
+    else if(int_Q_mu == 50) then
+      Q_mu = 50.0d0
+    else if(int_Q_mu == 60) then
+      Q_mu = 60.0d0
+    else if(int_Q_mu == 70) then
+      Q_mu = 70.0d0
+    else if(int_Q_mu == 80) then
+      Q_mu = 80.0d0
+    else if(int_Q_mu == 90) then
+      Q_mu = 90.0d0
+    else if(int_Q_mu == 100) then
+      Q_mu = 100.0d0
+    else if(int_Q_mu == 110) then
+      Q_mu = 110.0d0
+    else if(int_Q_mu == 120) then
+      Q_mu = 120.0d0
+    else if(int_Q_mu == 130) then
+      Q_mu = 130.0d0
+    else if(int_Q_mu == 140) then
+      Q_mu = 140.0d0
+    else if(int_Q_mu == 150) then
+      Q_mu = 150.0d0
+    else
+      stop 'incorrect attenuation coefficient'
+    endif
+  endif
+
+  ! limits Q_mu value range
+  if( Q_mu < 1.0d0 ) Q_mu = 1.0d0
+  if( Q_mu > ATTENUATION_COMP_MAXIMUM ) Q_mu = ATTENUATION_COMP_MAXIMUM
+
+
+  end subroutine get_attenuation_model_olsen
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine get_attenuation_model(myrank,nspec,USE_OLSEN_ATTENUATION, &
+                          mustore,rho_vs,qmu_attenuation_store, &
+                          ispec_is_elastic,min_resolved_period,prname)
+
+! precalculates attenuation arrays and stores arrays into files
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: myrank,nspec
+  logical :: USE_OLSEN_ATTENUATION
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: mustore
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rho_vs
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: qmu_attenuation_store
+
+  logical, dimension(nspec) :: ispec_is_elastic
+  real(kind=CUSTOM_REAL) :: min_resolved_period
+  character(len=256) :: prname
+
+  ! local parameters
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: one_minus_sum_beta
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: factor_common
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: scale_factor
+  double precision, dimension(N_SLS) :: tau_sigma_dble,beta_dble
+  double precision factor_scale_dble,one_minus_sum_beta_dble
+  double precision :: Q_mu
+  double precision :: f_c_source
+  double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: tau_sigma
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: tauinv
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: beta
+  real(kind=CUSTOM_REAL):: vs_val
+  integer :: i,j,k,ispec,ier
+  double precision :: qmin,qmax,qmin_all,qmax_all
+
+  ! initializes arrays
+  allocate(one_minus_sum_beta(NGLLX,NGLLY,NGLLZ,nspec), &
+          factor_common(N_SLS,NGLLX,NGLLY,NGLLZ,nspec), &
+          scale_factor(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocation attenuation arrays')
+
+  one_minus_sum_beta(:,:,:,:) = 1._CUSTOM_REAL
+  factor_common(:,:,:,:,:) = 1._CUSTOM_REAL
+  scale_factor(:,:,:,:) = 1._CUSTOM_REAL
+
+
+  ! gets stress relaxation times tau_sigma, i.e.
+  ! precalculates tau_sigma depending on period band (constant for all Q_mu), and
+  ! determines central frequency f_c_source of attenuation period band
+  call get_attenuation_constants(min_resolved_period,tau_sigma_dble,&
+              f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+  ! user output
+  if( myrank == 0 ) then
+    write(IMAIN,*)
+    write(IMAIN,*) "attenuation: "
+    write(IMAIN,*) "  reference period (s)   : ",sngl(1.0/ATTENUATION_f0_REFERENCE), &
+                  " frequency: ",sngl(ATTENUATION_f0_REFERENCE)
+    write(IMAIN,*) "  period band min/max (s): ",sngl(MIN_ATTENUATION_PERIOD),sngl(MAX_ATTENUATION_PERIOD)
+    write(IMAIN,*) "  central period (s)     : ",sngl(1.0/f_c_source), &
+                  " frequency: ",sngl(f_c_source)
+  endif
+
+  ! determines inverse of tau_sigma
+  if(CUSTOM_REAL == SIZE_REAL) then
+    tau_sigma(:) = sngl(tau_sigma_dble(:))
+  else
+    tau_sigma(:) = tau_sigma_dble(:)
+  endif
+  ! precalculates the negative inverse of tau_sigma
+  tauinv(:) = - 1._CUSTOM_REAL / tau_sigma(:)
+
+  ! precalculates factors for shear modulus scaling according to attenuation model
+  qmin = HUGEVAL
+  qmax = 0.0
+  do ispec = 1,nspec
+
+    ! skips non elastic elements
+    if( ispec_is_elastic(ispec) .eqv. .false. ) cycle
+
+    ! determines attenuation factors for each GLL point
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          ! shear moduli attenuation
+          ! gets Q_mu value
+          if(USE_OLSEN_ATTENUATION) then
+            ! use scaling rule similar to Olsen et al. (2003)
+            vs_val = mustore(i,j,k,ispec) / rho_vs(i,j,k,ispec)
+            call get_attenuation_model_olsen( vs_val, Q_mu )
+          else
+            ! takes Q set in (CUBIT) mesh
+            Q_mu = qmu_attenuation_store(i,j,k,ispec)
+
+            ! attenuation zero
+            if( Q_mu <= 1.e-5 ) cycle
+
+            ! limits Q
+            if( Q_mu < 1.0d0 ) Q_mu = 1.0d0
+            if( Q_mu > ATTENUATION_COMP_MAXIMUM ) Q_mu = ATTENUATION_COMP_MAXIMUM
+
+          endif
+          ! statistics
+          if( Q_mu < qmin ) qmin = Q_mu
+          if( Q_mu > qmax ) qmax = Q_mu
+
+          ! gets beta, on_minus_sum_beta and factor_scale
+          ! based on calculation of strain relaxation times tau_eps
+          call get_attenuation_factors(myrank,Q_mu,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD, &
+                            f_c_source,tau_sigma_dble, &
+                            beta_dble,one_minus_sum_beta_dble,factor_scale_dble)
+
+          ! stores factor for unrelaxed parameter
+          one_minus_sum_beta(i,j,k,ispec) = one_minus_sum_beta_dble
+
+          ! stores factor for runge-kutta scheme
+          ! using factor for modulus defect Delta M_i = - M_relaxed
+          ! see e.g. Savage et al. (BSSA, 2010): eq. 11
+          !     precomputes factor: 2 ( 1 - tau_eps_i / tau_sigma_i ) / tau_sigma_i
+          beta(:) = beta_dble(:)
+          factor_common(:,i,j,k,ispec) = 2._CUSTOM_REAL * beta(:) * tauinv(:)
+
+          ! stores scale factor for mu moduli
+          scale_factor(i,j,k,ispec) = factor_scale_dble
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! stores attenuation arrays into files
+  open(unit=27, file=prname(1:len_trim(prname))//'attenuation.bin', &
+        status='unknown',action='write',form='unformatted',iostat=ier)
+  if( ier /= 0 ) then
+    print*,'error: could not open ',prname(1:len_trim(prname))//'attenuation.bin'
+    call exit_mpi(myrank,'error opening attenuation.bin file')
+  endif
+  write(27) nspec
+  write(27) one_minus_sum_beta
+  write(27) factor_common
+  write(27) scale_factor
+  close(27)
+
+  deallocate(one_minus_sum_beta,factor_common,scale_factor)
+
+  ! statistics
+  call min_all_dp(qmin,qmin_all)
+  call max_all_dp(qmax,qmax_all)
+  ! user output
+  if( myrank == 0 ) then
+    write(IMAIN,*) "  Q_mu min/max           : ",sngl(qmin_all),sngl(qmax_all)
+    write(IMAIN,*)
+  endif
+
+  end subroutine get_attenuation_model
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine get_attenuation_memory_values(tau_s, deltat, alphaval,betaval,gammaval)
+
+! returns: runge-kutta scheme terms alphaval, betaval and gammaval
+
+  implicit none
+
+  include 'constants.h'
+
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: tau_s, alphaval, betaval,gammaval
+  real(kind=CUSTOM_REAL) :: deltat
+
+  ! local parameter
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: tauinv
+
+  ! inverse of tau_s
+  tauinv(:) = - 1._CUSTOM_REAL / tau_s(:)
+
+  ! runge-kutta coefficients
+  ! see e.g.: Savage et al. (BSSA, 2010): eq. (11)
+  alphaval(:) = 1.0 + deltat*tauinv(:) + deltat**2 * tauinv(:)**2 / 2._CUSTOM_REAL &
+                    + deltat**3 * tauinv(:)**3 / 6._CUSTOM_REAL &
+                    + deltat**4 * tauinv(:)**4 / 24._CUSTOM_REAL
+  betaval(:) = deltat / 2._CUSTOM_REAL + deltat**2 * tauinv(:) / 3._CUSTOM_REAL &
+                   + deltat**3 * tauinv(:)**2 / 8._CUSTOM_REAL &
+                   + deltat**4 * tauinv(:)**3 / 24._CUSTOM_REAL
+  gammaval(:) = deltat / 2._CUSTOM_REAL + deltat**2 * tauinv(:) / 6._CUSTOM_REAL &
+                    + deltat**3 * tauinv(:)**2 / 24._CUSTOM_REAL
+
+  end subroutine get_attenuation_memory_values
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine get_attenuation_constants(min_resolved_period,tau_sigma, &
+                              f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+! returns: period band constants tau_sigma and center frequency f_c_source
+
+  implicit none
+
+  include "constants.h"
+
+  real(kind=CUSTOM_REAL) :: min_resolved_period
+  double precision, dimension(N_SLS) :: tau_sigma
+  double precision :: f_c_source
+  double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
+
+  ! local parameters
+  real(kind=CUSTOM_REAL)  :: min_period
+
+  ! determines min/max periods for attenuation band based on minimum resolved period of mesh
+  min_period = 0.99 * min_resolved_period ! uses a small margin
+  ! debug for comparison with fix values from above
+  !min_resolved_period = 0.943
+  call get_attenuation_periods(min_period,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+  !  sets up stress relaxation times tau_sigma,
+  ! equally spaced based on number of standard linear solids and period band
+  call get_attenuation_tau_sigma(tau_sigma,N_SLS,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+  ! sets up central frequency
+  ! logarithmic mean of frequency interval of absorption band
+  call get_attenuation_source_freq(f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+  ! debug
+  !f_c_source = 0.141421d0
+  !tau_sigma(1) =  7.957747154594766669788441504352d0
+  !tau_sigma(2) =  1.125395395196382652969191440206d0
+  !tau_sigma(3) =  0.159154943091895345608222100964d0
+
+  end subroutine get_attenuation_constants
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine get_attenuation_factors(myrank,Q_mu,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD, &
+                              f_c_source,tau_sigma, &
+                              beta,one_minus_sum_beta,factor_scale)
+
+! returns: attenuation mechanisms beta,one_minus_sum_beta,factor_scale
+
+! variable frequency range
+! variable period range
+! variable central logarithmic frequency
+
+! in the future when more memory is available on computers
+! it would be more accurate to use four mechanisms instead of three
+
+
+  implicit none
+
+  include "constants.h"
+
+  integer:: myrank
+  double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
+  double precision :: f_c_source,Q_mu
+  double precision, dimension(N_SLS) :: tau_sigma
+  double precision, dimension(N_SLS) :: beta
+  double precision :: one_minus_sum_beta
+  double precision :: factor_scale
+  ! local parameters
+  double precision, dimension(N_SLS) :: tau_eps
+
+  ! determines tau_eps for Q_mu
+  call get_attenuation_tau_eps(Q_mu,tau_sigma,tau_eps, &
+                              MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+  ! determines one_minus_sum_beta
+  call get_attenuation_property_values(tau_sigma,tau_eps,beta,one_minus_sum_beta)
+
+  ! determines the "scale factor"
+  call get_attenuation_scale_factor(myrank,f_c_source,tau_eps,tau_sigma,Q_mu,factor_scale)
+
+  end subroutine get_attenuation_factors
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine get_attenuation_property_values(tau_s, tau_eps, beta, one_minus_sum_beta)
+
+! coefficients useful for calculation between relaxed and unrelaxed moduli
+!
+! returns: coefficients beta, one_minus_sum_beta
+
+  implicit none
+
+  include "constants.h"
+
+  double precision,dimension(N_SLS),intent(in) :: tau_s, tau_eps
+  double precision,dimension(N_SLS),intent(out) :: beta
+  double precision,intent(out):: one_minus_sum_beta
+
+  ! local parameters
+  double precision,dimension(N_SLS) :: tauinv
+  integer :: i
+
+  ! inverse of stress relaxation times
+  tauinv(:) = -1.0d0 / tau_s(:)
+
+  ! see e.g. Komatitsch & Tromp 1999, eq. (7)
+
+  ! coefficients beta
+  beta(:) = 1.0d0 - tau_eps(:) / tau_s(:)
+
+  ! sum of coefficients beta
+  one_minus_sum_beta = 1.0d0
+  do i = 1,N_SLS
+     one_minus_sum_beta = one_minus_sum_beta - beta(i)
+  enddo
+
+  end subroutine get_attenuation_property_values
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine get_attenuation_scale_factor(myrank, f_c_source, tau_eps, tau_sigma, Q_mu, scale_factor)
+
+! returns: physical dispersion scaling factor scale_factor
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: myrank
+  double precision :: scale_factor, Q_mu, f_c_source
+  ! strain and stress relaxation times
+  double precision, dimension(N_SLS) :: tau_eps, tau_sigma
+
+  ! local parameters
+  double precision w_c_source
+  double precision factor_scale_mu0, factor_scale_mu
+  double precision a_val, b_val
+  double precision big_omega
+  integer i
+
+
+  !--- compute central angular frequency of source (non dimensionalized)
+  w_c_source = TWO_PI * f_c_source
+
+
+  !--- quantity by which to scale mu_0 to get mu
+  ! this formula can be found for instance in
+  ! Liu, H. P., Anderson, D. L. and Kanamori, H., Velocity dispersion due to
+  ! anelasticity: implications for seismology and mantle composition,
+  ! Geophys. J. R. Astron. Soc., vol. 47, pp. 41-58 (1976)
+  ! and in Aki, K. and Richards, P. G., Quantitative seismology, theory and methods,
+  ! W. H. Freeman, (1980), second edition, sections 5.5 and 5.5.2, eq. (5.81) p. 170
+  factor_scale_mu0 = ONE + TWO * log(f_c_source / ATTENUATION_f0_REFERENCE ) / (PI * Q_mu)
+
+  !--- compute a, b and Omega parameters
+  ! see e.g.:
+  !   Liu et al. (1976): eq. 25
+  !   using complex modulus Mc = M_R / ( A - i B )
+  !   or
+  !   Savage et al. (BSSA, 2010): eq. (5) and (6)
+  !   complex modulus: M(t) = M_1(t) + i M_2(t)
+  a_val = ONE
+  b_val = ZERO
+  do i = 1,N_SLS
+    ! real part M_1 of complex modulus
+    a_val = a_val - w_c_source * w_c_source * tau_eps(i) * &
+      (tau_eps(i) - tau_sigma(i)) / (1.d0 + w_c_source * w_c_source * tau_eps(i) * tau_eps(i))
+    ! imaginary part M_2 of complex modulus
+    b_val = b_val + w_c_source * (tau_eps(i) - tau_sigma(i)) / &
+      (1.d0 + w_c_source * w_c_source * tau_eps(i) * tau_eps(i))
+  enddo
+
+  ! see e.g. Liu et al. (1976): Omega used in equation (20)
+  big_omega = a_val * ( sqrt(1.d0 + b_val*b_val/(a_val*a_val)) - 1.d0 )
+
+  !--- quantity by which to scale mu to get mu_relaxed
+  factor_scale_mu = b_val * b_val / (TWO * big_omega)
+
+  !--- total factor by which to scale mu0
+  scale_factor = factor_scale_mu * factor_scale_mu0
+
+  !--- check that the correction factor is close to one
+  if(scale_factor < 0.7 .or. scale_factor > 1.3) then
+     write(*,*) "error : in get_attenuation_scale_factor() "
+     write(*,*) "  scale factor: ", scale_factor, " should be between 0.7 and 1.3"
+     write(*,*) "  please check your reference frequency ATTENUATION_f0_REFERENCE in constants.h"
+     call exit_MPI(myrank,'incorrect correction factor in attenuation model')
+  endif
+
+  end subroutine get_attenuation_scale_factor
+
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+!compare: auto_ner.f90, GLOBE package
+
+  subroutine get_attenuation_periods(min_resolved_period,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+! determines min/max periods for attenuation based upon mininum resolved period of mesh
+
+  implicit none
+
+  include "constants.h"
+
+  real(kind=CUSTOM_REAL),intent(in) :: min_resolved_period
+  double precision,intent(out) :: MIN_ATTENUATION_PERIOD, MAX_ATTENUATION_PERIOD
+
+  ! local parameters
+  double precision :: THETA(5)
+
+  ! checks number of standard linear solids
+  if(N_SLS < 2 .OR. N_SLS > 5) then
+     stop 'N_SLS must be greater than 1 or less than 6'
+  endif
+
+  ! THETA defines the width of the Attenation Range in Decades
+  !   The number defined here were determined by minimizing
+  !   the "flatness" of the absoption spectrum.  Each THETA
+  !   is defined for a particular N_SLS (constants.h)
+  !   THETA(2) is for N_SLS = 2
+  THETA(1)           =   0.00d0
+  THETA(2)           =   0.75d0
+  THETA(3)           =   1.75d0
+  THETA(4)           =   2.25d0
+  THETA(5)           =   2.85d0
+
+  ! Compute Min Attenuation Period
+  !
+  ! The Minimum attenuation period = (Grid Spacing in km) / V_min
+  !  Grid spacing in km     = Width of an element in km * spacing for GLL point * points per wavelength
+
+  MIN_ATTENUATION_PERIOD = min_resolved_period
+
+
+  ! Compute Max Attenuation Period
+  !
+  ! The max attenuation period for 3 SLS is optimally
+  !   1.75 decades from the min attenuation period, see THETA above
+  !
+  ! this uses: theta = log( T_max / T_min ) to calculate T_max for a given T_min
+
+  MAX_ATTENUATION_PERIOD = MIN_ATTENUATION_PERIOD * 10.0d0**THETA(N_SLS)
+
+  end subroutine get_attenuation_periods
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine get_attenuation_tau_sigma(tau_s, nsls, min_period, max_period)
+
+! Determines stress relaxation times tau_sigma
+! Sets the Tau_sigma (tau_s) to be equally spaced in log10 frequency
+
+  implicit none
+
+  integer :: nsls
+  double precision,intent(in) :: min_period, max_period
+  double precision,intent(out) :: tau_s(nsls)
+  ! local parameters
+  double precision :: f1, f2
+  double precision :: exp1, exp2
+  double precision :: dexp
+  integer :: i
+  double precision, parameter :: PI = 3.14159265358979d0
+
+  ! min/max frequencies
+  f1 = 1.0d0 / max_period
+  f2 = 1.0d0 / min_period
+
+  ! logarithms
+  exp1 = log10(f1)
+  exp2 = log10(f2)
+
+  ! equally spaced in log10 frequency
+  dexp = (exp2-exp1) / ((nsls*1.0d0) - 1)
+  do i = 1,nsls
+     tau_s(i) = 1.0 / (PI * 2.0d0 * 10**(exp1 + (i - 1)* 1.0d0 *dexp))
+  enddo
+
+  end subroutine get_attenuation_tau_sigma
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine get_attenuation_source_freq(f_c_source,min_period,max_period)
+
+! Determines the Source Frequency
+
+  implicit none
+
+  double precision,intent(out) :: f_c_source
+  double precision,intent(in) :: min_period, max_period
+
+  ! local parameters
+  double precision f1, f2,T_c_source
+
+  ! min/max frequencies
+  f1 = 1.0d0 / max_period
+  f2 = 1.0d0 / min_period
+
+  T_c_source =  1.0e+03 * 10.0d0**(0.5 * (log10(f1) + log10(f2)))
+  ! central frequency
+  f_c_source = T_c_source / 1000.0d0
+
+  end subroutine get_attenuation_source_freq
+
+!--------------------------------------------------------------------------------------------------
+!
+!  This portion of the SPECFEM3D Code was written by:
+!  Brian Savage while at
+!     California Institute of Technology
+!     Department of Terrestrial Magnetism / Carnegie Institute of Washington
+!     Univeristy of Rhode Island
+!
+!  <savage at uri.edu>.
+!  <savage13 at gps.caltech.edu>
+!  <savage13 at dtm.ciw.edu>
+!
+!   It is based upon formulation in the following references:
+!
+!   Dahlen and Tromp, 1998
+!      Theoretical Global Seismology
+!
+!   Liu et al. 1976
+!      Velocity dispersion due to anelasticity: implications for seismology and mantle composition
+!      Geophys, J. R. asts. Soc, Vol 47, pp. 41-58
+!
+!   The methodology can be found in:
+!       Savage, B, D. Komatitsch and J. Tromp, 2010.
+!       Effects of 3D Attenuation on Seismic Wave Amplitude and Phase Measurements
+!       BSSA, 100, 3, p. 1241-1251.
+!
+! modifications:
+!  - minor modifications by Daniel Peter, november 2010
+!--------------------------------------------------------------------------------------------------
+
+  subroutine get_attenuation_tau_eps(Qmu_in,tau_s,tau_eps,min_period,max_period)
+
+! includes min_period, max_period, and N_SLS
+!
+! returns: determines strain relaxation times tau_eps
+
+  implicit none
+
+  include 'constants.h'
+
+! model_attenuation_variables
+!...
+
+  double precision :: Qmu_in
+  double precision, dimension(N_SLS) :: tau_s, tau_eps
+  double precision :: min_period,max_period
+
+  ! local parameters
+  integer :: rw
+  ! model_attenuation_storage_var
+  type model_attenuation_storage_var
+    sequence
+    double precision, dimension(:,:), pointer :: tau_eps_storage
+    double precision, dimension(:), pointer :: Qmu_storage
+    integer Q_resolution
+    integer Q_max
+  end type model_attenuation_storage_var
+  type (model_attenuation_storage_var) AM_S
+  ! attenuation_simplex_variables
+  type attenuation_simplex_variables
+    sequence
+    double precision Q  ! Q     = Desired Value of Attenuation or Q
+    double precision iQ ! iQ    = 1/Q
+    double precision, dimension(:), pointer ::  f
+    ! f = Frequencies at which to evaluate the solution
+    double precision, dimension(:), pointer :: tau_s
+    ! tau_s = Tau_sigma defined by the frequency range and
+    !             number of standard linear solids
+    integer nf          ! nf    = Number of Frequencies
+    integer nsls        ! nsls  = Number of Standard Linear Solids
+  end type attenuation_simplex_variables
+  type(attenuation_simplex_variables) AS_V
+
+  ! READ
+  rw = 1
+  call model_attenuation_storage(Qmu_in, tau_eps, rw, AM_S)
+  if(rw > 0) return
+
+  call attenuation_invert_by_simplex(min_period, max_period, N_SLS, Qmu_in, tau_s, tau_eps, AS_V)
+
+  ! WRITE
+  rw = -1
+  call model_attenuation_storage(Qmu_in, tau_eps, rw, AM_S)
+
+  end subroutine get_attenuation_tau_eps
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine model_attenuation_storage(Qmu, tau_eps, rw, AM_S)
+
+  implicit none
+  include 'constants.h'
+
+! model_attenuation_storage_var
+  type model_attenuation_storage_var
+    sequence
+    double precision, dimension(:,:), pointer :: tau_eps_storage
+    double precision, dimension(:), pointer :: Qmu_storage
+    integer Q_resolution
+    integer Q_max
+  end type model_attenuation_storage_var
+
+  type (model_attenuation_storage_var) AM_S
+! model_attenuation_storage_var
+
+  double precision Qmu, Qmu_new
+  double precision, dimension(N_SLS) :: tau_eps
+  integer rw
+
+  integer Qtmp
+  integer, save :: first_time_called = 1
+  double precision, parameter :: ZERO_TOL = 1.e-5
+  integer ier
+
+  if(first_time_called == 1) then
+     first_time_called       = 0
+     AM_S%Q_resolution = 10**ATTENUATION_COMP_RESOLUTION
+     AM_S%Q_max        = ATTENUATION_COMP_MAXIMUM
+     Qtmp         = AM_S%Q_resolution * AM_S%Q_max
+
+     allocate(AM_S%tau_eps_storage(N_SLS, Qtmp), &
+             AM_S%Qmu_storage(Qtmp),stat=ier)
+     if( ier /= 0 ) stop 'error allocating arrays for attenuation storage'
+     AM_S%Qmu_storage(:) = -1
+  endif
+
+  if(Qmu < 0.0d0 .OR. Qmu > AM_S%Q_max) then
+     write(IMAIN,*) 'Error attenuation_storage()'
+     write(IMAIN,*) 'Attenuation Value out of Range: ', Qmu
+     write(IMAIN,*) 'Attenuation Value out of Range: Min, Max ', 0, AM_S%Q_max
+     call exit_MPI(0, 'Attenuation Value out of Range')
+  endif
+
+  if(rw > 0 .AND. Qmu <= ZERO_TOL) then
+     Qmu = 0.0d0;
+     tau_eps(:) = 0.0d0;
+     return
+  endif
+  ! Generate index for Storage Array
+  ! and Recast Qmu using this index
+  ! Accroding to Brian, use float
+  !Qtmp = Qmu * Q_resolution
+  !Qmu = Qtmp / Q_resolution;
+
+  ! by default: resolution is Q_resolution = 10
+  ! converts Qmu to an array integer index:
+  ! e.g. Qmu = 150.31 -> Qtmp = 150.31 * 10 = int( 1503.10 ) = 1503
+  Qtmp    = Qmu * dble(AM_S%Q_resolution)
+
+  ! rounds to corresponding double value:
+  ! e.g. Qmu_new = dble( 1503 ) / dble(10) = 150.30
+  ! but Qmu_new is not used any further...
+  Qmu_new = dble(Qtmp) / dble(AM_S%Q_resolution)
+
+  if(rw > 0) then
+     ! READ
+     if(AM_S%Qmu_storage(Qtmp) > 0) then
+        ! READ SUCCESSFUL
+        tau_eps(:)   = AM_S%tau_eps_storage(:, Qtmp)
+        Qmu        = AM_S%Qmu_storage(Qtmp)
+        rw = 1
+     else
+        ! READ NOT SUCCESSFUL
+        rw = -1
+     endif
+  else
+     ! WRITE SUCCESSFUL
+     AM_S%tau_eps_storage(:,Qtmp)    = tau_eps(:)
+     AM_S%Qmu_storage(Qtmp)        = Qmu
+     rw = 1
+  endif
+
+  end subroutine model_attenuation_storage
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine attenuation_invert_by_simplex(t2, t1, n, Q_real, tau_s, tau_eps, AS_V)
+
+  implicit none
+
+  ! attenuation_simplex_variables
+  type attenuation_simplex_variables
+    sequence
+    double precision Q  ! Q     = Desired Value of Attenuation or Q
+    double precision iQ ! iQ    = 1/Q
+    double precision, dimension(:), pointer ::  f
+    ! f = Frequencies at which to evaluate the solution
+    double precision, dimension(:), pointer :: tau_s
+    ! tau_s = Tau_sigma defined by the frequency range and
+    !             number of standard linear solids
+    integer nf          ! nf    = Number of Frequencies
+    integer nsls        ! nsls  = Number of Standard Linear Solids
+  end type attenuation_simplex_variables
+  type(attenuation_simplex_variables) AS_V
+  ! attenuation_simplex_variables
+
+  ! Input / Output
+  double precision  t1, t2
+  double precision  Q_real
+!  double precision  omega_not
+  integer  n
+  double precision, dimension(n)   :: tau_s, tau_eps
+
+  ! Internal
+  integer i, iterations, err,prnt
+  double precision f1, f2, exp1,exp2, min_value !, dexp
+  integer, parameter :: nf = 100
+  double precision, dimension(nf) :: f
+  double precision, parameter :: PI = 3.14159265358979d0
+  double precision, external :: attenuation_eval
+
+  ! Values to be passed into the simplex minimization routine
+  iterations = -1
+  min_value  = -1.0e-4
+  err        = 0
+  prnt       = 0
+
+  !allocate(f(nf))
+
+  ! Determine the min and max frequencies
+  f1 = 1.0d0 / t1
+  f2 = 1.0d0 / t2
+
+  ! Determine the exponents of the frequencies
+  exp1 = log10(f1)
+  exp2 = log10(f2)
+
+!  if(f2 < f1 .OR. Q_real < 0.0d0 .OR. n < 1) then
+!     call exit_MPI(0, 'frequencies flipped or Q less than zero or N_SLS < 0')
+!  endif
+
+  ! Determine the Source frequency
+!  omega_not =  1.0e+03 * 10.0d0**(0.5 * (log10(f1) + log10(f2)))
+
+
+  ! Determine the Frequencies at which to compare solutions
+  !   The frequencies should be equally spaced in log10 frequency
+  do i = 1,nf
+     f(i) = exp1 + ((i-1)*1.0d0 * (exp2-exp1) / ((nf-1)*1.0d0))
+  enddo
+
+  ! Set the Tau_sigma (tau_s) to be equally spaced in log10 frequency
+!  dexp = (exp2-exp1) / ((n*1.0d0) - 1)
+!  do i = 1,n
+!     tau_s(i) = 1.0 / (PI * 2.0d0 * 10**(exp1 + (i - 1)* 1.0d0 *dexp))
+!  enddo
+
+
+  ! Shove the paramters into the module
+  call attenuation_simplex_setup(nf,n,f,Q_real,tau_s,AS_V)
+
+  ! Set the Tau_epsilon (tau_eps) to an initial value at omega*tau = 1
+  ! tan_delta = 1/Q = (tau_eps - tau_s)/(2 * sqrt(tau e*tau_s))
+  !    if we assume tau_eps =~ tau_s
+  !    we get the equation below
+  do i = 1,n
+     tau_eps(i) = tau_s(i) + (tau_s(i) * 2.0d0/Q_real)
+  enddo
+
+  ! Run a simplex search to determine the optimum values of tau_eps
+  call fminsearch(attenuation_eval, tau_eps, n, iterations, min_value, prnt, err,AS_V)
+  if(err > 0) then
+     write(*,*)'Search did not converge for an attenuation of ', Q_real
+     write(*,*)'    Iterations: ', iterations
+     write(*,*)'    Min Value:  ', min_value
+     write(*,*)'    Aborting program'
+     call exit_MPI(0,'attenuation_simplex: Search for Strain relaxation times did not converge')
+  endif
+
+  !deallocate(f)
+
+  call attenuation_simplex_finish(AS_V)
+
+  end subroutine attenuation_invert_by_simplex
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine attenuation_simplex_setup(nf_in,nsls_in,f_in,Q_in,tau_s_in,AS_V)
+
+!   - Inserts necessary parameters into the module attenuation_simplex_variables
+!   - See module for explaination
+
+  implicit none
+
+  ! attenuation_simplex_variables
+  type attenuation_simplex_variables
+    sequence
+    double precision Q  ! Q     = Desired Value of Attenuation or Q
+    double precision iQ ! iQ    = 1/Q
+    double precision, dimension(:), pointer ::  f
+    ! f = Frequencies at which to evaluate the solution
+    double precision, dimension(:), pointer :: tau_s
+    ! tau_s = Tau_sigma defined by the frequency range and
+    !             number of standard linear solids
+    integer nf          ! nf    = Number of Frequencies
+    integer nsls        ! nsls  = Number of Standard Linear Solids
+  end type attenuation_simplex_variables
+  type(attenuation_simplex_variables) AS_V
+  ! attenuation_simplex_variables
+
+  integer nf_in, nsls_in
+  double precision Q_in
+  double precision, dimension(nf_in)   :: f_in
+  double precision, dimension(nsls_in) :: tau_s_in
+  integer ier
+
+  allocate(AS_V%f(nf_in), &
+          AS_V%tau_s(nsls_in),stat=ier)
+  if( ier /= 0 ) stop 'error allocating arrays for attenuation simplex'
+
+  AS_V%nf    = nf_in
+  AS_V%nsls  = nsls_in
+  AS_V%f     = f_in
+  AS_V%Q     = Q_in
+  AS_V%iQ    = 1.0d0/AS_V%Q
+  AS_V%tau_s = tau_s_in
+
+  end subroutine attenuation_simplex_setup
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  double precision function attenuation_eval(Xin,AS_V)
+
+!    - Computes the misfit from a set of relaxation paramters
+!          given a set of frequencies and target attenuation
+!    - Evaluates only at the given frequencies
+!    - Evaluation is done with an L2 norm
+!
+!    Input
+!      Xin = Tau_epsilon, Strain Relaxation Time
+!                Note: Tau_sigma the Stress Relaxation Time is loaded
+!                      with attenuation_simplex_setup and stored in
+!                      attenuation_simplex_variables
+!
+!    Xi = Sum_i^N sqrt [ (1/Qc_i - 1/Qt_i)^2 / 1/Qt_i^2 ]
+!
+!     where Qc_i is the computed attenuation at a specific frequency
+!           Qt_i is the desired attenuaiton at that frequency
+!
+!    Uses attenuation_simplex_variables to store constant values
+!
+!    See atteunation_simplex_setup
+!
+
+  implicit none
+
+  ! attenuation_simplex_variables
+  type attenuation_simplex_variables
+    sequence
+    double precision Q  ! Q     = Desired Value of Attenuation or Q
+    double precision iQ ! iQ    = 1/Q
+    double precision, dimension(:), pointer ::  f
+    ! f = Frequencies at which to evaluate the solution
+    double precision, dimension(:), pointer :: tau_s
+    ! tau_s = Tau_sigma defined by the frequency range and
+    !             number of standard linear solids
+    integer nf          ! nf    = Number of Frequencies
+    integer nsls        ! nsls  = Number of Standard Linear Solids
+  end type attenuation_simplex_variables
+  type(attenuation_simplex_variables) AS_V
+  ! attenuation_simplex_variables
+
+   ! Input
+  double precision, dimension(AS_V%nsls) :: Xin
+  double precision, dimension(AS_V%nsls) :: tau_eps
+
+  double precision, dimension(AS_V%nf)   :: A, B, tan_delta
+
+  integer i
+  double precision xi, iQ2
+
+  tau_eps = Xin
+
+  call attenuation_maxwell(AS_V%nf,AS_V%nsls,AS_V%f,AS_V%tau_s,tau_eps,B,A)
+
+  tan_delta = B / A
+
+  attenuation_eval = 0.0d0
+  iQ2 = AS_V%iQ**2
+  do i = 1,AS_V%nf
+     xi = sqrt(( ( (tan_delta(i) - AS_V%iQ) ** 2 ) / iQ2 ))
+     attenuation_eval = attenuation_eval + xi
+  enddo
+
+  end function attenuation_eval
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine attenuation_maxwell(nf,nsls,f,tau_s,tau_eps,B,A)
+
+!   - Computes the Moduli (Maxwell Solid) for a series of
+!         Standard Linear Solids
+!   - Computes M1 and M2 parameters after Dahlen and Tromp pp.203
+!         here called B and A after Liu et al. 1976
+!   - Another formulation uses Kelvin-Voigt Solids and computes
+!         Compliences J1 and J2 after Dahlen and Tromp pp.203
+!
+!   Input
+!     nf    = Number of Frequencies
+!     nsls  = Number of Standard Linear Solids
+!     f     = Frequencies (in log10 of frequencies)
+!                dimension(nf)
+!     tau_s = Tau_sigma  Stress relaxation time (see References)
+!                dimension(nsls)
+!     tau_eps = Tau_epislon Strain relaxation time (see References)
+!                dimension(nsls)!
+!   Output
+!     B     = Real Moduli      ( M2 Dahlen and Tromp pp.203 )
+!                dimension(nf)
+!     A     = Imaginary Moduli ( M1 Dahlen and Tromp pp.203 )
+!                dimension(nf)
+!
+!   Dahlen and Tromp, 1998
+!      Theoretical Global Seismology
+!
+!   Liu et al. 1976
+!      Velocity dispersion due to anelasticity: implications for seismology and mantle composition
+!      Geophys, J. R. asts. Soc, Vol 47, pp. 41-58
+
+  implicit none
+
+  ! Input
+  integer nf, nsls
+  double precision, dimension(nf)   :: f
+  double precision, dimension(nsls) :: tau_s, tau_eps
+  ! Output
+  double precision, dimension(nf)   :: A,B
+
+  integer i,j
+  double precision w, pi, demon
+
+  PI = 3.14159265358979d0
+
+  A(:) = 1.0d0 -  nsls*1.0d0
+  B(:) = 0.0d0
+  do i = 1,nf
+     w = 2.0d0 * PI * 10**f(i)
+     do j = 1,nsls
+        !        write(*,*)j,tau_s(j),tau_eps(j)
+        demon = 1.0d0 + w**2 * tau_s(j)**2
+        A(i) = A(i) + ((1.0d0 + (w**2 * tau_eps(j) * tau_s(j)))/ demon)
+        B(i) = B(i) + ((w * (tau_eps(j) - tau_s(j))) / demon)
+     end do
+      !     write(*,*)A(i),B(i),10**f(i)
+  enddo
+
+  end subroutine attenuation_maxwell
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine fminsearch(funk, x, n, itercount, tolf, prnt, err, AS_V)
+
+! subroutine fminsearch
+!   - Computes the minimization of funk(x(n)) using the simplex method
+!   - This subroutine is copied from Matlab fminsearch.m
+!         and modified to suit my nefarious needs
+!   Input
+!     funk = double precision function with one input parameter
+!                double precision function the_funk(x)
+!     x    = Input/Output
+!               variables to be minimized
+!               dimension(n)
+!            Input:  Initial Value
+!            Output: Mimimized Value
+!     n    = number of variables
+!     itercount = Input/Output
+!                 Input:  maximum number of iterations
+!                         if < 0 default is used (200 * n)
+!                 Output: total number of iterations on output
+!     tolf      = Input/Output
+!                 Input:  minimium tolerance of the function funk(x)
+!                 Output: minimium value of funk(x)(i.e. "a" solution)
+!     prnt      = Input
+!                 3 => report every iteration
+!                 4 => report every iteration, total simplex
+!     err       = Output
+!                 0 => Normal exeecution, converged within desired range
+!                 1 => Function Evaluation exceeded limit
+!                 2 => Iterations exceeded limit
+!
+!     See Matlab fminsearch
+
+  implicit none
+
+! attenuation_simplex_variables
+  type attenuation_simplex_variables
+    sequence
+    double precision Q  ! Q     = Desired Value of Attenuation or Q
+    double precision iQ ! iQ    = 1/Q
+    double precision, dimension(:), pointer ::  f
+    ! f = Frequencies at which to evaluate the solution
+    double precision, dimension(:), pointer :: tau_s
+    ! tau_s = Tau_sigma defined by the frequency range and
+    !             number of standard linear solids
+    integer nf          ! nf    = Number of Frequencies
+    integer nsls        ! nsls  = Number of Standard Linear Solids
+  end type attenuation_simplex_variables
+
+  type(attenuation_simplex_variables) AS_V
+! attenuation_simplex_variables
+
+  ! Input
+  double precision, external :: funk
+
+  integer n
+  double precision x(n) ! Also Output
+  integer itercount, prnt, err
+  double precision tolf
+
+  !Internal
+  integer i,j, how
+  integer, parameter :: none             = 0
+  integer, parameter :: initial          = 1
+  integer, parameter :: expand           = 2
+  integer, parameter :: reflect          = 3
+  integer, parameter :: contract_outside = 4
+  integer, parameter :: contract_inside  = 5
+  integer, parameter :: shrink           = 6
+
+  integer maxiter, maxfun
+  integer func_evals
+  double precision tolx
+
+  double precision rho, chi, psi, sigma
+  double precision xin(n), y(n), v(n,n+1), fv(n+1)
+  double precision vtmp(n,n+1)
+  double precision usual_delta, zero_term_delta
+  double precision xbar(n), xr(n), fxr, xe(n), fxe, xc(n), fxc, fxcc, xcc(n)
+  integer place(n+1)
+
+  double precision max_size_simplex, max_value
+
+  rho   = 1.0d0
+  chi   = 2.0d0
+  psi   = 0.5d0
+  sigma = 0.5d0
+
+
+  if(itercount > 0) then
+     maxiter = itercount
+  else
+     maxiter = 200 * n
+  endif
+  itercount = 0
+  maxfun  = 200 * n
+
+  if(tolf > 0.0d0) then
+     tolx = 1.0e-4
+  else
+     tolx = 1.0e-4
+     tolf = 1.0e-4
+  endif
+
+  err = 0
+
+  xin    = x
+  v(:,:) = 0.0d0
+  fv(:)  = 0.0d0
+
+  v(:,1) = xin
+  x      = xin
+
+  fv(1) = funk(xin,AS_V)
+
+  usual_delta = 0.05
+  zero_term_delta = 0.00025
+
+  do j = 1,n
+     y = xin
+     if(y(j) /= 0.0d0) then
+        y(j) = (1.0d0 + usual_delta) * y(j)
+     else
+        y(j) = zero_term_delta
+     endif
+     v(:,j+1) = y
+     x(:) = y
+     fv(j+1) = funk(x,AS_V)
+  enddo
+
+  call qsort_local(fv,n+1,place)
+
+  do i = 1,n+1
+     vtmp(:,i) = v(:,place(i))
+  enddo
+  v = vtmp
+
+  how = initial
+  itercount = 1
+  func_evals = n+1
+  if(prnt == 3) then
+     write(*,*)'Iterations   Funk Evals   Value How'
+     write(*,*)itercount, func_evals, fv(1), how
+  endif
+  if(prnt == 4) then
+     write(*,*)'How: ',how
+     write(*,*)'V: ', v
+     write(*,*)'fv: ',fv
+     write(*,*)'evals: ',func_evals
+  endif
+
+  do while (func_evals < maxfun .AND. itercount < maxiter)
+
+     if(max_size_simplex(v,n) <= tolx .AND. &
+          max_value(fv,n+1) <= tolf) then
+        goto 666
+     endif
+     how = none
+
+     ! xbar = average of the n (NOT n+1) best points
+     !     xbar = sum(v(:,1:n), 2)/n
+     xbar(:) = 0.0d0
+     do i = 1,n
+        do j = 1,n
+           xbar(i) = xbar(i) + v(i,j)
+        enddo
+        xbar(i) = xbar(i) / (n*1.0d0)
+     enddo
+     xr = (1 + rho)*xbar - rho*v(:,n+1)
+     x(:) = xr
+     fxr = funk(x,AS_V)
+     func_evals = func_evals + 1
+     if (fxr < fv(1)) then
+        ! Calculate the expansion point
+        xe = (1 + rho*chi)*xbar - rho*chi*v(:,n+1)
+        x = xe
+        fxe = funk(x,AS_V)
+        func_evals = func_evals+1
+        if (fxe < fxr) then
+           v(:,n+1) = xe
+           fv(n+1) = fxe
+           how = expand
+        else
+           v(:,n+1) = xr
+           fv(n+1) = fxr
+           how = reflect
+        endif
+     else ! fv(:,1) <= fxr
+        if (fxr < fv(n)) then
+           v(:,n+1) = xr
+           fv(n+1) = fxr
+           how = reflect
+        else ! fxr >= fv(:,n)
+           ! Perform contraction
+           if (fxr < fv(n+1)) then
+              ! Perform an outside contraction
+              xc = (1 + psi*rho)*xbar - psi*rho*v(:,n+1)
+              x(:) = xc
+              fxc = funk(x,AS_V)
+              func_evals = func_evals+1
+
+              if (fxc <= fxr) then
+                 v(:,n+1) = xc
+                 fv(n+1) = fxc
+                 how = contract_outside
+              else
+                 ! perform a shrink
+                 how = shrink
+              endif
+           else
+              ! Perform an inside contraction
+              xcc = (1-psi)*xbar + psi*v(:,n+1)
+              x(:) = xcc
+              fxcc = funk(x,AS_V)
+              func_evals = func_evals+1
+
+              if (fxcc < fv(n+1)) then
+                 v(:,n+1) = xcc
+                 fv(n+1) = fxcc
+                 how = contract_inside
+              else
+                 ! perform a shrink
+                 how = shrink
+              endif
+           endif
+           if (how == shrink) then
+              do j=2,n+1
+                 v(:,j)=v(:,1)+sigma*(v(:,j) - v(:,1))
+                 x(:) = v(:,j)
+                 fv(j) = funk(x,AS_V)
+              enddo
+              func_evals = func_evals + n
+           endif
+        endif
+     endif
+
+     call qsort_local(fv,n+1,place)
+     do i = 1,n+1
+        vtmp(:,i) = v(:,place(i))
+     enddo
+     v = vtmp
+
+     itercount = itercount + 1
+     if (prnt == 3) then
+        write(*,*)itercount, func_evals, fv(1), how
+     elseif (prnt == 4) then
+        write(*,*)
+        write(*,*)'How: ',how
+        write(*,*)'v: ',v
+        write(*,*)'fv: ',fv
+        write(*,*)'evals: ',func_evals
+     endif
+  enddo
+
+  if(func_evals > maxfun) then
+     write(*,*)'function evaluations exceeded prescribed limit', maxfun
+     err = 1
+  endif
+  if(itercount > maxiter) then
+     write(*,*)'iterations exceeded prescribed limit', maxiter
+     err = 2
+  endif
+
+666 continue
+  x = v(:,1)
+  tolf = fv(1)
+
+  end subroutine fminsearch
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  double precision function max_value(fv,n)
+
+!    - Finds the maximim value of the difference of between the first
+!          value and the remaining values of a vector
+!    Input
+!      fv = Input
+!             Vector
+!             dimension(n)
+!      n  = Input
+!             Length of fv
+!
+!      Returns:
+!         Xi = max( || fv(1)- fv(i) || ) for i=2:n
+!
+
+  implicit none
+  integer n
+  double precision fv(n)
+
+  integer i
+  double precision m, z
+
+  m = 0.0d0
+  do i = 2,n
+     z = abs(fv(1) - fv(i))
+     if(z > m) then
+        m = z
+     endif
+  enddo
+
+  max_value = m
+
+  end function max_value
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  double precision function max_size_simplex(v,n)
+
+!   - Determines the maximum distance between two point in a simplex
+!   Input
+!     v  = Input
+!            Simplex Verticies
+!            dimension(n, n+1)
+!     n  = Pseudo Length of n
+!
+!     Returns:
+!       Xi = max( max( || v(:,1) - v(:,i) || ) ) for i=2:n+1
+!
+
+  implicit none
+  integer n
+  double precision v(n,n+1)
+
+  integer i,j
+  double precision m, z
+
+  m = 0.0d0
+  do i = 1,n
+     do j = 2,n+1
+        z = abs(v(i,j) - v(i,1))
+        if(z > m) then
+           m = z
+        endif
+     enddo
+  enddo
+
+  max_size_simplex = m
+
+  end function max_size_simplex
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine qsort_local(X,n,I)
+
+!    - Implementation of a Bubble Sort Routine
+!    Input
+!      X = Input/Output
+!         Vector to be sorted
+!         dimension(n)
+!      n = Input
+!         Length of X
+!      I = Output
+!         Sorted Indicies of vecotr X
+!
+!      Example:
+!         X = [ 4 3 1 2 ] on Input
+!         I = [ 1 2 3 4 ] Computed Internally (in order)
+!
+!         X = [ 1 2 3 4 ] on Output
+!         I = [ 3 4 2 1 ] on Output
+!
+
+  implicit none
+
+  integer n
+  double precision X(n)
+  integer I(n)
+
+  integer j,k
+  double precision rtmp
+  integer itmp
+
+  do j = 1,n
+     I(j) = j
+  enddo
+
+  do j = 1,n
+     do k = 1,n-j
+        if(X(k+1) < X(k)) then
+           rtmp   = X(k)
+           X(k)   = X(k+1)
+           X(k+1) = rtmp
+
+           itmp   = I(k)
+           I(k)   = I(k+1)
+           I(k+1) = itmp
+        endif
+     enddo
+  enddo
+
+  end subroutine qsort_local
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine attenuation_simplex_finish(AS_V)
+
+  implicit none
+
+  ! attenuation_simplex_variables
+  type attenuation_simplex_variables
+    sequence
+    double precision Q  ! Q     = Desired Value of Attenuation or Q
+    double precision iQ ! iQ    = 1/Q
+    double precision, dimension(:), pointer ::  f
+    ! f = Frequencies at which to evaluate the solution
+    double precision, dimension(:), pointer :: tau_s
+    ! tau_s = Tau_sigma defined by the frequency range and
+    !             number of standard linear solids
+    integer nf          ! nf    = Number of Frequencies
+    integer nsls        ! nsls  = Number of Standard Linear Solids
+  end type attenuation_simplex_variables
+  type(attenuation_simplex_variables) AS_V
+  ! attenuation_simplex_variables
+
+  deallocate(AS_V%f)
+  deallocate(AS_V%tau_s)
+
+  end subroutine attenuation_simplex_finish

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_cmt.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_cmt.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_cmt.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,209 +1,209 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine get_cmt(yr,jda,ho,mi,sec,tshift_cmt,hdur,lat,long,depth,moment_tensor,&
-                    DT,NSOURCES,min_tshift_cmt_original)
-
-  implicit none
-
-  include "constants.h"
-
-!--- input or output arguments of the subroutine below
-
-  integer, intent(in) :: NSOURCES
-  double precision, intent(in) :: DT
-
-  integer, intent(out) :: yr,jda,ho,mi
-  double precision, intent(out) :: sec,min_tshift_cmt_original
-  double precision, dimension(NSOURCES), intent(out) :: tshift_cmt,hdur,lat,long,depth
-  double precision, dimension(6,NSOURCES), intent(out) :: moment_tensor
-
-!--- local variables below
-
-  integer mo,da,julian_day,isource
-  double precision t_shift(NSOURCES)
-  character(len=5) datasource
-  character(len=256) string, CMTSOLUTION
-
-  ! initializes
-  lat(:) = 0.d0
-  long(:) = 0.d0
-  depth(:) = 0.d0
-  t_shift(:) = 0.d0
-  tshift_cmt(:) = 0.d0
-  hdur(:) = 0.d0
-  moment_tensor(:,:) = 0.d0
-  yr = 0
-  jda = 0
-  ho = 0
-  mi = 0
-  sec = 0.d0
-
-!
-!---- read hypocenter info
-!
-  call get_value_string(CMTSOLUTION, 'solver.CMTSOLUTION', &
-       IN_DATA_FILES_PATH(1:len_trim(IN_DATA_FILES_PATH))//'CMTSOLUTION')
-
-  open(unit=1,file=CMTSOLUTION,status='old',action='read')
-
-! read source number isource
-  do isource=1,NSOURCES
-
-    read(1,"(a256)") string
-    ! skips empty lines
-    do while( len_trim(string) == 0 )
-      read(1,"(a256)") string
-    enddo
-
-    ! read header with event information
-    read(string,"(a4,i5,i3,i3,i3,i3,f6.2)") datasource,yr,mo,da,ho,mi,sec
-    jda=julian_day(yr,mo,da)
-
-    ! ignore line with event name
-    read(1,"(a)") string
-
-    ! read time shift
-    read(1,"(a)") string
-    !read(string(12:len_trim(string)),*) tshift_cmt(isource)
-    read(string(12:len_trim(string)),*) t_shift(isource)
-
-    ! read half duration
-    read(1,"(a)") string
-    read(string(15:len_trim(string)),*) hdur(isource)
-
-    ! read latitude
-    read(1,"(a)") string
-    read(string(10:len_trim(string)),*) lat(isource)
-
-    ! read longitude
-    read(1,"(a)") string
-    read(string(11:len_trim(string)),*) long(isource)
-
-    ! read depth
-    read(1,"(a)") string
-    read(string(7:len_trim(string)),*) depth(isource)
-
-    ! read Mrr
-    read(1,"(a)") string
-    read(string(5:len_trim(string)),*) moment_tensor(1,isource)
-
-    ! read Mtt
-    read(1,"(a)") string
-    read(string(5:len_trim(string)),*) moment_tensor(2,isource)
-
-    ! read Mpp
-    read(1,"(a)") string
-    read(string(5:len_trim(string)),*) moment_tensor(3,isource)
-
-    ! read Mrt
-    read(1,"(a)") string
-    read(string(5:len_trim(string)),*) moment_tensor(4,isource)
-
-    ! read Mrp
-    read(1,"(a)") string
-    read(string(5:len_trim(string)),*) moment_tensor(5,isource)
-
-    ! read Mtp
-    read(1,"(a)") string
-    read(string(5:len_trim(string)),*) moment_tensor(6,isource)
-
-    ! checks half-duration
-    if( USE_FORCE_POINT_SOURCE ) then
-      ! half-duration is the dominant frequency of the source
-      ! point forces use a Ricker source time function
-      ! null half-duration indicates a very low-frequency source
-      ! (see constants.h: TINYVAL = 1.d-9 )
-      if( hdur(isource) < TINYVAL ) hdur(isource) = TINYVAL
-    else
-      ! null half-duration indicates a Heaviside
-      ! replace with very short error function
-      if( hdur(isource) < 5. * DT ) hdur(isource) = 5. * DT
-    endif
-
-  enddo
-
-  close(1)
-
-  ! Sets tshift_cmt to zero to initiate the simulation!
-  if(NSOURCES == 1)then
-      tshift_cmt = 0.d0
-      min_tshift_cmt_original = t_shift(1)
-  else
-      tshift_cmt(1:NSOURCES) = t_shift(1:NSOURCES)-minval(t_shift)
-      min_tshift_cmt_original = minval(t_shift)
-  endif
-
-
-  !
-  ! scale the moment tensor
-  ! CMTSOLUTION file values are in dyne.cm
-  ! 1 dyne is 1 gram * 1 cm / (1 second)^2
-  ! 1 Newton is 1 kg * 1 m / (1 second)^2
-  ! thus 1 Newton = 100,000 dynes
-  ! therefore 1 dyne.cm = 1e-7 Newton.m
-  !
-  moment_tensor(:,:) = moment_tensor(:,:) * 1.d-7
-
-  end subroutine get_cmt
-
-! ------------------------------------------------------------------
-
-  integer function julian_day(yr,mo,da)
-
-  implicit none
-
-  integer yr,mo,da
-
-  integer mon(12)
-  integer lpyr
-  data mon /0,31,59,90,120,151,181,212,243,273,304,334/
-
-  julian_day = da + mon(mo)
-  if(mo>2) julian_day = julian_day + lpyr(yr)
-
-  end function julian_day
-
-! ------------------------------------------------------------------
-
-  integer function lpyr(yr)
-
-  implicit none
-
-  integer yr
-!
-!---- returns 1 if leap year
-!
-  lpyr=0
-  if(mod(yr,400) == 0) then
-    lpyr=1
-  else if(mod(yr,4) == 0) then
-    lpyr=1
-    if(mod(yr,100) == 0) lpyr=0
-  endif
-
-  end function lpyr
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine get_cmt(yr,jda,ho,mi,sec,tshift_cmt,hdur,lat,long,depth,moment_tensor,&
+                    DT,NSOURCES,min_tshift_cmt_original)
+
+  implicit none
+
+  include "constants.h"
+
+!--- input or output arguments of the subroutine below
+
+  integer, intent(in) :: NSOURCES
+  double precision, intent(in) :: DT
+
+  integer, intent(out) :: yr,jda,ho,mi
+  double precision, intent(out) :: sec,min_tshift_cmt_original
+  double precision, dimension(NSOURCES), intent(out) :: tshift_cmt,hdur,lat,long,depth
+  double precision, dimension(6,NSOURCES), intent(out) :: moment_tensor
+
+!--- local variables below
+
+  integer mo,da,julian_day,isource
+  double precision t_shift(NSOURCES)
+  character(len=5) datasource
+  character(len=256) string, CMTSOLUTION
+
+  ! initializes
+  lat(:) = 0.d0
+  long(:) = 0.d0
+  depth(:) = 0.d0
+  t_shift(:) = 0.d0
+  tshift_cmt(:) = 0.d0
+  hdur(:) = 0.d0
+  moment_tensor(:,:) = 0.d0
+  yr = 0
+  jda = 0
+  ho = 0
+  mi = 0
+  sec = 0.d0
+
+!
+!---- read hypocenter info
+!
+  call get_value_string(CMTSOLUTION, 'solver.CMTSOLUTION', &
+       IN_DATA_FILES_PATH(1:len_trim(IN_DATA_FILES_PATH))//'CMTSOLUTION')
+
+  open(unit=1,file=CMTSOLUTION,status='old',action='read')
+
+! read source number isource
+  do isource=1,NSOURCES
+
+    read(1,"(a256)") string
+    ! skips empty lines
+    do while( len_trim(string) == 0 )
+      read(1,"(a256)") string
+    enddo
+
+    ! read header with event information
+    read(string,"(a4,i5,i3,i3,i3,i3,f6.2)") datasource,yr,mo,da,ho,mi,sec
+    jda=julian_day(yr,mo,da)
+
+    ! ignore line with event name
+    read(1,"(a)") string
+
+    ! read time shift
+    read(1,"(a)") string
+    !read(string(12:len_trim(string)),*) tshift_cmt(isource)
+    read(string(12:len_trim(string)),*) t_shift(isource)
+
+    ! read half duration
+    read(1,"(a)") string
+    read(string(15:len_trim(string)),*) hdur(isource)
+
+    ! read latitude
+    read(1,"(a)") string
+    read(string(10:len_trim(string)),*) lat(isource)
+
+    ! read longitude
+    read(1,"(a)") string
+    read(string(11:len_trim(string)),*) long(isource)
+
+    ! read depth
+    read(1,"(a)") string
+    read(string(7:len_trim(string)),*) depth(isource)
+
+    ! read Mrr
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) moment_tensor(1,isource)
+
+    ! read Mtt
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) moment_tensor(2,isource)
+
+    ! read Mpp
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) moment_tensor(3,isource)
+
+    ! read Mrt
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) moment_tensor(4,isource)
+
+    ! read Mrp
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) moment_tensor(5,isource)
+
+    ! read Mtp
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) moment_tensor(6,isource)
+
+    ! checks half-duration
+    if( USE_FORCE_POINT_SOURCE ) then
+      ! half-duration is the dominant frequency of the source
+      ! point forces use a Ricker source time function
+      ! null half-duration indicates a very low-frequency source
+      ! (see constants.h: TINYVAL = 1.d-9 )
+      if( hdur(isource) < TINYVAL ) hdur(isource) = TINYVAL
+    else
+      ! null half-duration indicates a Heaviside
+      ! replace with very short error function
+      if( hdur(isource) < 5. * DT ) hdur(isource) = 5. * DT
+    endif
+
+  enddo
+
+  close(1)
+
+  ! Sets tshift_cmt to zero to initiate the simulation!
+  if(NSOURCES == 1)then
+      tshift_cmt = 0.d0
+      min_tshift_cmt_original = t_shift(1)
+  else
+      tshift_cmt(1:NSOURCES) = t_shift(1:NSOURCES)-minval(t_shift)
+      min_tshift_cmt_original = minval(t_shift)
+  endif
+
+
+  !
+  ! scale the moment tensor
+  ! CMTSOLUTION file values are in dyne.cm
+  ! 1 dyne is 1 gram * 1 cm / (1 second)^2
+  ! 1 Newton is 1 kg * 1 m / (1 second)^2
+  ! thus 1 Newton = 100,000 dynes
+  ! therefore 1 dyne.cm = 1e-7 Newton.m
+  !
+  moment_tensor(:,:) = moment_tensor(:,:) * 1.d-7
+
+  end subroutine get_cmt
+
+! ------------------------------------------------------------------
+
+  integer function julian_day(yr,mo,da)
+
+  implicit none
+
+  integer yr,mo,da
+
+  integer mon(12)
+  integer lpyr
+  data mon /0,31,59,90,120,151,181,212,243,273,304,334/
+
+  julian_day = da + mon(mo)
+  if(mo>2) julian_day = julian_day + lpyr(yr)
+
+  end function julian_day
+
+! ------------------------------------------------------------------
+
+  integer function lpyr(yr)
+
+  implicit none
+
+  integer yr
+!
+!---- returns 1 if leap year
+!
+  lpyr=0
+  if(mod(yr,400) == 0) then
+    lpyr=1
+  else if(mod(yr,4) == 0) then
+    lpyr=1
+    if(mod(yr,100) == 0) lpyr=0
+  endif
+
+  end function lpyr
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_jacobian_boundaries.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_jacobian_boundaries.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_jacobian_boundaries.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,933 +1,933 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-
-  subroutine get_jacobian_boundary_face(myrank,nspec, &
-                        xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
-                        dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
-                        ispec,iface,jacobian2Dw_face,normal_face,NGLLA,NGLLB)
-
-! returns jacobian2Dw_face and normal_face (pointing outwards of element)
-
-  implicit none
-
-  include "constants.h"
-
-  integer nspec,myrank,nglob
-
-! arrays with the mesh
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-  real(kind=CUSTOM_REAL) :: xstore_dummy(nglob),ystore_dummy(nglob),zstore_dummy(nglob)
-
-! face information
-  integer :: iface,ispec,NGLLA,NGLLB
-  real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
-  real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
-
-  double precision dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ)
-  double precision dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ)
-  double precision dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY)
-  double precision dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY)
-
-  double precision, dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  double precision, dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  double precision, dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! local parameters
-! face corners
-  double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
-
-! check that the parameter file is correct
-  if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
-  if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
-
-  select case ( iface )
-  ! on reference face: xmin
-  case(1)
-    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
-    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
-    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
-    xelm(2)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
-    yelm(2)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
-    zelm(2)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
-    xelm(3)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-    yelm(3)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-    zelm(3)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-    xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
-    yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
-    zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
-
-    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
-                  dershape2D_x,wgllwgll_yz, &
-                  jacobian2Dw_face,normal_face,NGLLY,NGLLZ)
-
-! on boundary: xmax
-  case(2)
-    xelm(1)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
-    yelm(1)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
-    zelm(1)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
-    xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-    yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-    zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-    xelm(4)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-    yelm(4)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-    zelm(4)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-
-    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
-                  dershape2D_x,wgllwgll_yz, &
-                  jacobian2Dw_face,normal_face,NGLLY,NGLLZ)
-
-! on boundary: ymin
-  case(3)
-    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
-    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
-    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
-    xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
-    yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
-    zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
-    xelm(3)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-    yelm(3)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-    zelm(3)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-    xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
-    yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
-    zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
-
-    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
-                  dershape2D_y,wgllwgll_xz, &
-                  jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
-
-! on boundary: ymax
-  case(4)
-    xelm(1)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
-    yelm(1)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
-    zelm(1)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
-    xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-    yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-    zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-    xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-    yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-    zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-
-    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
-                  dershape2D_y, wgllwgll_xz, &
-                  jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
-
-
-! on boundary: bottom
-  case(5)
-    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
-    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
-    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
-    xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
-    yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
-    zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
-    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-    xelm(4)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
-    yelm(4)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
-    zelm(4)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
-
-    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm,&
-                  dershape2D_bottom,wgllwgll_xy, &
-                  jacobian2Dw_face,normal_face,NGLLX,NGLLY)
-
-! on boundary: top
-  case(6)
-    xelm(1)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
-    yelm(1)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
-    zelm(1)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
-    xelm(2)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-    yelm(2)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-    zelm(2)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-    xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-    yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-    zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-
-    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm,&
-                  dershape2D_top, wgllwgll_xy, &
-                  jacobian2Dw_face,normal_face,NGLLX,NGLLY)
-
-  case default
-    stop 'error 2D jacobian'
-  end select
-
-  end subroutine get_jacobian_boundary_face
-
-
-! -------------------------------------------------------
-
-  subroutine compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
-                                dershape2D,wgllwgll, &
-                                jacobian2Dw_face,normal_face,NGLLA,NGLLB)
-
-  implicit none
-
-  include "constants.h"
-
-! generic routine that accepts any polynomial degree in each direction
-! returns 2D jacobian and normal for this face only
-
-  integer NGLLA,NGLLB,myrank
-
-  double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
-  double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
-  double precision wgllwgll(NGLLA,NGLLB)
-
-  real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
-  real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
-
-  integer i,j,ia
-  double precision xxi,xeta,yxi,yeta,zxi,zeta
-  double precision unx,uny,unz,jacobian
-
-  do j=1,NGLLB
-    do i=1,NGLLA
-
-    xxi=ZERO
-    xeta=ZERO
-    yxi=ZERO
-    yeta=ZERO
-    zxi=ZERO
-    zeta=ZERO
-    do ia=1,NGNOD2D
-      xxi=xxi+dershape2D(1,ia,i,j)*xelm(ia)
-      xeta=xeta+dershape2D(2,ia,i,j)*xelm(ia)
-      yxi=yxi+dershape2D(1,ia,i,j)*yelm(ia)
-      yeta=yeta+dershape2D(2,ia,i,j)*yelm(ia)
-      zxi=zxi+dershape2D(1,ia,i,j)*zelm(ia)
-      zeta=zeta+dershape2D(2,ia,i,j)*zelm(ia)
-    enddo
-
-!   calculate the unnormalized normal to the boundary
-    unx=yxi*zeta-yeta*zxi
-    uny=zxi*xeta-zeta*xxi
-    unz=xxi*yeta-xeta*yxi
-    jacobian=dsqrt(unx**2+uny**2+unz**2)
-    if(jacobian == ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
-
-!   normalize normal vector and store weighted surface jacobian
-
-! distinguish if single or double precision for reals
-    if(CUSTOM_REAL == SIZE_REAL) then
-      jacobian2Dw_face(i,j) = sngl(jacobian * wgllwgll(i,j) )
-      normal_face(1,i,j)=sngl(unx/jacobian)
-      normal_face(2,i,j)=sngl(uny/jacobian)
-      normal_face(3,i,j)=sngl(unz/jacobian)
-    else
-      jacobian2Dw_face(i,j) = jacobian * wgllwgll(i,j)
-      normal_face(1,i,j)=unx/jacobian
-      normal_face(2,i,j)=uny/jacobian
-      normal_face(3,i,j)=unz/jacobian
-    endif
-
-    enddo
-  enddo
-
-  end subroutine compute_jacobian_2D_face
-
-
-! This subroutine recompute the 3D jacobian for one element
-! based upon 125 GLL points
-! Hejun Zhu OCT16,2009
-
-! input: myrank,
-!        xstore,ystore,zstore ----- input position
-!        xigll,yigll,zigll ----- gll points position
-!        ispec,nspec       ----- element number
-!        ACTUALLY_STORE_ARRAYS   ------ save array or not
-
-! output: xixstore,xiystore,xizstore,
-!         etaxstore,etaystore,etazstore,
-!         gammaxstore,gammaystore,gammazstore ------ parameters used for calculating jacobian
-
-
-  subroutine recalc_jacobian_gll2D(myrank,xstore,ystore,zstore, &
-                                  xigll,yigll,wgllwgll,NGLLA,NGLLB, &
-                                  ispec,nspec,jacobian2Dw_face,normal_face)
-
-  implicit none
-
-  include "constants.h"
-
-  ! input parameter
-  integer::myrank,ispec,nspec
-  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-
-  integer :: NGLLA,NGLLB
-  double precision, dimension(NGLLA):: xigll
-  double precision, dimension(NGLLB):: yigll
-  double precision:: wgllwgll(NGLLA,NGLLB)
-
-  real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
-  real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
-
-  ! other parameters for this subroutine
-  integer:: i,j,k,i1,j1,k1
-  double precision:: xxi,xeta,yxi,yeta,zxi,zeta
-  double precision:: xi,eta
-  double precision,dimension(NGLLA):: hxir,hpxir
-  double precision,dimension(NGLLB):: hetar,hpetar
-  double precision:: hlagrange,hlagrange_xi,hlagrange_eta
-  double precision:: jacobian
-  double precision:: unx,uny,unz
-
-
-
-  ! test parameters which can be deleted
-  double precision:: xmesh,ymesh,zmesh
-  double precision:: sumshape,sumdershapexi,sumdershapeeta
-
-  ! first go over all gll points on face
-  k=1
-  do j=1,NGLLB
-    do i=1,NGLLA
-
-      xxi = 0.0
-      xeta = 0.0
-      yxi = 0.0
-      yeta = 0.0
-      zxi = 0.0
-      zeta = 0.0
-
-      xi = xigll(i)
-      eta = yigll(j)
-
-      ! calculate lagrange polynomial and its derivative
-      call lagrange_any(xi,NGLLA,xigll,hxir,hpxir)
-      call lagrange_any(eta,NGLLB,yigll,hetar,hpetar)
-
-      ! test parameters
-      sumshape = 0.0
-      sumdershapexi = 0.0
-      sumdershapeeta = 0.0
-      xmesh = 0.0
-      ymesh = 0.0
-      zmesh = 0.0
-
-      k1=1
-      do j1 = 1,NGLLB
-        do i1 = 1,NGLLA
-         hlagrange = hxir(i1)*hetar(j1)
-         hlagrange_xi = hpxir(i1)*hetar(j1)
-         hlagrange_eta = hxir(i1)*hpetar(j1)
-
-
-         xxi = xxi + xstore(i1,j1,k1,ispec)*hlagrange_xi
-         xeta = xeta + xstore(i1,j1,k1,ispec)*hlagrange_eta
-
-         yxi = yxi + ystore(i1,j1,k1,ispec)*hlagrange_xi
-         yeta = yeta + ystore(i1,j1,k1,ispec)*hlagrange_eta
-
-         zxi = zxi + zstore(i1,j1,k1,ispec)*hlagrange_xi
-         zeta = zeta + zstore(i1,j1,k1,ispec)*hlagrange_eta
-
-         ! test the lagrange polynomial and its derivate
-         xmesh = xmesh + xstore(i1,j1,k1,ispec)*hlagrange
-         ymesh = ymesh + ystore(i1,j1,k1,ispec)*hlagrange
-         zmesh = zmesh + zstore(i1,j1,k1,ispec)*hlagrange
-         sumshape = sumshape + hlagrange
-         sumdershapexi = sumdershapexi + hlagrange_xi
-         sumdershapeeta = sumdershapeeta + hlagrange_eta
-
-         end do
-      end do
-
-      ! Check the lagrange polynomial and its derivative
-      if (xmesh /=xstore(i,j,k,ispec).or.ymesh/=ystore(i,j,k,ispec).or.zmesh/=zstore(i,j,k,ispec)) then
-        call exit_MPI(myrank,'new mesh positions are wrong in recalc_jacobian_gall3D.f90')
-      end if
-      if(abs(sumshape-one) >  TINYVAL) then
-        call exit_MPI(myrank,'error shape functions in recalc_jacobian_gll3D.f90')
-      end if
-      if(abs(sumdershapexi) >  TINYVAL) then
-        call exit_MPI(myrank,'error derivative xi shape functions in recalc_jacobian_gll3D.f90')
-      end if
-      if(abs(sumdershapeeta) >  TINYVAL) then
-        call exit_MPI(myrank,'error derivative eta shape functions in recalc_jacobian_gll3D.f90')
-      end if
-
-!   calculate the unnormalized normal to the boundary
-      unx=yxi*zeta-yeta*zxi
-      uny=zxi*xeta-zeta*xxi
-      unz=xxi*yeta-xeta*yxi
-      jacobian=dsqrt(unx**2+uny**2+unz**2)
-      if(jacobian <= ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
-
-!   normalize normal vector and store weighted surface jacobian
-
-! distinguish if single or double precision for reals
-      if(CUSTOM_REAL == SIZE_REAL) then
-        jacobian2Dw_face(i,j) = sngl(jacobian * wgllwgll(i,j) )
-        normal_face(1,i,j)=sngl(unx/jacobian)
-        normal_face(2,i,j)=sngl(uny/jacobian)
-        normal_face(3,i,j)=sngl(unz/jacobian)
-      else
-        jacobian2Dw_face(i,j) = jacobian * wgllwgll(i,j)
-        normal_face(1,i,j)=unx/jacobian
-        normal_face(2,i,j)=uny/jacobian
-        normal_face(3,i,j)=unz/jacobian
-      endif
-
-    enddo
-  enddo
-
-  end subroutine recalc_jacobian_gll2D
-
-!
-!------------------------------------------------------------------------------------------------
-!
-!
-!  subroutine get_jacobian_boundaries(myrank,iboun,nspec, &
-!              xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
-!              dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
-!              wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
-!              ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
-!              xcoord_iboun,ycoord_iboun,zcoord_iboun, &
-!              nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
-!              jacobian2D_xmin,jacobian2D_xmax, &
-!              jacobian2D_ymin,jacobian2D_ymax, &
-!              jacobian2D_bottom,jacobian2D_top, &
-!              normal_xmin,normal_xmax, &
-!              normal_ymin,normal_ymax, &
-!              normal_bottom,normal_top, &
-!              NSPEC2D_BOTTOM,NSPEC2D_TOP)
-!
-!  implicit none
-!
-!  include "constants.h"
-!
-!  integer nspec,myrank,nglob
-!
-!! arrays with the mesh
-!  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-!  real(kind=CUSTOM_REAL) :: xstore_dummy(nglob),ystore_dummy(nglob),zstore_dummy(nglob)
-!
-!
-!! absorbing boundaries
-!! (careful with array bounds, no need for NSPEC2DMAX_XMIN_XMAX & NSPEC2DMAX_YMIN_YMAX  anymore)
-!  integer  :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, NSPEC2D_BOTTOM, NSPEC2D_TOP
-!  integer, dimension(nspec2D_xmin)  :: ibelm_xmin
-!  integer, dimension(nspec2D_xmax)  :: ibelm_xmax
-!  integer, dimension(nspec2D_ymin)  :: ibelm_ymin
-!  integer, dimension(nspec2D_ymax)  :: ibelm_ymax
-!  integer, dimension(NSPEC2D_BOTTOM)  :: ibelm_bottom
-!  integer, dimension(NSPEC2D_TOP)  :: ibelm_top
-!
-!  logical iboun(6,nspec)
-!  real(kind=CUSTOM_REAL), dimension(NGNOD2D,6,nspec) :: xcoord_iboun,ycoord_iboun,zcoord_iboun
-!
-!!  double precision xstore(NGLLX,NGLLY,NGLLZ,nspec)
-!!  double precision ystore(NGLLX,NGLLY,NGLLZ,nspec)
-!!  double precision zstore(NGLLX,NGLLY,NGLLZ,nspec)
-!
-!  real(kind=CUSTOM_REAL) jacobian2D_xmin(NGLLY,NGLLZ,NSPEC2D_xmin)
-!  real(kind=CUSTOM_REAL) jacobian2D_xmax(NGLLY,NGLLZ,NSPEC2D_xmax)
-!  real(kind=CUSTOM_REAL) jacobian2D_ymin(NGLLX,NGLLZ,NSPEC2D_ymin)
-!  real(kind=CUSTOM_REAL) jacobian2D_ymax(NGLLX,NGLLZ,NSPEC2D_ymax)
-!  real(kind=CUSTOM_REAL) jacobian2D_bottom(NGLLX,NGLLY,NSPEC2D_BOTTOM)
-!  real(kind=CUSTOM_REAL) jacobian2D_top(NGLLX,NGLLY,NSPEC2D_TOP)
-!
-!  real(kind=CUSTOM_REAL) normal_xmin(NDIM,NGLLY,NGLLZ,NSPEC2D_xmin)
-!  real(kind=CUSTOM_REAL) normal_xmax(NDIM,NGLLY,NGLLZ,NSPEC2D_xmax)
-!  real(kind=CUSTOM_REAL) normal_ymin(NDIM,NGLLX,NGLLZ,NSPEC2D_ymin)
-!  real(kind=CUSTOM_REAL) normal_ymax(NDIM,NGLLX,NGLLZ,NSPEC2D_ymax)
-!  real(kind=CUSTOM_REAL) normal_bottom(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM)
-!  real(kind=CUSTOM_REAL) normal_top(NDIM,NGLLX,NGLLY,NSPEC2D_TOP)
-!
-!  double precision dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ)
-!  double precision dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ)
-!  double precision dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY)
-!  double precision dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY)
-!
-!  double precision, dimension(NGLLX,NGLLY) :: wgllwgll_xy
-!  double precision, dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-!  double precision, dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-!
-!  double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
-!
-!! element numbering
-!  integer ispec,i,j
-!
-!! counters to keep track of number of elements on each of the boundaries
-!  integer ispecb1,ispecb2,ispecb3,ispecb4,ispecb5,ispecb6
-!
-!
-!! check that the parameter file is correct
-!  if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
-!  if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
-!
-!  ispecb1 = 0
-!  ispecb2 = 0
-!  ispecb3 = 0
-!  ispecb4 = 0
-!  ispecb5 = 0
-!  ispecb6 = 0
-!
-!  do ispec=1,nspec
-!
-!! determine if the element falls on a boundary
-!
-!! on boundary: xmin
-!
-!  if(iboun(1,ispec)) then
-!
-!    ispecb1=ispecb1+1
-!    ibelm_xmin(ispecb1)=ispec
-!
-!!   specify the 4 nodes for the 2-D boundary element
-!!   i.e. face (0,0,0),(0,1,0),(0,1,1),(0,0,1)
-!
-!! careful: these points may not be on the xmin face for unstructured grids
-!!    xelm(1)=xstore(1,1,1,ispec)
-!!    yelm(1)=ystore(1,1,1,ispec)
-!!    zelm(1)=zstore(1,1,1,ispec)
-!!    xelm(2)=xstore(1,NGLLY,1,ispec)
-!!    yelm(2)=ystore(1,NGLLY,1,ispec)
-!!    zelm(2)=zstore(1,NGLLY,1,ispec)
-!!    xelm(3)=xstore(1,NGLLY,NGLLZ,ispec)
-!!    yelm(3)=ystore(1,NGLLY,NGLLZ,ispec)
-!!    zelm(3)=zstore(1,NGLLY,NGLLZ,ispec)
-!!    xelm(4)=xstore(1,1,NGLLZ,ispec)
-!!    yelm(4)=ystore(1,1,NGLLZ,ispec)
-!!    zelm(4)=zstore(1,1,NGLLZ,ispec)
-!
-!    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
-!    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
-!    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
-!    xelm(2)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
-!    yelm(2)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
-!    zelm(2)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
-!    xelm(3)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-!    yelm(3)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-!    zelm(3)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-!    xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
-!    yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
-!    zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
-!
-!! takes coordinates from boundary faces
-!!    do i=1,NGNOD2D
-!!      xelm(i) = xcoord_iboun(i,1,ispec)
-!!      yelm(i) = ycoord_iboun(i,1,ispec)
-!!      zelm(i) = zcoord_iboun(i,1,ispec)
-!!    enddo
-!
-!    call compute_jacobian_2D(myrank,ispecb1,xelm,yelm,zelm, &
-!                  dershape2D_x,wgllwgll_yz, &
-!                  jacobian2D_xmin,normal_xmin,NGLLY,NGLLZ,NSPEC2D_xmin)
-!
-!    ! normal convention: points away from element
-!    ! switches normal direction if necessary
-!    do i=1,NGLLY
-!      do j=1,NGLLZ
-!        call get_element_face_normal(ispecb1, 1, xelm,yelm,zelm, &
-!                                ibool,nspec,nglob, &
-!                                xstore_dummy,ystore_dummy,zstore_dummy, &
-!                                normal_xmin(:,i,j,ispecb1) )
-!      enddo
-!    enddo
-!
-!  endif
-!
-!! on boundary: xmax
-!
-!  if(iboun(2,ispec)) then
-!
-!    ispecb2=ispecb2+1
-!    ibelm_xmax(ispecb2)=ispec
-!
-!! careful...
-!!   specify the 4 nodes for the 2-D boundary element
-!!    xelm(1)=xstore(NGLLX,1,1,ispec)
-!!    yelm(1)=ystore(NGLLX,1,1,ispec)
-!!    zelm(1)=zstore(NGLLX,1,1,ispec)
-!!    xelm(2)=xstore(NGLLX,NGLLY,1,ispec)
-!!    yelm(2)=ystore(NGLLX,NGLLY,1,ispec)
-!!    zelm(2)=zstore(NGLLX,NGLLY,1,ispec)
-!!    xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
-!!    yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
-!!    zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
-!!    xelm(4)=xstore(NGLLX,1,NGLLZ,ispec)
-!!    yelm(4)=ystore(NGLLX,1,NGLLZ,ispec)
-!!    zelm(4)=zstore(NGLLX,1,NGLLZ,ispec)
-!
-!    xelm(1)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
-!    yelm(1)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
-!    zelm(1)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
-!    xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-!    yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-!    zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-!    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-!    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-!    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-!    xelm(4)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-!    yelm(4)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-!    zelm(4)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-!
-!! takes coordinates from boundary faces
-!!    do i=1,NGNOD2D
-!!      xelm(i) = xcoord_iboun(i,2,ispec)
-!!      yelm(i) = ycoord_iboun(i,2,ispec)
-!!      zelm(i) = zcoord_iboun(i,2,ispec)
-!!    enddo
-!
-!    call compute_jacobian_2D(myrank,ispecb2,xelm,yelm,zelm, &
-!                  dershape2D_x,wgllwgll_yz, &
-!                  jacobian2D_xmax,normal_xmax,NGLLY,NGLLZ,NSPEC2D_xmax)
-!
-!    ! normal convention: points away from element
-!    ! switch normal direction if necessary
-!    do i=1,NGLLY
-!      do j=1,NGLLZ
-!        call get_element_face_normal(ispecb2, 2, xelm,yelm,zelm, &
-!                                ibool,nspec,nglob, &
-!                                xstore_dummy,ystore_dummy,zstore_dummy, &
-!                                normal_xmax(:,i,j,ispecb2) )
-!      enddo
-!    enddo
-!
-!  endif
-!
-!! on boundary: ymin
-!
-!  if(iboun(3,ispec)) then
-!
-!    ispecb3=ispecb3+1
-!    ibelm_ymin(ispecb3)=ispec
-!
-!! careful...
-!!   specify the 4 nodes for the 2-D boundary element
-!!    xelm(1)=xstore(1,1,1,ispec)
-!!    yelm(1)=ystore(1,1,1,ispec)
-!!    zelm(1)=zstore(1,1,1,ispec)
-!!    xelm(2)=xstore(NGLLX,1,1,ispec)
-!!    yelm(2)=ystore(NGLLX,1,1,ispec)
-!!    zelm(2)=zstore(NGLLX,1,1,ispec)
-!!    xelm(3)=xstore(NGLLX,1,NGLLZ,ispec)
-!!    yelm(3)=ystore(NGLLX,1,NGLLZ,ispec)
-!!    zelm(3)=zstore(NGLLX,1,NGLLZ,ispec)
-!!    xelm(4)=xstore(1,1,NGLLZ,ispec)
-!!    yelm(4)=ystore(1,1,NGLLZ,ispec)
-!!    zelm(4)=zstore(1,1,NGLLZ,ispec)
-!
-!    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
-!    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
-!    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
-!    xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
-!    yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
-!    zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
-!    xelm(3)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-!    yelm(3)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-!    zelm(3)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-!    xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
-!    yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
-!    zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
-!
-!! takes coordinates from boundary faces
-!!    do i=1,NGNOD2D
-!!      xelm(i) = xcoord_iboun(i,3,ispec)
-!!      yelm(i) = ycoord_iboun(i,3,ispec)
-!!      zelm(i) = zcoord_iboun(i,3,ispec)
-!!    enddo
-!
-!    call compute_jacobian_2D(myrank,ispecb3,xelm,yelm,zelm, &
-!                  dershape2D_y,wgllwgll_xz, &
-!                  jacobian2D_ymin,normal_ymin,NGLLX,NGLLZ,NSPEC2D_ymin)
-!
-!    ! normal convention: points away from element
-!    ! switch normal direction if necessary
-!    do i=1,NGLLX
-!      do j=1,NGLLZ
-!        call get_element_face_normal(ispecb3, 3, xelm,yelm,zelm, &
-!                                ibool,nspec,nglob, &
-!                                xstore_dummy,ystore_dummy,zstore_dummy, &
-!                                normal_ymin(:,i,j,ispecb3) )
-!      enddo
-!    enddo
-!
-!
-!  endif
-!
-!! on boundary: ymax
-!
-!  if(iboun(4,ispec)) then
-!
-!    ispecb4=ispecb4+1
-!    ibelm_ymax(ispecb4)=ispec
-!
-!!careful...
-!!   specify the 4 nodes for the 2-D boundary element
-!!    xelm(1)=xstore(1,NGLLY,1,ispec)
-!!    yelm(1)=ystore(1,NGLLY,1,ispec)
-!!    zelm(1)=zstore(1,NGLLY,1,ispec)
-!!    xelm(2)=xstore(NGLLX,NGLLY,1,ispec)
-!!    yelm(2)=ystore(NGLLX,NGLLY,1,ispec)
-!!    zelm(2)=zstore(NGLLX,NGLLY,1,ispec)
-!!    xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
-!!    yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
-!!    zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
-!!    xelm(4)=xstore(1,NGLLY,NGLLZ,ispec)
-!!    yelm(4)=ystore(1,NGLLY,NGLLZ,ispec)
-!!    zelm(4)=zstore(1,NGLLY,NGLLZ,ispec)
-!
-!    xelm(1)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
-!    yelm(1)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
-!    zelm(1)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
-!    xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-!    yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-!    zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-!    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-!    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-!    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-!    xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-!    yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-!    zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-!
-!! takes coordinates from boundary faces
-!!    do i=1,NGNOD2D
-!!      xelm(i) = xcoord_iboun(i,4,ispec)
-!!      yelm(i) = ycoord_iboun(i,4,ispec)
-!!      zelm(i) = zcoord_iboun(i,4,ispec)
-!!    enddo
-!!
-!    call compute_jacobian_2D(myrank,ispecb4,xelm,yelm,zelm, &
-!                  dershape2D_y, wgllwgll_xz, &
-!                  jacobian2D_ymax,normal_ymax,NGLLX,NGLLZ,NSPEC2D_ymax)
-!
-!    ! normal convention: points away from element
-!    ! switch normal direction if necessary
-!    do i=1,NGLLX
-!      do j=1,NGLLZ
-!        call get_element_face_normal(ispecb4, 4, xelm,yelm,zelm, &
-!                                ibool,nspec,nglob, &
-!                                xstore_dummy,ystore_dummy,zstore_dummy, &
-!                                normal_ymax(:,i,j,ispecb4) )
-!      enddo
-!    enddo
-!
-!  endif
-!
-!! on boundary: bottom
-!
-!  if(iboun(5,ispec)) then
-!
-!    ispecb5=ispecb5+1
-!    ibelm_bottom(ispecb5)=ispec
-!
-!! careful...
-!! for bottom, this might be actually working... when mesh is oriented along z direction...
-!!    xelm(1)=xstore(1,1,1,ispec)
-!!    yelm(1)=ystore(1,1,1,ispec)
-!!    zelm(1)=zstore(1,1,1,ispec)
-!!    xelm(2)=xstore(NGLLX,1,1,ispec)
-!!    yelm(2)=ystore(NGLLX,1,1,ispec)
-!!    zelm(2)=zstore(NGLLX,1,1,ispec)
-!!    xelm(3)=xstore(NGLLX,NGLLY,1,ispec)
-!!    yelm(3)=ystore(NGLLX,NGLLY,1,ispec)
-!!    zelm(3)=zstore(NGLLX,NGLLY,1,ispec)
-!!    xelm(4)=xstore(1,NGLLY,1,ispec)
-!!    yelm(4)=ystore(1,NGLLY,1,ispec)
-!!    zelm(4)=zstore(1,NGLLY,1,ispec)
-!
-!    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
-!    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
-!    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
-!    xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
-!    yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
-!    zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
-!    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-!    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-!    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-!    xelm(4)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
-!    yelm(4)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
-!    zelm(4)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
-!
-!
-!! takes coordinates from boundary faces
-!!    do i=1,NGNOD2D
-!!      xelm(i) = xcoord_iboun(i,5,ispec)
-!!      yelm(i) = ycoord_iboun(i,5,ispec)
-!!      zelm(i) = zcoord_iboun(i,5,ispec)
-!!    enddo
-!
-!    call compute_jacobian_2D(myrank,ispecb5,xelm,yelm,zelm,&
-!                  dershape2D_bottom,wgllwgll_xy, &
-!                  jacobian2D_bottom,normal_bottom,NGLLX,NGLLY,NSPEC2D_BOTTOM)
-!
-!    ! normal convention: points away from element
-!    ! switch normal direction if necessary
-!    do i=1,NGLLX
-!      do j=1,NGLLY
-!        call get_element_face_normal(ispecb5, 5, xelm,yelm,zelm, &
-!                                ibool,nspec,nglob, &
-!                                xstore_dummy,ystore_dummy,zstore_dummy, &
-!                                normal_bottom(:,i,j,ispecb5) )
-!      enddo
-!    enddo
-!
-!  endif
-!
-!! on boundary: top
-!
-!  if(iboun(6,ispec)) then
-!
-!    ispecb6=ispecb6+1
-!    ibelm_top(ispecb6)=ispec
-!
-!! careful...
-!! for top, this might be working as well ... when mesh is oriented along z direction...
-!!    xelm(1)=xstore(1,1,NGLLZ,ispec)
-!!    yelm(1)=ystore(1,1,NGLLZ,ispec)
-!!    zelm(1)=zstore(1,1,NGLLZ,ispec)
-!!    xelm(2)=xstore(NGLLX,1,NGLLZ,ispec)
-!!    yelm(2)=ystore(NGLLX,1,NGLLZ,ispec)
-!!    zelm(2)=zstore(NGLLX,1,NGLLZ,ispec)
-!!    xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
-!!    yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
-!!    zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
-!!    xelm(4)=xstore(1,NGLLY,NGLLZ,ispec)
-!!    yelm(4)=ystore(1,NGLLY,NGLLZ,ispec)
-!!    zelm(4)=zstore(1,NGLLY,NGLLZ,ispec)
-!
-!
-!! takes coordinates from boundary faces
-!!    do i=1,NGNOD2D
-!!      xelm(i) = xcoord_iboun(i,6,ispec)
-!!      yelm(i) = ycoord_iboun(i,6,ispec)
-!!      zelm(i) = zcoord_iboun(i,6,ispec)
-!!    enddo
-!
-!    call compute_jacobian_2D(myrank,ispecb6,xelm,yelm,zelm,&
-!                  dershape2D_top, wgllwgll_xy, &
-!                  jacobian2D_top,normal_top,NGLLX,NGLLY,NSPEC2D_TOP)
-!
-!    ! normal convention: points away from element
-!    ! switch normal direction if necessary
-!    do i=1,NGLLX
-!      do j=1,NGLLY
-!        call get_element_face_normal(ispecb6, 6, xelm,yelm,zelm, &
-!                                ibool,nspec,nglob, &
-!                                xstore_dummy,ystore_dummy,zstore_dummy, &
-!                                normal_top(:,i,j,ispecb6) )
-!      enddo
-!    enddo
-!
-!  endif
-!
-!  enddo
-!
-!! check theoretical value of elements
-!!  if(ispecb1 /= NSPEC2D_xmin) call exit_MPI(myrank,'ispecb1 should equal NSPEC2D_xmin')
-!!  if(ispecb2 /= NSPEC2D_xmax) call exit_MPI(myrank,'ispecb2 should equal NSPEC2D_xmax')
-!!  if(ispecb3 /= NSPEC2D_ymin) call exit_MPI(myrank,'ispecb3 should equal NSPEC2D_ymin')
-!!  if(ispecb4 /= NSPEC2D_ymax) call exit_MPI(myrank,'ispecb4 should equal NSPEC2D_ymax')
-!!  if(ispecb5 /= NSPEC2D_BOTTOM) call exit_MPI(myrank,'ispecb5 should equal NSPEC2D_BOTTOM')
-!!  if(ispecb6 /= NSPEC2D_TOP) call exit_MPI(myrank,'ispecb6 should equal NSPEC2D_TOP')
-!
-!  end subroutine get_jacobian_boundaries
-!
-!! -------------------------------------------------------
-!
-!  subroutine compute_jacobian_2D(myrank,ispecb,xelm,yelm,zelm, &
-!                                dershape2D,wgllwgll, &
-!                                jacobian2D,normal, &
-!                                NGLLA,NGLLB,NSPEC2DMAX_AB)
-!
-!  implicit none
-!
-!  include "constants.h"
-!
-!! generic routine that accepts any polynomial degree in each direction
-!
-!  integer ispecb,NGLLA,NGLLB,NSPEC2DMAX_AB,myrank
-!
-!  double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
-!  double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
-!  double precision wgllwgll
-!
-!  real(kind=CUSTOM_REAL) jacobian2D(NGLLA,NGLLB,NSPEC2DMAX_AB)
-!  real(kind=CUSTOM_REAL) normal(3,NGLLA,NGLLB,NSPEC2DMAX_AB)
-!
-!  integer i,j,ia
-!  double precision xxi,xeta,yxi,yeta,zxi,zeta
-!  double precision unx,uny,unz,jacobian
-!
-!  do j=1,NGLLB
-!    do i=1,NGLLA
-!
-!    xxi=ZERO
-!    xeta=ZERO
-!    yxi=ZERO
-!    yeta=ZERO
-!    zxi=ZERO
-!    zeta=ZERO
-!    do ia=1,NGNOD2D
-!      xxi=xxi+dershape2D(1,ia,i,j)*xelm(ia)
-!      xeta=xeta+dershape2D(2,ia,i,j)*xelm(ia)
-!      yxi=yxi+dershape2D(1,ia,i,j)*yelm(ia)
-!      yeta=yeta+dershape2D(2,ia,i,j)*yelm(ia)
-!      zxi=zxi+dershape2D(1,ia,i,j)*zelm(ia)
-!      zeta=zeta+dershape2D(2,ia,i,j)*zelm(ia)
-!    enddo
-!
-!!   calculate the unnormalized normal to the boundary
-!    unx=yxi*zeta-yeta*zxi
-!    uny=zxi*xeta-zeta*xxi
-!    unz=xxi*yeta-xeta*yxi
-!    jacobian=dsqrt(unx**2+uny**2+unz**2)
-!    if(jacobian == ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
-!
-!!   normalize normal vector and store weighted surface jacobian
-!
-!! distinguish if single or double precision for reals
-!    if(CUSTOM_REAL == SIZE_REAL) then
-!      jacobian2D(i,j,ispecb) = sngl(jacobian * wgllwgll(i,j) )
-!      normal(1,i,j,ispecb)=sngl(unx/jacobian)
-!      normal(2,i,j,ispecb)=sngl(uny/jacobian)
-!      normal(3,i,j,ispecb)=sngl(unz/jacobian)
-!    else
-!      jacobian2D(i,j,ispecb) = jacobian * wgllwgll(i,j)
-!      normal(1,i,j,ispecb)=unx/jacobian
-!      normal(2,i,j,ispecb)=uny/jacobian
-!      normal(3,i,j,ispecb)=unz/jacobian
-!    endif
-!
-!    enddo
-!  enddo
-!
-!  end subroutine compute_jacobian_2D
-!
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+  subroutine get_jacobian_boundary_face(myrank,nspec, &
+                        xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
+                        dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
+                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
+                        ispec,iface,jacobian2Dw_face,normal_face,NGLLA,NGLLB)
+
+! returns jacobian2Dw_face and normal_face (pointing outwards of element)
+
+  implicit none
+
+  include "constants.h"
+
+  integer nspec,myrank,nglob
+
+! arrays with the mesh
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  real(kind=CUSTOM_REAL) :: xstore_dummy(nglob),ystore_dummy(nglob),zstore_dummy(nglob)
+
+! face information
+  integer :: iface,ispec,NGLLA,NGLLB
+  real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
+  real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
+
+  double precision dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ)
+  double precision dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ)
+  double precision dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY)
+  double precision dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY)
+
+  double precision, dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  double precision, dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  double precision, dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+! local parameters
+! face corners
+  double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
+
+! check that the parameter file is correct
+  if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
+  if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
+
+  select case ( iface )
+  ! on reference face: xmin
+  case(1)
+    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+    xelm(2)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+    yelm(2)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+    zelm(2)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+    xelm(3)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+    yelm(3)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+    zelm(3)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+    xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
+    yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
+    zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
+
+    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
+                  dershape2D_x,wgllwgll_yz, &
+                  jacobian2Dw_face,normal_face,NGLLY,NGLLZ)
+
+! on boundary: xmax
+  case(2)
+    xelm(1)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+    yelm(1)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+    zelm(1)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+    xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+    yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+    zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+    xelm(4)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+    yelm(4)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+    zelm(4)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+
+    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
+                  dershape2D_x,wgllwgll_yz, &
+                  jacobian2Dw_face,normal_face,NGLLY,NGLLZ)
+
+! on boundary: ymin
+  case(3)
+    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+    xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+    yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+    zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+    xelm(3)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+    yelm(3)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+    zelm(3)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+    xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
+    yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
+    zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
+
+    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
+                  dershape2D_y,wgllwgll_xz, &
+                  jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
+
+! on boundary: ymax
+  case(4)
+    xelm(1)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+    yelm(1)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+    zelm(1)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+    xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+    yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+    zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+    xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+    yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+    zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+
+    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
+                  dershape2D_y, wgllwgll_xz, &
+                  jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
+
+
+! on boundary: bottom
+  case(5)
+    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+    xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+    yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+    zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+    xelm(4)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+    yelm(4)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+    zelm(4)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+
+    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm,&
+                  dershape2D_bottom,wgllwgll_xy, &
+                  jacobian2Dw_face,normal_face,NGLLX,NGLLY)
+
+! on boundary: top
+  case(6)
+    xelm(1)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
+    yelm(1)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
+    zelm(1)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
+    xelm(2)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+    yelm(2)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+    zelm(2)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+    xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+    yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+    zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+
+    call compute_jacobian_2D_face(myrank,xelm,yelm,zelm,&
+                  dershape2D_top, wgllwgll_xy, &
+                  jacobian2Dw_face,normal_face,NGLLX,NGLLY)
+
+  case default
+    stop 'error 2D jacobian'
+  end select
+
+  end subroutine get_jacobian_boundary_face
+
+
+! -------------------------------------------------------
+
+  subroutine compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
+                                dershape2D,wgllwgll, &
+                                jacobian2Dw_face,normal_face,NGLLA,NGLLB)
+
+  implicit none
+
+  include "constants.h"
+
+! generic routine that accepts any polynomial degree in each direction
+! returns 2D jacobian and normal for this face only
+
+  integer NGLLA,NGLLB,myrank
+
+  double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
+  double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
+  double precision wgllwgll(NGLLA,NGLLB)
+
+  real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
+  real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
+
+  integer i,j,ia
+  double precision xxi,xeta,yxi,yeta,zxi,zeta
+  double precision unx,uny,unz,jacobian
+
+  do j=1,NGLLB
+    do i=1,NGLLA
+
+    xxi=ZERO
+    xeta=ZERO
+    yxi=ZERO
+    yeta=ZERO
+    zxi=ZERO
+    zeta=ZERO
+    do ia=1,NGNOD2D
+      xxi=xxi+dershape2D(1,ia,i,j)*xelm(ia)
+      xeta=xeta+dershape2D(2,ia,i,j)*xelm(ia)
+      yxi=yxi+dershape2D(1,ia,i,j)*yelm(ia)
+      yeta=yeta+dershape2D(2,ia,i,j)*yelm(ia)
+      zxi=zxi+dershape2D(1,ia,i,j)*zelm(ia)
+      zeta=zeta+dershape2D(2,ia,i,j)*zelm(ia)
+    enddo
+
+!   calculate the unnormalized normal to the boundary
+    unx=yxi*zeta-yeta*zxi
+    uny=zxi*xeta-zeta*xxi
+    unz=xxi*yeta-xeta*yxi
+    jacobian=dsqrt(unx**2+uny**2+unz**2)
+    if(jacobian == ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
+
+!   normalize normal vector and store weighted surface jacobian
+
+! distinguish if single or double precision for reals
+    if(CUSTOM_REAL == SIZE_REAL) then
+      jacobian2Dw_face(i,j) = sngl(jacobian * wgllwgll(i,j) )
+      normal_face(1,i,j)=sngl(unx/jacobian)
+      normal_face(2,i,j)=sngl(uny/jacobian)
+      normal_face(3,i,j)=sngl(unz/jacobian)
+    else
+      jacobian2Dw_face(i,j) = jacobian * wgllwgll(i,j)
+      normal_face(1,i,j)=unx/jacobian
+      normal_face(2,i,j)=uny/jacobian
+      normal_face(3,i,j)=unz/jacobian
+    endif
+
+    enddo
+  enddo
+
+  end subroutine compute_jacobian_2D_face
+
+
+! This subroutine recompute the 3D jacobian for one element
+! based upon 125 GLL points
+! Hejun Zhu OCT16,2009
+
+! input: myrank,
+!        xstore,ystore,zstore ----- input position
+!        xigll,yigll,zigll ----- gll points position
+!        ispec,nspec       ----- element number
+!        ACTUALLY_STORE_ARRAYS   ------ save array or not
+
+! output: xixstore,xiystore,xizstore,
+!         etaxstore,etaystore,etazstore,
+!         gammaxstore,gammaystore,gammazstore ------ parameters used for calculating jacobian
+
+
+  subroutine recalc_jacobian_gll2D(myrank,xstore,ystore,zstore, &
+                                  xigll,yigll,wgllwgll,NGLLA,NGLLB, &
+                                  ispec,nspec,jacobian2Dw_face,normal_face)
+
+  implicit none
+
+  include "constants.h"
+
+  ! input parameter
+  integer::myrank,ispec,nspec
+  double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+
+  integer :: NGLLA,NGLLB
+  double precision, dimension(NGLLA):: xigll
+  double precision, dimension(NGLLB):: yigll
+  double precision:: wgllwgll(NGLLA,NGLLB)
+
+  real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
+  real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
+
+  ! other parameters for this subroutine
+  integer:: i,j,k,i1,j1,k1
+  double precision:: xxi,xeta,yxi,yeta,zxi,zeta
+  double precision:: xi,eta
+  double precision,dimension(NGLLA):: hxir,hpxir
+  double precision,dimension(NGLLB):: hetar,hpetar
+  double precision:: hlagrange,hlagrange_xi,hlagrange_eta
+  double precision:: jacobian
+  double precision:: unx,uny,unz
+
+
+
+  ! test parameters which can be deleted
+  double precision:: xmesh,ymesh,zmesh
+  double precision:: sumshape,sumdershapexi,sumdershapeeta
+
+  ! first go over all gll points on face
+  k=1
+  do j=1,NGLLB
+    do i=1,NGLLA
+
+      xxi = 0.0
+      xeta = 0.0
+      yxi = 0.0
+      yeta = 0.0
+      zxi = 0.0
+      zeta = 0.0
+
+      xi = xigll(i)
+      eta = yigll(j)
+
+      ! calculate lagrange polynomial and its derivative
+      call lagrange_any(xi,NGLLA,xigll,hxir,hpxir)
+      call lagrange_any(eta,NGLLB,yigll,hetar,hpetar)
+
+      ! test parameters
+      sumshape = 0.0
+      sumdershapexi = 0.0
+      sumdershapeeta = 0.0
+      xmesh = 0.0
+      ymesh = 0.0
+      zmesh = 0.0
+
+      k1=1
+      do j1 = 1,NGLLB
+        do i1 = 1,NGLLA
+         hlagrange = hxir(i1)*hetar(j1)
+         hlagrange_xi = hpxir(i1)*hetar(j1)
+         hlagrange_eta = hxir(i1)*hpetar(j1)
+
+
+         xxi = xxi + xstore(i1,j1,k1,ispec)*hlagrange_xi
+         xeta = xeta + xstore(i1,j1,k1,ispec)*hlagrange_eta
+
+         yxi = yxi + ystore(i1,j1,k1,ispec)*hlagrange_xi
+         yeta = yeta + ystore(i1,j1,k1,ispec)*hlagrange_eta
+
+         zxi = zxi + zstore(i1,j1,k1,ispec)*hlagrange_xi
+         zeta = zeta + zstore(i1,j1,k1,ispec)*hlagrange_eta
+
+         ! test the lagrange polynomial and its derivate
+         xmesh = xmesh + xstore(i1,j1,k1,ispec)*hlagrange
+         ymesh = ymesh + ystore(i1,j1,k1,ispec)*hlagrange
+         zmesh = zmesh + zstore(i1,j1,k1,ispec)*hlagrange
+         sumshape = sumshape + hlagrange
+         sumdershapexi = sumdershapexi + hlagrange_xi
+         sumdershapeeta = sumdershapeeta + hlagrange_eta
+
+         end do
+      end do
+
+      ! Check the lagrange polynomial and its derivative
+      if (xmesh /=xstore(i,j,k,ispec).or.ymesh/=ystore(i,j,k,ispec).or.zmesh/=zstore(i,j,k,ispec)) then
+        call exit_MPI(myrank,'new mesh positions are wrong in recalc_jacobian_gall3D.f90')
+      end if
+      if(abs(sumshape-one) >  TINYVAL) then
+        call exit_MPI(myrank,'error shape functions in recalc_jacobian_gll3D.f90')
+      end if
+      if(abs(sumdershapexi) >  TINYVAL) then
+        call exit_MPI(myrank,'error derivative xi shape functions in recalc_jacobian_gll3D.f90')
+      end if
+      if(abs(sumdershapeeta) >  TINYVAL) then
+        call exit_MPI(myrank,'error derivative eta shape functions in recalc_jacobian_gll3D.f90')
+      end if
+
+!   calculate the unnormalized normal to the boundary
+      unx=yxi*zeta-yeta*zxi
+      uny=zxi*xeta-zeta*xxi
+      unz=xxi*yeta-xeta*yxi
+      jacobian=dsqrt(unx**2+uny**2+unz**2)
+      if(jacobian <= ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
+
+!   normalize normal vector and store weighted surface jacobian
+
+! distinguish if single or double precision for reals
+      if(CUSTOM_REAL == SIZE_REAL) then
+        jacobian2Dw_face(i,j) = sngl(jacobian * wgllwgll(i,j) )
+        normal_face(1,i,j)=sngl(unx/jacobian)
+        normal_face(2,i,j)=sngl(uny/jacobian)
+        normal_face(3,i,j)=sngl(unz/jacobian)
+      else
+        jacobian2Dw_face(i,j) = jacobian * wgllwgll(i,j)
+        normal_face(1,i,j)=unx/jacobian
+        normal_face(2,i,j)=uny/jacobian
+        normal_face(3,i,j)=unz/jacobian
+      endif
+
+    enddo
+  enddo
+
+  end subroutine recalc_jacobian_gll2D
+
+!
+!------------------------------------------------------------------------------------------------
+!
+!
+!  subroutine get_jacobian_boundaries(myrank,iboun,nspec, &
+!              xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
+!              dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
+!              wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
+!              ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
+!              xcoord_iboun,ycoord_iboun,zcoord_iboun, &
+!              nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
+!              jacobian2D_xmin,jacobian2D_xmax, &
+!              jacobian2D_ymin,jacobian2D_ymax, &
+!              jacobian2D_bottom,jacobian2D_top, &
+!              normal_xmin,normal_xmax, &
+!              normal_ymin,normal_ymax, &
+!              normal_bottom,normal_top, &
+!              NSPEC2D_BOTTOM,NSPEC2D_TOP)
+!
+!  implicit none
+!
+!  include "constants.h"
+!
+!  integer nspec,myrank,nglob
+!
+!! arrays with the mesh
+!  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+!  real(kind=CUSTOM_REAL) :: xstore_dummy(nglob),ystore_dummy(nglob),zstore_dummy(nglob)
+!
+!
+!! absorbing boundaries
+!! (careful with array bounds, no need for NSPEC2DMAX_XMIN_XMAX & NSPEC2DMAX_YMIN_YMAX  anymore)
+!  integer  :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, NSPEC2D_BOTTOM, NSPEC2D_TOP
+!  integer, dimension(nspec2D_xmin)  :: ibelm_xmin
+!  integer, dimension(nspec2D_xmax)  :: ibelm_xmax
+!  integer, dimension(nspec2D_ymin)  :: ibelm_ymin
+!  integer, dimension(nspec2D_ymax)  :: ibelm_ymax
+!  integer, dimension(NSPEC2D_BOTTOM)  :: ibelm_bottom
+!  integer, dimension(NSPEC2D_TOP)  :: ibelm_top
+!
+!  logical iboun(6,nspec)
+!  real(kind=CUSTOM_REAL), dimension(NGNOD2D,6,nspec) :: xcoord_iboun,ycoord_iboun,zcoord_iboun
+!
+!!  double precision xstore(NGLLX,NGLLY,NGLLZ,nspec)
+!!  double precision ystore(NGLLX,NGLLY,NGLLZ,nspec)
+!!  double precision zstore(NGLLX,NGLLY,NGLLZ,nspec)
+!
+!  real(kind=CUSTOM_REAL) jacobian2D_xmin(NGLLY,NGLLZ,NSPEC2D_xmin)
+!  real(kind=CUSTOM_REAL) jacobian2D_xmax(NGLLY,NGLLZ,NSPEC2D_xmax)
+!  real(kind=CUSTOM_REAL) jacobian2D_ymin(NGLLX,NGLLZ,NSPEC2D_ymin)
+!  real(kind=CUSTOM_REAL) jacobian2D_ymax(NGLLX,NGLLZ,NSPEC2D_ymax)
+!  real(kind=CUSTOM_REAL) jacobian2D_bottom(NGLLX,NGLLY,NSPEC2D_BOTTOM)
+!  real(kind=CUSTOM_REAL) jacobian2D_top(NGLLX,NGLLY,NSPEC2D_TOP)
+!
+!  real(kind=CUSTOM_REAL) normal_xmin(NDIM,NGLLY,NGLLZ,NSPEC2D_xmin)
+!  real(kind=CUSTOM_REAL) normal_xmax(NDIM,NGLLY,NGLLZ,NSPEC2D_xmax)
+!  real(kind=CUSTOM_REAL) normal_ymin(NDIM,NGLLX,NGLLZ,NSPEC2D_ymin)
+!  real(kind=CUSTOM_REAL) normal_ymax(NDIM,NGLLX,NGLLZ,NSPEC2D_ymax)
+!  real(kind=CUSTOM_REAL) normal_bottom(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM)
+!  real(kind=CUSTOM_REAL) normal_top(NDIM,NGLLX,NGLLY,NSPEC2D_TOP)
+!
+!  double precision dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ)
+!  double precision dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ)
+!  double precision dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY)
+!  double precision dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY)
+!
+!  double precision, dimension(NGLLX,NGLLY) :: wgllwgll_xy
+!  double precision, dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+!  double precision, dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+!
+!  double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
+!
+!! element numbering
+!  integer ispec,i,j
+!
+!! counters to keep track of number of elements on each of the boundaries
+!  integer ispecb1,ispecb2,ispecb3,ispecb4,ispecb5,ispecb6
+!
+!
+!! check that the parameter file is correct
+!  if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
+!  if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
+!
+!  ispecb1 = 0
+!  ispecb2 = 0
+!  ispecb3 = 0
+!  ispecb4 = 0
+!  ispecb5 = 0
+!  ispecb6 = 0
+!
+!  do ispec=1,nspec
+!
+!! determine if the element falls on a boundary
+!
+!! on boundary: xmin
+!
+!  if(iboun(1,ispec)) then
+!
+!    ispecb1=ispecb1+1
+!    ibelm_xmin(ispecb1)=ispec
+!
+!!   specify the 4 nodes for the 2-D boundary element
+!!   i.e. face (0,0,0),(0,1,0),(0,1,1),(0,0,1)
+!
+!! careful: these points may not be on the xmin face for unstructured grids
+!!    xelm(1)=xstore(1,1,1,ispec)
+!!    yelm(1)=ystore(1,1,1,ispec)
+!!    zelm(1)=zstore(1,1,1,ispec)
+!!    xelm(2)=xstore(1,NGLLY,1,ispec)
+!!    yelm(2)=ystore(1,NGLLY,1,ispec)
+!!    zelm(2)=zstore(1,NGLLY,1,ispec)
+!!    xelm(3)=xstore(1,NGLLY,NGLLZ,ispec)
+!!    yelm(3)=ystore(1,NGLLY,NGLLZ,ispec)
+!!    zelm(3)=zstore(1,NGLLY,NGLLZ,ispec)
+!!    xelm(4)=xstore(1,1,NGLLZ,ispec)
+!!    yelm(4)=ystore(1,1,NGLLZ,ispec)
+!!    zelm(4)=zstore(1,1,NGLLZ,ispec)
+!
+!    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+!    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+!    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+!    xelm(2)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+!    yelm(2)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+!    zelm(2)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+!    xelm(3)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+!    yelm(3)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+!    zelm(3)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+!    xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
+!    yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
+!    zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
+!
+!! takes coordinates from boundary faces
+!!    do i=1,NGNOD2D
+!!      xelm(i) = xcoord_iboun(i,1,ispec)
+!!      yelm(i) = ycoord_iboun(i,1,ispec)
+!!      zelm(i) = zcoord_iboun(i,1,ispec)
+!!    enddo
+!
+!    call compute_jacobian_2D(myrank,ispecb1,xelm,yelm,zelm, &
+!                  dershape2D_x,wgllwgll_yz, &
+!                  jacobian2D_xmin,normal_xmin,NGLLY,NGLLZ,NSPEC2D_xmin)
+!
+!    ! normal convention: points away from element
+!    ! switches normal direction if necessary
+!    do i=1,NGLLY
+!      do j=1,NGLLZ
+!        call get_element_face_normal(ispecb1, 1, xelm,yelm,zelm, &
+!                                ibool,nspec,nglob, &
+!                                xstore_dummy,ystore_dummy,zstore_dummy, &
+!                                normal_xmin(:,i,j,ispecb1) )
+!      enddo
+!    enddo
+!
+!  endif
+!
+!! on boundary: xmax
+!
+!  if(iboun(2,ispec)) then
+!
+!    ispecb2=ispecb2+1
+!    ibelm_xmax(ispecb2)=ispec
+!
+!! careful...
+!!   specify the 4 nodes for the 2-D boundary element
+!!    xelm(1)=xstore(NGLLX,1,1,ispec)
+!!    yelm(1)=ystore(NGLLX,1,1,ispec)
+!!    zelm(1)=zstore(NGLLX,1,1,ispec)
+!!    xelm(2)=xstore(NGLLX,NGLLY,1,ispec)
+!!    yelm(2)=ystore(NGLLX,NGLLY,1,ispec)
+!!    zelm(2)=zstore(NGLLX,NGLLY,1,ispec)
+!!    xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
+!!    yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
+!!    zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
+!!    xelm(4)=xstore(NGLLX,1,NGLLZ,ispec)
+!!    yelm(4)=ystore(NGLLX,1,NGLLZ,ispec)
+!!    zelm(4)=zstore(NGLLX,1,NGLLZ,ispec)
+!
+!    xelm(1)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+!    yelm(1)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+!    zelm(1)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+!    xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+!    yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+!    zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+!    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+!    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+!    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+!    xelm(4)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+!    yelm(4)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+!    zelm(4)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+!
+!! takes coordinates from boundary faces
+!!    do i=1,NGNOD2D
+!!      xelm(i) = xcoord_iboun(i,2,ispec)
+!!      yelm(i) = ycoord_iboun(i,2,ispec)
+!!      zelm(i) = zcoord_iboun(i,2,ispec)
+!!    enddo
+!
+!    call compute_jacobian_2D(myrank,ispecb2,xelm,yelm,zelm, &
+!                  dershape2D_x,wgllwgll_yz, &
+!                  jacobian2D_xmax,normal_xmax,NGLLY,NGLLZ,NSPEC2D_xmax)
+!
+!    ! normal convention: points away from element
+!    ! switch normal direction if necessary
+!    do i=1,NGLLY
+!      do j=1,NGLLZ
+!        call get_element_face_normal(ispecb2, 2, xelm,yelm,zelm, &
+!                                ibool,nspec,nglob, &
+!                                xstore_dummy,ystore_dummy,zstore_dummy, &
+!                                normal_xmax(:,i,j,ispecb2) )
+!      enddo
+!    enddo
+!
+!  endif
+!
+!! on boundary: ymin
+!
+!  if(iboun(3,ispec)) then
+!
+!    ispecb3=ispecb3+1
+!    ibelm_ymin(ispecb3)=ispec
+!
+!! careful...
+!!   specify the 4 nodes for the 2-D boundary element
+!!    xelm(1)=xstore(1,1,1,ispec)
+!!    yelm(1)=ystore(1,1,1,ispec)
+!!    zelm(1)=zstore(1,1,1,ispec)
+!!    xelm(2)=xstore(NGLLX,1,1,ispec)
+!!    yelm(2)=ystore(NGLLX,1,1,ispec)
+!!    zelm(2)=zstore(NGLLX,1,1,ispec)
+!!    xelm(3)=xstore(NGLLX,1,NGLLZ,ispec)
+!!    yelm(3)=ystore(NGLLX,1,NGLLZ,ispec)
+!!    zelm(3)=zstore(NGLLX,1,NGLLZ,ispec)
+!!    xelm(4)=xstore(1,1,NGLLZ,ispec)
+!!    yelm(4)=ystore(1,1,NGLLZ,ispec)
+!!    zelm(4)=zstore(1,1,NGLLZ,ispec)
+!
+!    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+!    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+!    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+!    xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+!    yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+!    zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+!    xelm(3)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+!    yelm(3)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+!    zelm(3)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+!    xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
+!    yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
+!    zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
+!
+!! takes coordinates from boundary faces
+!!    do i=1,NGNOD2D
+!!      xelm(i) = xcoord_iboun(i,3,ispec)
+!!      yelm(i) = ycoord_iboun(i,3,ispec)
+!!      zelm(i) = zcoord_iboun(i,3,ispec)
+!!    enddo
+!
+!    call compute_jacobian_2D(myrank,ispecb3,xelm,yelm,zelm, &
+!                  dershape2D_y,wgllwgll_xz, &
+!                  jacobian2D_ymin,normal_ymin,NGLLX,NGLLZ,NSPEC2D_ymin)
+!
+!    ! normal convention: points away from element
+!    ! switch normal direction if necessary
+!    do i=1,NGLLX
+!      do j=1,NGLLZ
+!        call get_element_face_normal(ispecb3, 3, xelm,yelm,zelm, &
+!                                ibool,nspec,nglob, &
+!                                xstore_dummy,ystore_dummy,zstore_dummy, &
+!                                normal_ymin(:,i,j,ispecb3) )
+!      enddo
+!    enddo
+!
+!
+!  endif
+!
+!! on boundary: ymax
+!
+!  if(iboun(4,ispec)) then
+!
+!    ispecb4=ispecb4+1
+!    ibelm_ymax(ispecb4)=ispec
+!
+!!careful...
+!!   specify the 4 nodes for the 2-D boundary element
+!!    xelm(1)=xstore(1,NGLLY,1,ispec)
+!!    yelm(1)=ystore(1,NGLLY,1,ispec)
+!!    zelm(1)=zstore(1,NGLLY,1,ispec)
+!!    xelm(2)=xstore(NGLLX,NGLLY,1,ispec)
+!!    yelm(2)=ystore(NGLLX,NGLLY,1,ispec)
+!!    zelm(2)=zstore(NGLLX,NGLLY,1,ispec)
+!!    xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
+!!    yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
+!!    zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
+!!    xelm(4)=xstore(1,NGLLY,NGLLZ,ispec)
+!!    yelm(4)=ystore(1,NGLLY,NGLLZ,ispec)
+!!    zelm(4)=zstore(1,NGLLY,NGLLZ,ispec)
+!
+!    xelm(1)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+!    yelm(1)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+!    zelm(1)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+!    xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+!    yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+!    zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+!    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+!    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+!    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+!    xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+!    yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+!    zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+!
+!! takes coordinates from boundary faces
+!!    do i=1,NGNOD2D
+!!      xelm(i) = xcoord_iboun(i,4,ispec)
+!!      yelm(i) = ycoord_iboun(i,4,ispec)
+!!      zelm(i) = zcoord_iboun(i,4,ispec)
+!!    enddo
+!!
+!    call compute_jacobian_2D(myrank,ispecb4,xelm,yelm,zelm, &
+!                  dershape2D_y, wgllwgll_xz, &
+!                  jacobian2D_ymax,normal_ymax,NGLLX,NGLLZ,NSPEC2D_ymax)
+!
+!    ! normal convention: points away from element
+!    ! switch normal direction if necessary
+!    do i=1,NGLLX
+!      do j=1,NGLLZ
+!        call get_element_face_normal(ispecb4, 4, xelm,yelm,zelm, &
+!                                ibool,nspec,nglob, &
+!                                xstore_dummy,ystore_dummy,zstore_dummy, &
+!                                normal_ymax(:,i,j,ispecb4) )
+!      enddo
+!    enddo
+!
+!  endif
+!
+!! on boundary: bottom
+!
+!  if(iboun(5,ispec)) then
+!
+!    ispecb5=ispecb5+1
+!    ibelm_bottom(ispecb5)=ispec
+!
+!! careful...
+!! for bottom, this might be actually working... when mesh is oriented along z direction...
+!!    xelm(1)=xstore(1,1,1,ispec)
+!!    yelm(1)=ystore(1,1,1,ispec)
+!!    zelm(1)=zstore(1,1,1,ispec)
+!!    xelm(2)=xstore(NGLLX,1,1,ispec)
+!!    yelm(2)=ystore(NGLLX,1,1,ispec)
+!!    zelm(2)=zstore(NGLLX,1,1,ispec)
+!!    xelm(3)=xstore(NGLLX,NGLLY,1,ispec)
+!!    yelm(3)=ystore(NGLLX,NGLLY,1,ispec)
+!!    zelm(3)=zstore(NGLLX,NGLLY,1,ispec)
+!!    xelm(4)=xstore(1,NGLLY,1,ispec)
+!!    yelm(4)=ystore(1,NGLLY,1,ispec)
+!!    zelm(4)=zstore(1,NGLLY,1,ispec)
+!
+!    xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+!    yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+!    zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+!    xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+!    yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+!    zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+!    xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+!    yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+!    zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+!    xelm(4)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+!    yelm(4)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+!    zelm(4)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+!
+!
+!! takes coordinates from boundary faces
+!!    do i=1,NGNOD2D
+!!      xelm(i) = xcoord_iboun(i,5,ispec)
+!!      yelm(i) = ycoord_iboun(i,5,ispec)
+!!      zelm(i) = zcoord_iboun(i,5,ispec)
+!!    enddo
+!
+!    call compute_jacobian_2D(myrank,ispecb5,xelm,yelm,zelm,&
+!                  dershape2D_bottom,wgllwgll_xy, &
+!                  jacobian2D_bottom,normal_bottom,NGLLX,NGLLY,NSPEC2D_BOTTOM)
+!
+!    ! normal convention: points away from element
+!    ! switch normal direction if necessary
+!    do i=1,NGLLX
+!      do j=1,NGLLY
+!        call get_element_face_normal(ispecb5, 5, xelm,yelm,zelm, &
+!                                ibool,nspec,nglob, &
+!                                xstore_dummy,ystore_dummy,zstore_dummy, &
+!                                normal_bottom(:,i,j,ispecb5) )
+!      enddo
+!    enddo
+!
+!  endif
+!
+!! on boundary: top
+!
+!  if(iboun(6,ispec)) then
+!
+!    ispecb6=ispecb6+1
+!    ibelm_top(ispecb6)=ispec
+!
+!! careful...
+!! for top, this might be working as well ... when mesh is oriented along z direction...
+!!    xelm(1)=xstore(1,1,NGLLZ,ispec)
+!!    yelm(1)=ystore(1,1,NGLLZ,ispec)
+!!    zelm(1)=zstore(1,1,NGLLZ,ispec)
+!!    xelm(2)=xstore(NGLLX,1,NGLLZ,ispec)
+!!    yelm(2)=ystore(NGLLX,1,NGLLZ,ispec)
+!!    zelm(2)=zstore(NGLLX,1,NGLLZ,ispec)
+!!    xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
+!!    yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
+!!    zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
+!!    xelm(4)=xstore(1,NGLLY,NGLLZ,ispec)
+!!    yelm(4)=ystore(1,NGLLY,NGLLZ,ispec)
+!!    zelm(4)=zstore(1,NGLLY,NGLLZ,ispec)
+!
+!
+!! takes coordinates from boundary faces
+!!    do i=1,NGNOD2D
+!!      xelm(i) = xcoord_iboun(i,6,ispec)
+!!      yelm(i) = ycoord_iboun(i,6,ispec)
+!!      zelm(i) = zcoord_iboun(i,6,ispec)
+!!    enddo
+!
+!    call compute_jacobian_2D(myrank,ispecb6,xelm,yelm,zelm,&
+!                  dershape2D_top, wgllwgll_xy, &
+!                  jacobian2D_top,normal_top,NGLLX,NGLLY,NSPEC2D_TOP)
+!
+!    ! normal convention: points away from element
+!    ! switch normal direction if necessary
+!    do i=1,NGLLX
+!      do j=1,NGLLY
+!        call get_element_face_normal(ispecb6, 6, xelm,yelm,zelm, &
+!                                ibool,nspec,nglob, &
+!                                xstore_dummy,ystore_dummy,zstore_dummy, &
+!                                normal_top(:,i,j,ispecb6) )
+!      enddo
+!    enddo
+!
+!  endif
+!
+!  enddo
+!
+!! check theoretical value of elements
+!!  if(ispecb1 /= NSPEC2D_xmin) call exit_MPI(myrank,'ispecb1 should equal NSPEC2D_xmin')
+!!  if(ispecb2 /= NSPEC2D_xmax) call exit_MPI(myrank,'ispecb2 should equal NSPEC2D_xmax')
+!!  if(ispecb3 /= NSPEC2D_ymin) call exit_MPI(myrank,'ispecb3 should equal NSPEC2D_ymin')
+!!  if(ispecb4 /= NSPEC2D_ymax) call exit_MPI(myrank,'ispecb4 should equal NSPEC2D_ymax')
+!!  if(ispecb5 /= NSPEC2D_BOTTOM) call exit_MPI(myrank,'ispecb5 should equal NSPEC2D_BOTTOM')
+!!  if(ispecb6 /= NSPEC2D_TOP) call exit_MPI(myrank,'ispecb6 should equal NSPEC2D_TOP')
+!
+!  end subroutine get_jacobian_boundaries
+!
+!! -------------------------------------------------------
+!
+!  subroutine compute_jacobian_2D(myrank,ispecb,xelm,yelm,zelm, &
+!                                dershape2D,wgllwgll, &
+!                                jacobian2D,normal, &
+!                                NGLLA,NGLLB,NSPEC2DMAX_AB)
+!
+!  implicit none
+!
+!  include "constants.h"
+!
+!! generic routine that accepts any polynomial degree in each direction
+!
+!  integer ispecb,NGLLA,NGLLB,NSPEC2DMAX_AB,myrank
+!
+!  double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
+!  double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
+!  double precision wgllwgll
+!
+!  real(kind=CUSTOM_REAL) jacobian2D(NGLLA,NGLLB,NSPEC2DMAX_AB)
+!  real(kind=CUSTOM_REAL) normal(3,NGLLA,NGLLB,NSPEC2DMAX_AB)
+!
+!  integer i,j,ia
+!  double precision xxi,xeta,yxi,yeta,zxi,zeta
+!  double precision unx,uny,unz,jacobian
+!
+!  do j=1,NGLLB
+!    do i=1,NGLLA
+!
+!    xxi=ZERO
+!    xeta=ZERO
+!    yxi=ZERO
+!    yeta=ZERO
+!    zxi=ZERO
+!    zeta=ZERO
+!    do ia=1,NGNOD2D
+!      xxi=xxi+dershape2D(1,ia,i,j)*xelm(ia)
+!      xeta=xeta+dershape2D(2,ia,i,j)*xelm(ia)
+!      yxi=yxi+dershape2D(1,ia,i,j)*yelm(ia)
+!      yeta=yeta+dershape2D(2,ia,i,j)*yelm(ia)
+!      zxi=zxi+dershape2D(1,ia,i,j)*zelm(ia)
+!      zeta=zeta+dershape2D(2,ia,i,j)*zelm(ia)
+!    enddo
+!
+!!   calculate the unnormalized normal to the boundary
+!    unx=yxi*zeta-yeta*zxi
+!    uny=zxi*xeta-zeta*xxi
+!    unz=xxi*yeta-xeta*yxi
+!    jacobian=dsqrt(unx**2+uny**2+unz**2)
+!    if(jacobian == ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
+!
+!!   normalize normal vector and store weighted surface jacobian
+!
+!! distinguish if single or double precision for reals
+!    if(CUSTOM_REAL == SIZE_REAL) then
+!      jacobian2D(i,j,ispecb) = sngl(jacobian * wgllwgll(i,j) )
+!      normal(1,i,j,ispecb)=sngl(unx/jacobian)
+!      normal(2,i,j,ispecb)=sngl(uny/jacobian)
+!      normal(3,i,j,ispecb)=sngl(unz/jacobian)
+!    else
+!      jacobian2D(i,j,ispecb) = jacobian * wgllwgll(i,j)
+!      normal(1,i,j,ispecb)=unx/jacobian
+!      normal(2,i,j,ispecb)=uny/jacobian
+!      normal(3,i,j,ispecb)=unz/jacobian
+!    endif
+!
+!    enddo
+!  enddo
+!
+!  end subroutine compute_jacobian_2D
+!
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape2D.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape2D.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape2D.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,121 +1,121 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine get_shape2D(myrank,shape2D,dershape2D,xigll,yigll,NGLLA,NGLLB)
-
-  implicit none
-
-  include "constants.h"
-
-! generic routine that accepts any polynomial degree in each direction
-
-  integer NGLLA,NGLLB,myrank
-
-  double precision xigll(NGLLA)
-  double precision yigll(NGLLB)
-
-! 2D shape functions and their derivatives
-  double precision shape2D(NGNOD2D,NGLLA,NGLLB)
-  double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
-
-  integer i,j,ia
-
-! location of the nodes of the 2D quadrilateral elements
-  double precision xi,eta
-  double precision xi_map,eta_map
-
-! for checking the 2D shape functions
-  double precision sumshape,sumdershapexi,sumdershapeeta
-
-! check that the parameter file is correct
-  if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
-  if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
-
-! generate the 2D shape functions and their derivatives (4 nodes)
-  do i=1,NGLLA
-
-  xi=xigll(i)
-
-  do j=1,NGLLB
-
-    eta=yigll(j)
-
-! map coordinates to [0,1]
-    xi_map = (xi + 1.) / 2.
-    eta_map = (eta + 1.) / 2.
-
-! corner nodes
-    shape2D(1,i,j) = (1 - xi_map)*(1 - eta_map)
-    shape2D(2,i,j) = xi_map*(1 - eta_map)
-    shape2D(3,i,j) = xi_map*eta_map
-    shape2D(4,i,j) = (1 - xi_map)*eta_map
-
-    dershape2D(1,1,i,j) = (eta - 1.) / 4.
-    dershape2D(2,1,i,j) = (xi - 1.) / 4.
-
-    dershape2D(1,2,i,j) = (1. - eta) / 4.
-    dershape2D(2,2,i,j) = (-1. - xi) / 4.
-
-    dershape2D(1,3,i,j) = (1. + eta) / 4.
-    dershape2D(2,3,i,j) = (1. + xi) / 4.
-
-    dershape2D(1,4,i,j) = (- 1. - eta) / 4.
-    dershape2D(2,4,i,j) = (1. - xi) / 4.
-
-    enddo
-  enddo
-
-! check the 2D shape functions
-  do i=1,NGLLA
-    do j=1,NGLLB
-
-    sumshape=ZERO
-
-    sumdershapexi=ZERO
-    sumdershapeeta=ZERO
-
-    do ia=1,NGNOD2D
-      sumshape=sumshape+shape2D(ia,i,j)
-
-      sumdershapexi=sumdershapexi+dershape2D(1,ia,i,j)
-      sumdershapeeta=sumdershapeeta+dershape2D(2,ia,i,j)
-    enddo
-
-!   the sum of the shape functions should be 1
-    if(abs(sumshape-ONE)>TINYVAL) call exit_MPI(myrank,'error in 2D shape functions')
-
-!   the sum of the derivatives of the shape functions should be 0
-    if(abs(sumdershapexi)>TINYVAL) &
-      call exit_MPI(myrank,'error in xi derivatives of 2D shape function')
-
-    if(abs(sumdershapeeta)>TINYVAL) &
-      call exit_MPI(myrank,'error in eta derivatives of 2D shape function')
-
-    enddo
-  enddo
-
-  end subroutine get_shape2D
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine get_shape2D(myrank,shape2D,dershape2D,xigll,yigll,NGLLA,NGLLB)
+
+  implicit none
+
+  include "constants.h"
+
+! generic routine that accepts any polynomial degree in each direction
+
+  integer NGLLA,NGLLB,myrank
+
+  double precision xigll(NGLLA)
+  double precision yigll(NGLLB)
+
+! 2D shape functions and their derivatives
+  double precision shape2D(NGNOD2D,NGLLA,NGLLB)
+  double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
+
+  integer i,j,ia
+
+! location of the nodes of the 2D quadrilateral elements
+  double precision xi,eta
+  double precision xi_map,eta_map
+
+! for checking the 2D shape functions
+  double precision sumshape,sumdershapexi,sumdershapeeta
+
+! check that the parameter file is correct
+  if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
+  if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
+
+! generate the 2D shape functions and their derivatives (4 nodes)
+  do i=1,NGLLA
+
+  xi=xigll(i)
+
+  do j=1,NGLLB
+
+    eta=yigll(j)
+
+! map coordinates to [0,1]
+    xi_map = (xi + 1.) / 2.
+    eta_map = (eta + 1.) / 2.
+
+! corner nodes
+    shape2D(1,i,j) = (1 - xi_map)*(1 - eta_map)
+    shape2D(2,i,j) = xi_map*(1 - eta_map)
+    shape2D(3,i,j) = xi_map*eta_map
+    shape2D(4,i,j) = (1 - xi_map)*eta_map
+
+    dershape2D(1,1,i,j) = (eta - 1.) / 4.
+    dershape2D(2,1,i,j) = (xi - 1.) / 4.
+
+    dershape2D(1,2,i,j) = (1. - eta) / 4.
+    dershape2D(2,2,i,j) = (-1. - xi) / 4.
+
+    dershape2D(1,3,i,j) = (1. + eta) / 4.
+    dershape2D(2,3,i,j) = (1. + xi) / 4.
+
+    dershape2D(1,4,i,j) = (- 1. - eta) / 4.
+    dershape2D(2,4,i,j) = (1. - xi) / 4.
+
+    enddo
+  enddo
+
+! check the 2D shape functions
+  do i=1,NGLLA
+    do j=1,NGLLB
+
+    sumshape=ZERO
+
+    sumdershapexi=ZERO
+    sumdershapeeta=ZERO
+
+    do ia=1,NGNOD2D
+      sumshape=sumshape+shape2D(ia,i,j)
+
+      sumdershapexi=sumdershapexi+dershape2D(1,ia,i,j)
+      sumdershapeeta=sumdershapeeta+dershape2D(2,ia,i,j)
+    enddo
+
+!   the sum of the shape functions should be 1
+    if(abs(sumshape-ONE)>TINYVAL) call exit_MPI(myrank,'error in 2D shape functions')
+
+!   the sum of the derivatives of the shape functions should be 0
+    if(abs(sumdershapexi)>TINYVAL) &
+      call exit_MPI(myrank,'error in xi derivatives of 2D shape function')
+
+    if(abs(sumdershapeeta)>TINYVAL) &
+      call exit_MPI(myrank,'error in eta derivatives of 2D shape function')
+
+    enddo
+  enddo
+
+  end subroutine get_shape2D
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape3D.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape3D.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape3D.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,270 +1,270 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! 3D shape functions for 8-node element
-
-  subroutine get_shape3D(myrank,shape3D,dershape3D,xigll,yigll,zigll)
-
-  implicit none
-
-  include "constants.h"
-
-  integer myrank
-
-! Gauss-Lobatto-Legendre points of integration
-  double precision xigll(NGLLX)
-  double precision yigll(NGLLY)
-  double precision zigll(NGLLZ)
-
-! 3D shape functions and their derivatives
-  double precision shape3D(NGNOD,NGLLX,NGLLY,NGLLZ)
-  double precision dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ)
-
-  integer i,j,k,ia
-
-! location of the nodes of the 3D quadrilateral elements
-  double precision xi,eta,gamma
-  double precision ra1,ra2,rb1,rb2,rc1,rc2
-
-! for checking the 3D shape functions
-  double precision sumshape,sumdershapexi,sumdershapeeta,sumdershapegamma
-
-  double precision, parameter :: ONE_EIGHTH = 0.125d0
-
-! check that the parameter file is correct
-  if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
-
-! ***
-! *** create 3D shape functions and jacobian
-! ***
-
-!--- case of a 3D 8-node element (Dhatt-Touzot p. 115)
-
-  do i=1,NGLLX
-  do j=1,NGLLY
-  do k=1,NGLLZ
-
-  xi = xigll(i)
-  eta = yigll(j)
-  gamma = zigll(k)
-
-  ra1 = one + xi
-  ra2 = one - xi
-
-  rb1 = one + eta
-  rb2 = one - eta
-
-  rc1 = one + gamma
-  rc2 = one - gamma
-
-  shape3D(1,i,j,k) = ONE_EIGHTH*ra2*rb2*rc2
-  shape3D(2,i,j,k) = ONE_EIGHTH*ra1*rb2*rc2
-  shape3D(3,i,j,k) = ONE_EIGHTH*ra1*rb1*rc2
-  shape3D(4,i,j,k) = ONE_EIGHTH*ra2*rb1*rc2
-  shape3D(5,i,j,k) = ONE_EIGHTH*ra2*rb2*rc1
-  shape3D(6,i,j,k) = ONE_EIGHTH*ra1*rb2*rc1
-  shape3D(7,i,j,k) = ONE_EIGHTH*ra1*rb1*rc1
-  shape3D(8,i,j,k) = ONE_EIGHTH*ra2*rb1*rc1
-
-  dershape3D(1,1,i,j,k) = - ONE_EIGHTH*rb2*rc2
-  dershape3D(1,2,i,j,k) = ONE_EIGHTH*rb2*rc2
-  dershape3D(1,3,i,j,k) = ONE_EIGHTH*rb1*rc2
-  dershape3D(1,4,i,j,k) = - ONE_EIGHTH*rb1*rc2
-  dershape3D(1,5,i,j,k) = - ONE_EIGHTH*rb2*rc1
-  dershape3D(1,6,i,j,k) = ONE_EIGHTH*rb2*rc1
-  dershape3D(1,7,i,j,k) = ONE_EIGHTH*rb1*rc1
-  dershape3D(1,8,i,j,k) = - ONE_EIGHTH*rb1*rc1
-
-  dershape3D(2,1,i,j,k) = - ONE_EIGHTH*ra2*rc2
-  dershape3D(2,2,i,j,k) = - ONE_EIGHTH*ra1*rc2
-  dershape3D(2,3,i,j,k) = ONE_EIGHTH*ra1*rc2
-  dershape3D(2,4,i,j,k) = ONE_EIGHTH*ra2*rc2
-  dershape3D(2,5,i,j,k) = - ONE_EIGHTH*ra2*rc1
-  dershape3D(2,6,i,j,k) = - ONE_EIGHTH*ra1*rc1
-  dershape3D(2,7,i,j,k) = ONE_EIGHTH*ra1*rc1
-  dershape3D(2,8,i,j,k) = ONE_EIGHTH*ra2*rc1
-
-  dershape3D(3,1,i,j,k) = - ONE_EIGHTH*ra2*rb2
-  dershape3D(3,2,i,j,k) = - ONE_EIGHTH*ra1*rb2
-  dershape3D(3,3,i,j,k) = - ONE_EIGHTH*ra1*rb1
-  dershape3D(3,4,i,j,k) = - ONE_EIGHTH*ra2*rb1
-  dershape3D(3,5,i,j,k) = ONE_EIGHTH*ra2*rb2
-  dershape3D(3,6,i,j,k) = ONE_EIGHTH*ra1*rb2
-  dershape3D(3,7,i,j,k) = ONE_EIGHTH*ra1*rb1
-  dershape3D(3,8,i,j,k) = ONE_EIGHTH*ra2*rb1
-
-  enddo
-  enddo
-  enddo
-
-!--- check the shape functions and their derivatives
-
-  do i=1,NGLLX
-  do j=1,NGLLY
-  do k=1,NGLLZ
-
-  sumshape = ZERO
-  sumdershapexi = ZERO
-  sumdershapeeta = ZERO
-  sumdershapegamma = ZERO
-
-  do ia=1,NGNOD
-    sumshape = sumshape + shape3D(ia,i,j,k)
-    sumdershapexi = sumdershapexi + dershape3D(1,ia,i,j,k)
-    sumdershapeeta = sumdershapeeta + dershape3D(2,ia,i,j,k)
-    sumdershapegamma = sumdershapegamma + dershape3D(3,ia,i,j,k)
-  enddo
-
-! sum of shape functions should be one
-! sum of derivative of shape functions should be zero
-  if(abs(sumshape-one) >  TINYVAL) call exit_MPI(myrank,'error shape functions')
-  if(abs(sumdershapexi) >  TINYVAL) call exit_MPI(myrank,'error derivative xi shape functions')
-  if(abs(sumdershapeeta) >  TINYVAL) call exit_MPI(myrank,'error derivative eta shape functions')
-  if(abs(sumdershapegamma) >  TINYVAL) call exit_MPI(myrank,'error derivative gamma shape functions')
-
-  enddo
-  enddo
-  enddo
-
-  end subroutine get_shape3D
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! 3D shape functions for given, single xi/eta/gamma location
-
-  subroutine get_shape3D_single(myrank,shape3D,xi,eta,gamma)
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: myrank
-
-  ! 3D shape functions
-  double precision :: shape3D(NGNOD)
-
-  ! location
-  double precision :: xi,eta,gamma
-
-  ! local parameters
-  double precision :: ra1,ra2,rb1,rb2,rc1,rc2
-  double precision, parameter :: ONE_EIGHTH = 0.125d0
-  double precision :: sumshape
-  integer :: ia
-
-! check that the parameter file is correct
-  if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
-
-!--- case of a 3D 8-node element (Dhatt-Touzot p. 115)
-  ra1 = one + xi
-  ra2 = one - xi
-
-  rb1 = one + eta
-  rb2 = one - eta
-
-  rc1 = one + gamma
-  rc2 = one - gamma
-
-  ! shape functions
-  shape3D(1) = ONE_EIGHTH*ra2*rb2*rc2
-  shape3D(2) = ONE_EIGHTH*ra1*rb2*rc2
-  shape3D(3) = ONE_EIGHTH*ra1*rb1*rc2
-  shape3D(4) = ONE_EIGHTH*ra2*rb1*rc2
-  shape3D(5) = ONE_EIGHTH*ra2*rb2*rc1
-  shape3D(6) = ONE_EIGHTH*ra1*rb2*rc1
-  shape3D(7) = ONE_EIGHTH*ra1*rb1*rc1
-  shape3D(8) = ONE_EIGHTH*ra2*rb1*rc1
-
-  ! check the shape functions
-  sumshape = ZERO
-  do ia=1,NGNOD
-    sumshape = sumshape + shape3D(ia)
-  enddo
-
-  ! sum of shape functions should be one
-  ! sum of derivative of shape functions should be zero
-  if(abs(sumshape-one) >  TINYVAL) call exit_MPI(myrank,'error single shape functions')
-
-  end subroutine get_shape3D_single
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine get_shape3D_element_corners(xelm,yelm,zelm,ispec,&
-                        ibool,xstore,ystore,zstore,NSPEC_AB,NGLOB_AB)
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: ispec
-  integer :: NSPEC_AB,NGLOB_AB
-
-  real(kind=CUSTOM_REAL),dimension(NGNOD),intent(out) :: xelm,yelm,zelm
-
-  ! mesh coordinates
-  real(kind=CUSTOM_REAL),dimension(NGLOB_AB) :: xstore,ystore,zstore
-  integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-
-! 8 node corners
-  xelm(1)=xstore(ibool(1,1,1,ispec))
-  yelm(1)=ystore(ibool(1,1,1,ispec))
-  zelm(1)=zstore(ibool(1,1,1,ispec))
-
-  xelm(2)=xstore(ibool(NGLLX,1,1,ispec))
-  yelm(2)=ystore(ibool(NGLLX,1,1,ispec))
-  zelm(2)=zstore(ibool(NGLLX,1,1,ispec))
-
-  xelm(3)=xstore(ibool(NGLLX,NGLLY,1,ispec))
-  yelm(3)=ystore(ibool(NGLLX,NGLLY,1,ispec))
-  zelm(3)=zstore(ibool(NGLLX,NGLLY,1,ispec))
-
-  xelm(4)=xstore(ibool(1,NGLLY,1,ispec))
-  yelm(4)=ystore(ibool(1,NGLLY,1,ispec))
-  zelm(4)=zstore(ibool(1,NGLLY,1,ispec))
-
-  xelm(5)=xstore(ibool(1,1,NGLLZ,ispec))
-  yelm(5)=ystore(ibool(1,1,NGLLZ,ispec))
-  zelm(5)=zstore(ibool(1,1,NGLLZ,ispec))
-
-  xelm(6)=xstore(ibool(NGLLX,1,NGLLZ,ispec))
-  yelm(6)=ystore(ibool(NGLLX,1,NGLLZ,ispec))
-  zelm(6)=zstore(ibool(NGLLX,1,NGLLZ,ispec))
-
-  xelm(7)=xstore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
-  yelm(7)=ystore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
-  zelm(7)=zstore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
-
-  xelm(8)=xstore(ibool(1,NGLLY,NGLLZ,ispec))
-  yelm(8)=ystore(ibool(1,NGLLY,NGLLZ,ispec))
-  zelm(8)=zstore(ibool(1,NGLLY,NGLLZ,ispec))
-
-  end subroutine get_shape3D_element_corners
-
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! 3D shape functions for 8-node element
+
+  subroutine get_shape3D(myrank,shape3D,dershape3D,xigll,yigll,zigll)
+
+  implicit none
+
+  include "constants.h"
+
+  integer myrank
+
+! Gauss-Lobatto-Legendre points of integration
+  double precision xigll(NGLLX)
+  double precision yigll(NGLLY)
+  double precision zigll(NGLLZ)
+
+! 3D shape functions and their derivatives
+  double precision shape3D(NGNOD,NGLLX,NGLLY,NGLLZ)
+  double precision dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ)
+
+  integer i,j,k,ia
+
+! location of the nodes of the 3D quadrilateral elements
+  double precision xi,eta,gamma
+  double precision ra1,ra2,rb1,rb2,rc1,rc2
+
+! for checking the 3D shape functions
+  double precision sumshape,sumdershapexi,sumdershapeeta,sumdershapegamma
+
+  double precision, parameter :: ONE_EIGHTH = 0.125d0
+
+! check that the parameter file is correct
+  if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
+
+! ***
+! *** create 3D shape functions and jacobian
+! ***
+
+!--- case of a 3D 8-node element (Dhatt-Touzot p. 115)
+
+  do i=1,NGLLX
+  do j=1,NGLLY
+  do k=1,NGLLZ
+
+  xi = xigll(i)
+  eta = yigll(j)
+  gamma = zigll(k)
+
+  ra1 = one + xi
+  ra2 = one - xi
+
+  rb1 = one + eta
+  rb2 = one - eta
+
+  rc1 = one + gamma
+  rc2 = one - gamma
+
+  shape3D(1,i,j,k) = ONE_EIGHTH*ra2*rb2*rc2
+  shape3D(2,i,j,k) = ONE_EIGHTH*ra1*rb2*rc2
+  shape3D(3,i,j,k) = ONE_EIGHTH*ra1*rb1*rc2
+  shape3D(4,i,j,k) = ONE_EIGHTH*ra2*rb1*rc2
+  shape3D(5,i,j,k) = ONE_EIGHTH*ra2*rb2*rc1
+  shape3D(6,i,j,k) = ONE_EIGHTH*ra1*rb2*rc1
+  shape3D(7,i,j,k) = ONE_EIGHTH*ra1*rb1*rc1
+  shape3D(8,i,j,k) = ONE_EIGHTH*ra2*rb1*rc1
+
+  dershape3D(1,1,i,j,k) = - ONE_EIGHTH*rb2*rc2
+  dershape3D(1,2,i,j,k) = ONE_EIGHTH*rb2*rc2
+  dershape3D(1,3,i,j,k) = ONE_EIGHTH*rb1*rc2
+  dershape3D(1,4,i,j,k) = - ONE_EIGHTH*rb1*rc2
+  dershape3D(1,5,i,j,k) = - ONE_EIGHTH*rb2*rc1
+  dershape3D(1,6,i,j,k) = ONE_EIGHTH*rb2*rc1
+  dershape3D(1,7,i,j,k) = ONE_EIGHTH*rb1*rc1
+  dershape3D(1,8,i,j,k) = - ONE_EIGHTH*rb1*rc1
+
+  dershape3D(2,1,i,j,k) = - ONE_EIGHTH*ra2*rc2
+  dershape3D(2,2,i,j,k) = - ONE_EIGHTH*ra1*rc2
+  dershape3D(2,3,i,j,k) = ONE_EIGHTH*ra1*rc2
+  dershape3D(2,4,i,j,k) = ONE_EIGHTH*ra2*rc2
+  dershape3D(2,5,i,j,k) = - ONE_EIGHTH*ra2*rc1
+  dershape3D(2,6,i,j,k) = - ONE_EIGHTH*ra1*rc1
+  dershape3D(2,7,i,j,k) = ONE_EIGHTH*ra1*rc1
+  dershape3D(2,8,i,j,k) = ONE_EIGHTH*ra2*rc1
+
+  dershape3D(3,1,i,j,k) = - ONE_EIGHTH*ra2*rb2
+  dershape3D(3,2,i,j,k) = - ONE_EIGHTH*ra1*rb2
+  dershape3D(3,3,i,j,k) = - ONE_EIGHTH*ra1*rb1
+  dershape3D(3,4,i,j,k) = - ONE_EIGHTH*ra2*rb1
+  dershape3D(3,5,i,j,k) = ONE_EIGHTH*ra2*rb2
+  dershape3D(3,6,i,j,k) = ONE_EIGHTH*ra1*rb2
+  dershape3D(3,7,i,j,k) = ONE_EIGHTH*ra1*rb1
+  dershape3D(3,8,i,j,k) = ONE_EIGHTH*ra2*rb1
+
+  enddo
+  enddo
+  enddo
+
+!--- check the shape functions and their derivatives
+
+  do i=1,NGLLX
+  do j=1,NGLLY
+  do k=1,NGLLZ
+
+  sumshape = ZERO
+  sumdershapexi = ZERO
+  sumdershapeeta = ZERO
+  sumdershapegamma = ZERO
+
+  do ia=1,NGNOD
+    sumshape = sumshape + shape3D(ia,i,j,k)
+    sumdershapexi = sumdershapexi + dershape3D(1,ia,i,j,k)
+    sumdershapeeta = sumdershapeeta + dershape3D(2,ia,i,j,k)
+    sumdershapegamma = sumdershapegamma + dershape3D(3,ia,i,j,k)
+  enddo
+
+! sum of shape functions should be one
+! sum of derivative of shape functions should be zero
+  if(abs(sumshape-one) >  TINYVAL) call exit_MPI(myrank,'error shape functions')
+  if(abs(sumdershapexi) >  TINYVAL) call exit_MPI(myrank,'error derivative xi shape functions')
+  if(abs(sumdershapeeta) >  TINYVAL) call exit_MPI(myrank,'error derivative eta shape functions')
+  if(abs(sumdershapegamma) >  TINYVAL) call exit_MPI(myrank,'error derivative gamma shape functions')
+
+  enddo
+  enddo
+  enddo
+
+  end subroutine get_shape3D
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! 3D shape functions for given, single xi/eta/gamma location
+
+  subroutine get_shape3D_single(myrank,shape3D,xi,eta,gamma)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: myrank
+
+  ! 3D shape functions
+  double precision :: shape3D(NGNOD)
+
+  ! location
+  double precision :: xi,eta,gamma
+
+  ! local parameters
+  double precision :: ra1,ra2,rb1,rb2,rc1,rc2
+  double precision, parameter :: ONE_EIGHTH = 0.125d0
+  double precision :: sumshape
+  integer :: ia
+
+! check that the parameter file is correct
+  if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
+
+!--- case of a 3D 8-node element (Dhatt-Touzot p. 115)
+  ra1 = one + xi
+  ra2 = one - xi
+
+  rb1 = one + eta
+  rb2 = one - eta
+
+  rc1 = one + gamma
+  rc2 = one - gamma
+
+  ! shape functions
+  shape3D(1) = ONE_EIGHTH*ra2*rb2*rc2
+  shape3D(2) = ONE_EIGHTH*ra1*rb2*rc2
+  shape3D(3) = ONE_EIGHTH*ra1*rb1*rc2
+  shape3D(4) = ONE_EIGHTH*ra2*rb1*rc2
+  shape3D(5) = ONE_EIGHTH*ra2*rb2*rc1
+  shape3D(6) = ONE_EIGHTH*ra1*rb2*rc1
+  shape3D(7) = ONE_EIGHTH*ra1*rb1*rc1
+  shape3D(8) = ONE_EIGHTH*ra2*rb1*rc1
+
+  ! check the shape functions
+  sumshape = ZERO
+  do ia=1,NGNOD
+    sumshape = sumshape + shape3D(ia)
+  enddo
+
+  ! sum of shape functions should be one
+  ! sum of derivative of shape functions should be zero
+  if(abs(sumshape-one) >  TINYVAL) call exit_MPI(myrank,'error single shape functions')
+
+  end subroutine get_shape3D_single
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine get_shape3D_element_corners(xelm,yelm,zelm,ispec,&
+                        ibool,xstore,ystore,zstore,NSPEC_AB,NGLOB_AB)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: ispec
+  integer :: NSPEC_AB,NGLOB_AB
+
+  real(kind=CUSTOM_REAL),dimension(NGNOD),intent(out) :: xelm,yelm,zelm
+
+  ! mesh coordinates
+  real(kind=CUSTOM_REAL),dimension(NGLOB_AB) :: xstore,ystore,zstore
+  integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+
+! 8 node corners
+  xelm(1)=xstore(ibool(1,1,1,ispec))
+  yelm(1)=ystore(ibool(1,1,1,ispec))
+  zelm(1)=zstore(ibool(1,1,1,ispec))
+
+  xelm(2)=xstore(ibool(NGLLX,1,1,ispec))
+  yelm(2)=ystore(ibool(NGLLX,1,1,ispec))
+  zelm(2)=zstore(ibool(NGLLX,1,1,ispec))
+
+  xelm(3)=xstore(ibool(NGLLX,NGLLY,1,ispec))
+  yelm(3)=ystore(ibool(NGLLX,NGLLY,1,ispec))
+  zelm(3)=zstore(ibool(NGLLX,NGLLY,1,ispec))
+
+  xelm(4)=xstore(ibool(1,NGLLY,1,ispec))
+  yelm(4)=ystore(ibool(1,NGLLY,1,ispec))
+  zelm(4)=zstore(ibool(1,NGLLY,1,ispec))
+
+  xelm(5)=xstore(ibool(1,1,NGLLZ,ispec))
+  yelm(5)=ystore(ibool(1,1,NGLLZ,ispec))
+  zelm(5)=zstore(ibool(1,1,NGLLZ,ispec))
+
+  xelm(6)=xstore(ibool(NGLLX,1,NGLLZ,ispec))
+  yelm(6)=ystore(ibool(NGLLX,1,NGLLZ,ispec))
+  zelm(6)=zstore(ibool(NGLLX,1,NGLLZ,ispec))
+
+  xelm(7)=xstore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
+  yelm(7)=ystore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
+  zelm(7)=zstore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
+
+  xelm(8)=xstore(ibool(1,NGLLY,NGLLZ,ispec))
+  yelm(8)=ystore(ibool(1,NGLLY,NGLLZ,ispec))
+  zelm(8)=zstore(ibool(1,NGLLY,NGLLZ,ispec))
+
+  end subroutine get_shape3D_element_corners
+
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/gll_library.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/gll_library.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/gll_library.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,554 +1,554 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!=======================================================================
-!
-!  Library to compute the Gauss-Lobatto-Legendre points and weights
-!  Based on Gauss-Lobatto routines from M.I.T.
-!  Department of Mechanical Engineering
-!
-!=======================================================================
-
-  double precision function endw1(n,alpha,beta)
-
-  implicit none
-
-  integer n
-  double precision alpha,beta
-
-  double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0,three=3.d0,four=4.d0
-  double precision apb,f1,fint1,fint2,f2,di,abn,abnn,a1,a2,a3,f3
-  double precision, external :: gammaf
-  integer i
-
-  f3 = zero
-  apb   = alpha+beta
-  if (n == 0) then
-   endw1 = zero
-   return
-  endif
-  f1   = gammaf(alpha+two)*gammaf(beta+one)/gammaf(apb+three)
-  f1   = f1*(apb+two)*two**(apb+two)/two
-  if (n == 1) then
-   endw1 = f1
-   return
-  endif
-  fint1 = gammaf(alpha+two)*gammaf(beta+one)/gammaf(apb+three)
-  fint1 = fint1*two**(apb+two)
-  fint2 = gammaf(alpha+two)*gammaf(beta+two)/gammaf(apb+four)
-  fint2 = fint2*two**(apb+three)
-  f2    = (-two*(beta+two)*fint1 + (apb+four)*fint2) * (apb+three)/four
-  if (n == 2) then
-   endw1 = f2
-   return
-  endif
-  do i=3,n
-   di   = dble(i-1)
-   abn  = alpha+beta+di
-   abnn = abn+di
-   a1   = -(two*(di+alpha)*(di+beta))/(abn*abnn*(abnn+one))
-   a2   =  (two*(alpha-beta))/(abnn*(abnn+two))
-   a3   =  (two*(abn+one))/((abnn+two)*(abnn+one))
-   f3   =  -(a2*f2+a1*f1)/a3
-   f1   = f2
-   f2   = f3
-  enddo
-  endw1  = f3
-
-  end function endw1
-
-!
-!=======================================================================
-!
-
-  double precision function endw2(n,alpha,beta)
-
-  implicit none
-
-  integer n
-  double precision alpha,beta
-
-  double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0,three=3.d0,four=4.d0
-  double precision apb,f1,fint1,fint2,f2,di,abn,abnn,a1,a2,a3,f3
-  double precision, external :: gammaf
-  integer i
-
-  apb   = alpha+beta
-  f3 = zero
-  if (n == 0) then
-   endw2 = zero
-   return
-  endif
-  f1   = gammaf(alpha+one)*gammaf(beta+two)/gammaf(apb+three)
-  f1   = f1*(apb+two)*two**(apb+two)/two
-  if (n == 1) then
-   endw2 = f1
-   return
-  endif
-  fint1 = gammaf(alpha+one)*gammaf(beta+two)/gammaf(apb+three)
-  fint1 = fint1*two**(apb+two)
-  fint2 = gammaf(alpha+two)*gammaf(beta+two)/gammaf(apb+four)
-  fint2 = fint2*two**(apb+three)
-  f2    = (two*(alpha+two)*fint1 - (apb+four)*fint2) * (apb+three)/four
-  if (n == 2) then
-   endw2 = f2
-   return
-  endif
-  do i=3,n
-   di   = dble(i-1)
-   abn  = alpha+beta+di
-   abnn = abn+di
-   a1   =  -(two*(di+alpha)*(di+beta))/(abn*abnn*(abnn+one))
-   a2   =  (two*(alpha-beta))/(abnn*(abnn+two))
-   a3   =  (two*(abn+one))/((abnn+two)*(abnn+one))
-   f3   =  -(a2*f2+a1*f1)/a3
-   f1   = f2
-   f2   = f3
-  enddo
-  endw2  = f3
-
-  end function endw2
-
-!
-!=======================================================================
-!
-
-  double precision function gammaf (x)
-
-  implicit none
-
-  double precision, parameter :: pi = 3.141592653589793d0
-
-  double precision x
-
-  double precision, parameter :: half=0.5d0,one=1.d0,two=2.d0
-
-  gammaf = one
-
-  if (x == -half) gammaf = -two*dsqrt(pi)
-  if (x ==  half) gammaf =  dsqrt(pi)
-  if (x ==  one ) gammaf =  one
-  if (x ==  two ) gammaf =  one
-  if (x ==  1.5d0) gammaf =  dsqrt(pi)/2.d0
-  if (x ==  2.5d0) gammaf =  1.5d0*dsqrt(pi)/2.d0
-  if (x ==  3.5d0) gammaf =  2.5d0*1.5d0*dsqrt(pi)/2.d0
-  if (x ==  3.d0 ) gammaf =  2.d0
-  if (x ==  4.d0 ) gammaf = 6.d0
-  if (x ==  5.d0 ) gammaf = 24.d0
-  if (x ==  6.d0 ) gammaf = 120.d0
-
-  end function gammaf
-
-!
-!=====================================================================
-!
-
-  subroutine jacg (xjac,np,alpha,beta)
-
-!=======================================================================
-!
-! computes np Gauss points, which are the zeros of the
-! Jacobi polynomial with parameters alpha and beta
-!
-!                  .alpha = beta =  0.0  ->  Legendre points
-!                  .alpha = beta = -0.5  ->  Chebyshev points
-!
-!=======================================================================
-
-  implicit none
-
-  integer np
-  double precision alpha,beta
-  double precision xjac(np)
-
-  integer k,j,i,jmin,jm,n
-  double precision xlast,dth,x,x1,x2,recsum,delx,xmin,swap
-  double precision p,pd,pm1,pdm1,pm2,pdm2
-
-  integer, parameter :: K_MAX_ITER = 10
-  double precision, parameter :: zero = 0.d0, eps = 1.0d-12
-
-  pm1 = zero
-  pm2 = zero
-  pdm1 = zero
-  pdm2 = zero
-
-  xlast = 0.d0
-  n   = np-1
-  dth = 4.d0*datan(1.d0)/(2.d0*dble(n)+2.d0)
-  p = 0.d0
-  pd = 0.d0
-  jmin = 0
-  do j=1,np
-   if(j == 1) then
-      x = dcos((2.d0*(dble(j)-1.d0)+1.d0)*dth)
-   else
-      x1 = dcos((2.d0*(dble(j)-1.d0)+1.d0)*dth)
-      x2 = xlast
-      x  = (x1+x2)/2.d0
-   endif
-   do k=1,K_MAX_ITER
-      call jacobf (p,pd,pm1,pdm1,pm2,pdm2,np,alpha,beta,x)
-      recsum = 0.d0
-      jm = j-1
-      do i=1,jm
-         recsum = recsum+1.d0/(x-xjac(np-i+1))
-      enddo
-      delx = -p/(pd-recsum*p)
-      x    = x+delx
-      if(abs(delx) < eps) goto 31
-   enddo
- 31      continue
-   xjac(np-j+1) = x
-   xlast        = x
-  enddo
-  do i=1,np
-   xmin = 2.d0
-   do j=i,np
-      if(xjac(j) < xmin) then
-         xmin = xjac(j)
-         jmin = j
-      endif
-   enddo
-   if(jmin /= i) then
-      swap = xjac(i)
-      xjac(i) = xjac(jmin)
-      xjac(jmin) = swap
-   endif
-  enddo
-
-  end subroutine jacg
-
-!
-!=====================================================================
-!
-
-  subroutine jacobf (poly,pder,polym1,pderm1,polym2,pderm2,n,alp,bet,x)
-
-!=======================================================================
-!
-! Computes the Jacobi polynomial of degree n and its derivative at x
-!
-!=======================================================================
-
-  implicit none
-
-  double precision poly,pder,polym1,pderm1,polym2,pderm2,alp,bet,x
-  integer n
-
-  double precision apb,polyl,pderl,dk,a1,a2,b3,a3,a4,polyn,pdern,psave,pdsave
-  integer k
-
-  apb  = alp+bet
-  poly = 1.d0
-  pder = 0.d0
-  psave = 0.d0
-  pdsave = 0.d0
-
-  if (n == 0) return
-
-  polyl = poly
-  pderl = pder
-  poly  = (alp-bet+(apb+2.d0)*x)/2.d0
-  pder  = (apb+2.d0)/2.d0
-  if (n == 1) return
-
-  do k=2,n
-    dk = dble(k)
-    a1 = 2.d0*dk*(dk+apb)*(2.d0*dk+apb-2.d0)
-    a2 = (2.d0*dk+apb-1.d0)*(alp**2-bet**2)
-    b3 = (2.d0*dk+apb-2.d0)
-    a3 = b3*(b3+1.d0)*(b3+2.d0)
-    a4 = 2.d0*(dk+alp-1.d0)*(dk+bet-1.d0)*(2.d0*dk+apb)
-    polyn  = ((a2+a3*x)*poly-a4*polyl)/a1
-    pdern  = ((a2+a3*x)*pder-a4*pderl+a3*poly)/a1
-    psave  = polyl
-    pdsave = pderl
-    polyl  = poly
-    poly   = polyn
-    pderl  = pder
-    pder   = pdern
-  enddo
-
-  polym1 = polyl
-  pderm1 = pderl
-  polym2 = psave
-  pderm2 = pdsave
-
-  end subroutine jacobf
-
-!
-!------------------------------------------------------------------------
-!
-
-  double precision FUNCTION PNDLEG (Z,N)
-
-!------------------------------------------------------------------------
-!
-!     Compute the derivative of the Nth order Legendre polynomial at Z.
-!     Based on the recursion formula for the Legendre polynomials.
-!
-!------------------------------------------------------------------------
-  implicit none
-
-  double precision z
-  integer n
-
-  double precision P1,P2,P1D,P2D,P3D,FK,P3
-  integer k
-
-  P1   = 1.d0
-  P2   = Z
-  P1D  = 0.d0
-  P2D  = 1.d0
-  P3D  = 1.d0
-
-  do K = 1, N-1
-    FK  = dble(K)
-    P3  = ((2.d0*FK+1.d0)*Z*P2 - FK*P1)/(FK+1.d0)
-    P3D = ((2.d0*FK+1.d0)*P2 + (2.d0*FK+1.d0)*Z*P2D - FK*P1D) / (FK+1.d0)
-    P1  = P2
-    P2  = P3
-    P1D = P2D
-    P2D = P3D
-  enddo
-
-  PNDLEG = P3D
-
-  end function pndleg
-
-!
-!------------------------------------------------------------------------
-!
-
-  double precision FUNCTION PNLEG (Z,N)
-
-!------------------------------------------------------------------------
-!
-!     Compute the value of the Nth order Legendre polynomial at Z.
-!     Based on the recursion formula for the Legendre polynomials.
-!
-!------------------------------------------------------------------------
-  implicit none
-
-  double precision z
-  integer n
-
-  double precision P1,P2,P3,FK
-  integer k
-
-  P1   = 1.d0
-  P2   = Z
-  P3   = P2
-
-  do K = 1, N-1
-    FK  = dble(K)
-    P3  = ((2.d0*FK+1.d0)*Z*P2 - FK*P1)/(FK+1.d0)
-    P1  = P2
-    P2  = P3
-  enddo
-
-  PNLEG = P3
-
-  end function pnleg
-
-!
-!------------------------------------------------------------------------
-!
-
-  double precision function pnormj (n,alpha,beta)
-
-  implicit none
-
-  double precision alpha,beta
-  integer n
-
-  double precision one,two,dn,const,prod,dindx,frac
-  double precision, external :: gammaf
-  integer i
-
-  one   = 1.d0
-  two   = 2.d0
-  dn    = dble(n)
-  const = alpha+beta+one
-
-  if (n <= 1) then
-    prod   = gammaf(dn+alpha)*gammaf(dn+beta)
-    prod   = prod/(gammaf(dn)*gammaf(dn+alpha+beta))
-    pnormj = prod * two**const/(two*dn+const)
-    return
-  endif
-
-  prod  = gammaf(alpha+one)*gammaf(beta+one)
-  prod  = prod/(two*(one+const)*gammaf(const+one))
-  prod  = prod*(one+alpha)*(two+alpha)
-  prod  = prod*(one+beta)*(two+beta)
-
-  do i=3,n
-    dindx = dble(i)
-    frac  = (dindx+alpha)*(dindx+beta)/(dindx*(dindx+alpha+beta))
-    prod  = prod*frac
-  enddo
-
-  pnormj = prod * two**const/(two*dn+const)
-
-  end function pnormj
-
-!
-!------------------------------------------------------------------------
-!
-
-  subroutine zwgjd(z,w,np,alpha,beta)
-
-!=======================================================================
-!
-!     Z w g j d : Generate np Gauss-Jacobi points and weights
-!                 associated with Jacobi polynomial of degree n = np-1
-!
-!     Note : Coefficients alpha and beta must be greater than -1.
-!     ----
-!=======================================================================
-
-  implicit none
-
-  double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0
-
-  integer np
-  double precision z(np),w(np)
-  double precision alpha,beta
-
-  integer n,np1,np2,i
-  double precision p,pd,pm1,pdm1,pm2,pdm2
-  double precision apb,dnp1,dnp2,fac1,fac2,fac3,fnorm,rcoef
-  double precision, external :: gammaf,pnormj
-
-  pd = zero
-  pm1 = zero
-  pm2 = zero
-  pdm1 = zero
-  pdm2 = zero
-
-  n    = np-1
-  apb  = alpha+beta
-  p    = zero
-  pdm1 = zero
-
-  if (np <= 0) stop 'minimum number of Gauss points is 1'
-
-  if ((alpha <= -one) .or. (beta <= -one)) stop 'alpha and beta must be greater than -1'
-
-  if (np == 1) then
-   z(1) = (beta-alpha)/(apb+two)
-   w(1) = gammaf(alpha+one)*gammaf(beta+one)/gammaf(apb+two) * two**(apb+one)
-   return
-  endif
-
-  call jacg(z,np,alpha,beta)
-
-  np1   = n+1
-  np2   = n+2
-  dnp1  = dble(np1)
-  dnp2  = dble(np2)
-  fac1  = dnp1+alpha+beta+one
-  fac2  = fac1+dnp1
-  fac3  = fac2+one
-  fnorm = pnormj(np1,alpha,beta)
-  rcoef = (fnorm*fac2*fac3)/(two*fac1*dnp2)
-  do i=1,np
-    call jacobf(p,pd,pm1,pdm1,pm2,pdm2,np2,alpha,beta,z(i))
-    w(i) = -rcoef/(p*pdm1)
-  enddo
-
-  end subroutine zwgjd
-
-!
-!------------------------------------------------------------------------
-!
-
-  subroutine zwgljd(z,w,np,alpha,beta)
-
-!=======================================================================
-!
-!     Z w g l j d : Generate np Gauss-Lobatto-Jacobi points and the
-!     -----------   weights associated with Jacobi polynomials of degree
-!                   n = np-1.
-!
-!     Note : alpha and beta coefficients must be greater than -1.
-!            Legendre polynomials are special case of Jacobi polynomials
-!            just by setting alpha and beta to 0.
-!
-!=======================================================================
-
-  implicit none
-
-  double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0
-
-  integer np
-  double precision alpha,beta
-  double precision z(np), w(np)
-
-  integer n,nm1,i
-  double precision p,pd,pm1,pdm1,pm2,pdm2
-  double precision alpg,betg
-  double precision, external :: endw1,endw2
-
-  p = zero
-  pm1 = zero
-  pm2 = zero
-  pdm1 = zero
-  pdm2 = zero
-
-  n   = np-1
-  nm1 = n-1
-  pd  = zero
-
-  if (np <= 1) stop 'minimum number of Gauss-Lobatto points is 2'
-
-! with spectral elements, use at least 3 points
-  if (np <= 2) stop 'minimum number of Gauss-Lobatto points for the SEM is 3'
-
-  if ((alpha <= -one) .or. (beta <= -one)) stop 'alpha and beta must be greater than -1'
-
-  if (nm1 > 0) then
-    alpg  = alpha+one
-    betg  = beta+one
-    call zwgjd(z(2),w(2),nm1,alpg,betg)
-  endif
-
-  z(1)  = - one
-  z(np) =  one
-
-  do i=2,np-1
-   w(i) = w(i)/(one-z(i)**2)
-  enddo
-
-  call jacobf(p,pd,pm1,pdm1,pm2,pdm2,n,alpha,beta,z(1))
-  w(1)  = endw1(n,alpha,beta)/(two*pd)
-  call jacobf(p,pd,pm1,pdm1,pm2,pdm2,n,alpha,beta,z(np))
-  w(np) = endw2(n,alpha,beta)/(two*pd)
-
-  end subroutine zwgljd
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!=======================================================================
+!
+!  Library to compute the Gauss-Lobatto-Legendre points and weights
+!  Based on Gauss-Lobatto routines from M.I.T.
+!  Department of Mechanical Engineering
+!
+!=======================================================================
+
+  double precision function endw1(n,alpha,beta)
+
+  implicit none
+
+  integer n
+  double precision alpha,beta
+
+  double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0,three=3.d0,four=4.d0
+  double precision apb,f1,fint1,fint2,f2,di,abn,abnn,a1,a2,a3,f3
+  double precision, external :: gammaf
+  integer i
+
+  f3 = zero
+  apb   = alpha+beta
+  if (n == 0) then
+   endw1 = zero
+   return
+  endif
+  f1   = gammaf(alpha+two)*gammaf(beta+one)/gammaf(apb+three)
+  f1   = f1*(apb+two)*two**(apb+two)/two
+  if (n == 1) then
+   endw1 = f1
+   return
+  endif
+  fint1 = gammaf(alpha+two)*gammaf(beta+one)/gammaf(apb+three)
+  fint1 = fint1*two**(apb+two)
+  fint2 = gammaf(alpha+two)*gammaf(beta+two)/gammaf(apb+four)
+  fint2 = fint2*two**(apb+three)
+  f2    = (-two*(beta+two)*fint1 + (apb+four)*fint2) * (apb+three)/four
+  if (n == 2) then
+   endw1 = f2
+   return
+  endif
+  do i=3,n
+   di   = dble(i-1)
+   abn  = alpha+beta+di
+   abnn = abn+di
+   a1   = -(two*(di+alpha)*(di+beta))/(abn*abnn*(abnn+one))
+   a2   =  (two*(alpha-beta))/(abnn*(abnn+two))
+   a3   =  (two*(abn+one))/((abnn+two)*(abnn+one))
+   f3   =  -(a2*f2+a1*f1)/a3
+   f1   = f2
+   f2   = f3
+  enddo
+  endw1  = f3
+
+  end function endw1
+
+!
+!=======================================================================
+!
+
+  double precision function endw2(n,alpha,beta)
+
+  implicit none
+
+  integer n
+  double precision alpha,beta
+
+  double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0,three=3.d0,four=4.d0
+  double precision apb,f1,fint1,fint2,f2,di,abn,abnn,a1,a2,a3,f3
+  double precision, external :: gammaf
+  integer i
+
+  apb   = alpha+beta
+  f3 = zero
+  if (n == 0) then
+   endw2 = zero
+   return
+  endif
+  f1   = gammaf(alpha+one)*gammaf(beta+two)/gammaf(apb+three)
+  f1   = f1*(apb+two)*two**(apb+two)/two
+  if (n == 1) then
+   endw2 = f1
+   return
+  endif
+  fint1 = gammaf(alpha+one)*gammaf(beta+two)/gammaf(apb+three)
+  fint1 = fint1*two**(apb+two)
+  fint2 = gammaf(alpha+two)*gammaf(beta+two)/gammaf(apb+four)
+  fint2 = fint2*two**(apb+three)
+  f2    = (two*(alpha+two)*fint1 - (apb+four)*fint2) * (apb+three)/four
+  if (n == 2) then
+   endw2 = f2
+   return
+  endif
+  do i=3,n
+   di   = dble(i-1)
+   abn  = alpha+beta+di
+   abnn = abn+di
+   a1   =  -(two*(di+alpha)*(di+beta))/(abn*abnn*(abnn+one))
+   a2   =  (two*(alpha-beta))/(abnn*(abnn+two))
+   a3   =  (two*(abn+one))/((abnn+two)*(abnn+one))
+   f3   =  -(a2*f2+a1*f1)/a3
+   f1   = f2
+   f2   = f3
+  enddo
+  endw2  = f3
+
+  end function endw2
+
+!
+!=======================================================================
+!
+
+  double precision function gammaf (x)
+
+  implicit none
+
+  double precision, parameter :: pi = 3.141592653589793d0
+
+  double precision x
+
+  double precision, parameter :: half=0.5d0,one=1.d0,two=2.d0
+
+  gammaf = one
+
+  if (x == -half) gammaf = -two*dsqrt(pi)
+  if (x ==  half) gammaf =  dsqrt(pi)
+  if (x ==  one ) gammaf =  one
+  if (x ==  two ) gammaf =  one
+  if (x ==  1.5d0) gammaf =  dsqrt(pi)/2.d0
+  if (x ==  2.5d0) gammaf =  1.5d0*dsqrt(pi)/2.d0
+  if (x ==  3.5d0) gammaf =  2.5d0*1.5d0*dsqrt(pi)/2.d0
+  if (x ==  3.d0 ) gammaf =  2.d0
+  if (x ==  4.d0 ) gammaf = 6.d0
+  if (x ==  5.d0 ) gammaf = 24.d0
+  if (x ==  6.d0 ) gammaf = 120.d0
+
+  end function gammaf
+
+!
+!=====================================================================
+!
+
+  subroutine jacg (xjac,np,alpha,beta)
+
+!=======================================================================
+!
+! computes np Gauss points, which are the zeros of the
+! Jacobi polynomial with parameters alpha and beta
+!
+!                  .alpha = beta =  0.0  ->  Legendre points
+!                  .alpha = beta = -0.5  ->  Chebyshev points
+!
+!=======================================================================
+
+  implicit none
+
+  integer np
+  double precision alpha,beta
+  double precision xjac(np)
+
+  integer k,j,i,jmin,jm,n
+  double precision xlast,dth,x,x1,x2,recsum,delx,xmin,swap
+  double precision p,pd,pm1,pdm1,pm2,pdm2
+
+  integer, parameter :: K_MAX_ITER = 10
+  double precision, parameter :: zero = 0.d0, eps = 1.0d-12
+
+  pm1 = zero
+  pm2 = zero
+  pdm1 = zero
+  pdm2 = zero
+
+  xlast = 0.d0
+  n   = np-1
+  dth = 4.d0*datan(1.d0)/(2.d0*dble(n)+2.d0)
+  p = 0.d0
+  pd = 0.d0
+  jmin = 0
+  do j=1,np
+   if(j == 1) then
+      x = dcos((2.d0*(dble(j)-1.d0)+1.d0)*dth)
+   else
+      x1 = dcos((2.d0*(dble(j)-1.d0)+1.d0)*dth)
+      x2 = xlast
+      x  = (x1+x2)/2.d0
+   endif
+   do k=1,K_MAX_ITER
+      call jacobf (p,pd,pm1,pdm1,pm2,pdm2,np,alpha,beta,x)
+      recsum = 0.d0
+      jm = j-1
+      do i=1,jm
+         recsum = recsum+1.d0/(x-xjac(np-i+1))
+      enddo
+      delx = -p/(pd-recsum*p)
+      x    = x+delx
+      if(abs(delx) < eps) goto 31
+   enddo
+ 31      continue
+   xjac(np-j+1) = x
+   xlast        = x
+  enddo
+  do i=1,np
+   xmin = 2.d0
+   do j=i,np
+      if(xjac(j) < xmin) then
+         xmin = xjac(j)
+         jmin = j
+      endif
+   enddo
+   if(jmin /= i) then
+      swap = xjac(i)
+      xjac(i) = xjac(jmin)
+      xjac(jmin) = swap
+   endif
+  enddo
+
+  end subroutine jacg
+
+!
+!=====================================================================
+!
+
+  subroutine jacobf (poly,pder,polym1,pderm1,polym2,pderm2,n,alp,bet,x)
+
+!=======================================================================
+!
+! Computes the Jacobi polynomial of degree n and its derivative at x
+!
+!=======================================================================
+
+  implicit none
+
+  double precision poly,pder,polym1,pderm1,polym2,pderm2,alp,bet,x
+  integer n
+
+  double precision apb,polyl,pderl,dk,a1,a2,b3,a3,a4,polyn,pdern,psave,pdsave
+  integer k
+
+  apb  = alp+bet
+  poly = 1.d0
+  pder = 0.d0
+  psave = 0.d0
+  pdsave = 0.d0
+
+  if (n == 0) return
+
+  polyl = poly
+  pderl = pder
+  poly  = (alp-bet+(apb+2.d0)*x)/2.d0
+  pder  = (apb+2.d0)/2.d0
+  if (n == 1) return
+
+  do k=2,n
+    dk = dble(k)
+    a1 = 2.d0*dk*(dk+apb)*(2.d0*dk+apb-2.d0)
+    a2 = (2.d0*dk+apb-1.d0)*(alp**2-bet**2)
+    b3 = (2.d0*dk+apb-2.d0)
+    a3 = b3*(b3+1.d0)*(b3+2.d0)
+    a4 = 2.d0*(dk+alp-1.d0)*(dk+bet-1.d0)*(2.d0*dk+apb)
+    polyn  = ((a2+a3*x)*poly-a4*polyl)/a1
+    pdern  = ((a2+a3*x)*pder-a4*pderl+a3*poly)/a1
+    psave  = polyl
+    pdsave = pderl
+    polyl  = poly
+    poly   = polyn
+    pderl  = pder
+    pder   = pdern
+  enddo
+
+  polym1 = polyl
+  pderm1 = pderl
+  polym2 = psave
+  pderm2 = pdsave
+
+  end subroutine jacobf
+
+!
+!------------------------------------------------------------------------
+!
+
+  double precision FUNCTION PNDLEG (Z,N)
+
+!------------------------------------------------------------------------
+!
+!     Compute the derivative of the Nth order Legendre polynomial at Z.
+!     Based on the recursion formula for the Legendre polynomials.
+!
+!------------------------------------------------------------------------
+  implicit none
+
+  double precision z
+  integer n
+
+  double precision P1,P2,P1D,P2D,P3D,FK,P3
+  integer k
+
+  P1   = 1.d0
+  P2   = Z
+  P1D  = 0.d0
+  P2D  = 1.d0
+  P3D  = 1.d0
+
+  do K = 1, N-1
+    FK  = dble(K)
+    P3  = ((2.d0*FK+1.d0)*Z*P2 - FK*P1)/(FK+1.d0)
+    P3D = ((2.d0*FK+1.d0)*P2 + (2.d0*FK+1.d0)*Z*P2D - FK*P1D) / (FK+1.d0)
+    P1  = P2
+    P2  = P3
+    P1D = P2D
+    P2D = P3D
+  enddo
+
+  PNDLEG = P3D
+
+  end function pndleg
+
+!
+!------------------------------------------------------------------------
+!
+
+  double precision FUNCTION PNLEG (Z,N)
+
+!------------------------------------------------------------------------
+!
+!     Compute the value of the Nth order Legendre polynomial at Z.
+!     Based on the recursion formula for the Legendre polynomials.
+!
+!------------------------------------------------------------------------
+  implicit none
+
+  double precision z
+  integer n
+
+  double precision P1,P2,P3,FK
+  integer k
+
+  P1   = 1.d0
+  P2   = Z
+  P3   = P2
+
+  do K = 1, N-1
+    FK  = dble(K)
+    P3  = ((2.d0*FK+1.d0)*Z*P2 - FK*P1)/(FK+1.d0)
+    P1  = P2
+    P2  = P3
+  enddo
+
+  PNLEG = P3
+
+  end function pnleg
+
+!
+!------------------------------------------------------------------------
+!
+
+  double precision function pnormj (n,alpha,beta)
+
+  implicit none
+
+  double precision alpha,beta
+  integer n
+
+  double precision one,two,dn,const,prod,dindx,frac
+  double precision, external :: gammaf
+  integer i
+
+  one   = 1.d0
+  two   = 2.d0
+  dn    = dble(n)
+  const = alpha+beta+one
+
+  if (n <= 1) then
+    prod   = gammaf(dn+alpha)*gammaf(dn+beta)
+    prod   = prod/(gammaf(dn)*gammaf(dn+alpha+beta))
+    pnormj = prod * two**const/(two*dn+const)
+    return
+  endif
+
+  prod  = gammaf(alpha+one)*gammaf(beta+one)
+  prod  = prod/(two*(one+const)*gammaf(const+one))
+  prod  = prod*(one+alpha)*(two+alpha)
+  prod  = prod*(one+beta)*(two+beta)
+
+  do i=3,n
+    dindx = dble(i)
+    frac  = (dindx+alpha)*(dindx+beta)/(dindx*(dindx+alpha+beta))
+    prod  = prod*frac
+  enddo
+
+  pnormj = prod * two**const/(two*dn+const)
+
+  end function pnormj
+
+!
+!------------------------------------------------------------------------
+!
+
+  subroutine zwgjd(z,w,np,alpha,beta)
+
+!=======================================================================
+!
+!     Z w g j d : Generate np Gauss-Jacobi points and weights
+!                 associated with Jacobi polynomial of degree n = np-1
+!
+!     Note : Coefficients alpha and beta must be greater than -1.
+!     ----
+!=======================================================================
+
+  implicit none
+
+  double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0
+
+  integer np
+  double precision z(np),w(np)
+  double precision alpha,beta
+
+  integer n,np1,np2,i
+  double precision p,pd,pm1,pdm1,pm2,pdm2
+  double precision apb,dnp1,dnp2,fac1,fac2,fac3,fnorm,rcoef
+  double precision, external :: gammaf,pnormj
+
+  pd = zero
+  pm1 = zero
+  pm2 = zero
+  pdm1 = zero
+  pdm2 = zero
+
+  n    = np-1
+  apb  = alpha+beta
+  p    = zero
+  pdm1 = zero
+
+  if (np <= 0) stop 'minimum number of Gauss points is 1'
+
+  if ((alpha <= -one) .or. (beta <= -one)) stop 'alpha and beta must be greater than -1'
+
+  if (np == 1) then
+   z(1) = (beta-alpha)/(apb+two)
+   w(1) = gammaf(alpha+one)*gammaf(beta+one)/gammaf(apb+two) * two**(apb+one)
+   return
+  endif
+
+  call jacg(z,np,alpha,beta)
+
+  np1   = n+1
+  np2   = n+2
+  dnp1  = dble(np1)
+  dnp2  = dble(np2)
+  fac1  = dnp1+alpha+beta+one
+  fac2  = fac1+dnp1
+  fac3  = fac2+one
+  fnorm = pnormj(np1,alpha,beta)
+  rcoef = (fnorm*fac2*fac3)/(two*fac1*dnp2)
+  do i=1,np
+    call jacobf(p,pd,pm1,pdm1,pm2,pdm2,np2,alpha,beta,z(i))
+    w(i) = -rcoef/(p*pdm1)
+  enddo
+
+  end subroutine zwgjd
+
+!
+!------------------------------------------------------------------------
+!
+
+  subroutine zwgljd(z,w,np,alpha,beta)
+
+!=======================================================================
+!
+!     Z w g l j d : Generate np Gauss-Lobatto-Jacobi points and the
+!     -----------   weights associated with Jacobi polynomials of degree
+!                   n = np-1.
+!
+!     Note : alpha and beta coefficients must be greater than -1.
+!            Legendre polynomials are special case of Jacobi polynomials
+!            just by setting alpha and beta to 0.
+!
+!=======================================================================
+
+  implicit none
+
+  double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0
+
+  integer np
+  double precision alpha,beta
+  double precision z(np), w(np)
+
+  integer n,nm1,i
+  double precision p,pd,pm1,pdm1,pm2,pdm2
+  double precision alpg,betg
+  double precision, external :: endw1,endw2
+
+  p = zero
+  pm1 = zero
+  pm2 = zero
+  pdm1 = zero
+  pdm2 = zero
+
+  n   = np-1
+  nm1 = n-1
+  pd  = zero
+
+  if (np <= 1) stop 'minimum number of Gauss-Lobatto points is 2'
+
+! with spectral elements, use at least 3 points
+  if (np <= 2) stop 'minimum number of Gauss-Lobatto points for the SEM is 3'
+
+  if ((alpha <= -one) .or. (beta <= -one)) stop 'alpha and beta must be greater than -1'
+
+  if (nm1 > 0) then
+    alpg  = alpha+one
+    betg  = beta+one
+    call zwgjd(z(2),w(2),nm1,alpg,betg)
+  endif
+
+  z(1)  = - one
+  z(np) =  one
+
+  do i=2,np-1
+   w(i) = w(i)/(one-z(i)**2)
+  enddo
+
+  call jacobf(p,pd,pm1,pdm1,pm2,pdm2,n,alpha,beta,z(1))
+  w(1)  = endw1(n,alpha,beta)/(two*pd)
+  call jacobf(p,pd,pm1,pdm1,pm2,pdm2,n,alpha,beta,z(np))
+  w(np) = endw2(n,alpha,beta)/(two*pd)
+
+  end subroutine zwgljd
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/hex_nodes.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/hex_nodes.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/hex_nodes.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,479 +1,479 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine usual_hex_nodes(iaddx,iaddy,iaddz)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! check that the parameter file is correct
-  if(NGNOD /= 8) stop 'elements should have 8 control nodes'
-
-! corner nodes
-  iaddx(1)=0
-  iaddy(1)=0
-  iaddz(1)=0
-
-  iaddx(2)=2
-  iaddy(2)=0
-  iaddz(2)=0
-
-  iaddx(3)=2
-  iaddy(3)=2
-  iaddz(3)=0
-
-  iaddx(4)=0
-  iaddy(4)=2
-  iaddz(4)=0
-
-  iaddx(5)=0
-  iaddy(5)=0
-  iaddz(5)=2
-
-  iaddx(6)=2
-  iaddy(6)=0
-  iaddz(6)=2
-
-  iaddx(7)=2
-  iaddy(7)=2
-  iaddz(7)=2
-
-  iaddx(8)=0
-  iaddy(8)=2
-  iaddz(8)=2
-
-  end subroutine usual_hex_nodes
-
-  subroutine unusual_hex_nodes1(iaddx,iaddy,iaddz)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
-  iaddx(1)=0
-  iaddy(1)=0
-  iaddz(1)=0
-
-  iaddx(2)=4
-  iaddy(2)=0
-  iaddz(2)=0
-
-  iaddx(3)=4
-  iaddy(3)=4
-  iaddz(3)=0
-
-  iaddx(4)=0
-  iaddy(4)=4
-  iaddz(4)=0
-
-  iaddx(5)=2
-  iaddy(5)=0
-  iaddz(5)=2
-
-  iaddx(6)=4
-  iaddy(6)=0
-  iaddz(6)=2
-
-  iaddx(7)=4
-  iaddy(7)=4
-  iaddz(7)=2
-
-  iaddx(8)=2
-  iaddy(8)=4
-  iaddz(8)=2
-
-  end subroutine unusual_hex_nodes1
-
-  subroutine unusual_hex_nodes1p(iaddx,iaddy,iaddz)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
-  iaddx(1)=0
-  iaddy(1)=0
-  iaddz(1)=0
-
-  iaddx(2)=4
-  iaddy(2)=0
-  iaddz(2)=0
-
-  iaddx(3)=4
-  iaddy(3)=4
-  iaddz(3)=0
-
-  iaddx(4)=0
-  iaddy(4)=4
-  iaddz(4)=0
-
-  iaddx(5)=0
-  iaddy(5)=0
-  iaddz(5)=2
-
-  iaddx(6)=2
-  iaddy(6)=0
-  iaddz(6)=2
-
-  iaddx(7)=2
-  iaddy(7)=4
-  iaddz(7)=2
-
-  iaddx(8)=0
-  iaddy(8)=4
-  iaddz(8)=2
-
-  end subroutine unusual_hex_nodes1p
-
-  subroutine unusual_hex_nodes2(iaddx,iaddy,iaddz)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
-  iaddx(1)=0
-  iaddy(1)=0
-  iaddz(1)=0
-
-  iaddx(2)=2
-  iaddy(2)=0
-  iaddz(2)=2
-
-  iaddx(3)=2
-  iaddy(3)=4
-  iaddz(3)=2
-
-  iaddx(4)=0
-  iaddy(4)=4
-  iaddz(4)=0
-
-  iaddx(5)=0
-  iaddy(5)=0
-  iaddz(5)=4
-
-  iaddx(6)=2
-  iaddy(6)=0
-  iaddz(6)=4
-
-  iaddx(7)=2
-  iaddy(7)=4
-  iaddz(7)=4
-
-  iaddx(8)=0
-  iaddy(8)=4
-  iaddz(8)=4
-
-  end subroutine unusual_hex_nodes2
-
-  subroutine unusual_hex_nodes2p(iaddx,iaddy,iaddz)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
-  iaddx(1)=0
-  iaddy(1)=0
-  iaddz(1)=0
-
-  iaddx(2)=2
-  iaddy(2)=0
-  iaddz(2)=-2
-
-  iaddx(3)=2
-  iaddy(3)=4
-  iaddz(3)=-2
-
-  iaddx(4)=0
-  iaddy(4)=4
-  iaddz(4)=0
-
-  iaddx(5)=0
-  iaddy(5)=0
-  iaddz(5)=2
-
-  iaddx(6)=2
-  iaddy(6)=0
-  iaddz(6)=2
-
-  iaddx(7)=2
-  iaddy(7)=4
-  iaddz(7)=2
-
-  iaddx(8)=0
-  iaddy(8)=4
-  iaddz(8)=2
-
-  end subroutine unusual_hex_nodes2p
-
-  subroutine unusual_hex_nodes3(iaddx,iaddy,iaddz)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
-  iaddx(1)=0
-  iaddy(1)=0
-  iaddz(1)=0
-
-  iaddx(2)=2
-  iaddy(2)=0
-  iaddz(2)=0
-
-  iaddx(3)=2
-  iaddy(3)=4
-  iaddz(3)=0
-
-  iaddx(4)=0
-  iaddy(4)=4
-  iaddz(4)=0
-
-  iaddx(5)=0
-  iaddy(5)=0
-  iaddz(5)=2
-
-  iaddx(6)=2
-  iaddy(6)=0
-  iaddz(6)=2
-
-  iaddx(7)=2
-  iaddy(7)=4
-  iaddz(7)=2
-
-  iaddx(8)=0
-  iaddy(8)=4
-  iaddz(8)=2
-
-  end subroutine unusual_hex_nodes3
-
-  subroutine unusual_hex_nodes4(iaddx,iaddy,iaddz)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
-  iaddx(1)=0
-  iaddy(1)=0
-  iaddz(1)=0
-
-  iaddx(2)=2
-  iaddy(2)=0
-  iaddz(2)=0
-
-  iaddx(3)=2
-  iaddy(3)=4
-  iaddz(3)=0
-
-  iaddx(4)=0
-  iaddy(4)=4
-  iaddz(4)=0
-
-  iaddx(5)=0
-  iaddy(5)=0
-  iaddz(5)=2
-
-  iaddx(6)=2
-  iaddy(6)=0
-  iaddz(6)=2
-
-  iaddx(7)=2
-  iaddy(7)=2
-  iaddz(7)=2
-
-  iaddx(8)=0
-  iaddy(8)=2
-  iaddz(8)=2
-
-  end subroutine unusual_hex_nodes4
-
-  subroutine unusual_hex_nodes4p(iaddx,iaddy,iaddz)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
-  iaddx(1)=0
-  iaddy(1)=0
-  iaddz(1)=0
-
-  iaddx(2)=2
-  iaddy(2)=0
-  iaddz(2)=0
-
-  iaddx(3)=2
-  iaddy(3)=4
-  iaddz(3)=0
-
-  iaddx(4)=0
-  iaddy(4)=4
-  iaddz(4)=0
-
-  iaddx(5)=0
-  iaddy(5)=2
-  iaddz(5)=2
-
-  iaddx(6)=2
-  iaddy(6)=2
-  iaddz(6)=2
-
-  iaddx(7)=2
-  iaddy(7)=4
-  iaddz(7)=2
-
-  iaddx(8)=0
-  iaddy(8)=4
-  iaddz(8)=2
-
-  end subroutine unusual_hex_nodes4p
-
-  subroutine unusual_hex_nodes6(iaddx,iaddy,iaddz)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
-  iaddx(1)=0
-  iaddy(1)=0
-  iaddz(1)=0
-
-  iaddx(2)=2
-  iaddy(2)=0
-  iaddz(2)=0
-
-  iaddx(3)=2
-  iaddy(3)=2
-  iaddz(3)=-2
-
-  iaddx(4)=0
-  iaddy(4)=2
-  iaddz(4)=-2
-
-  iaddx(5)=0
-  iaddy(5)=0
-  iaddz(5)=2
-
-  iaddx(6)=2
-  iaddy(6)=0
-  iaddz(6)=2
-
-  iaddx(7)=2
-  iaddy(7)=2
-  iaddz(7)=2
-
-  iaddx(8)=0
-  iaddy(8)=2
-  iaddz(8)=2
-
-  end subroutine unusual_hex_nodes6
-
-  subroutine unusual_hex_nodes6p(iaddx,iaddy,iaddz)
-
-  implicit none
-
-  include "constants.h"
-
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
-  iaddx(1)=0
-  iaddy(1)=0
-  iaddz(1)=0
-
-  iaddx(2)=2
-  iaddy(2)=0
-  iaddz(2)=0
-
-  iaddx(3)=2
-  iaddy(3)=2
-  iaddz(3)=2
-
-  iaddx(4)=0
-  iaddy(4)=2
-  iaddz(4)=2
-
-  iaddx(5)=0
-  iaddy(5)=0
-  iaddz(5)=4
-
-  iaddx(6)=2
-  iaddy(6)=0
-  iaddz(6)=4
-
-  iaddx(7)=2
-  iaddy(7)=2
-  iaddz(7)=4
-
-  iaddx(8)=0
-  iaddy(8)=2
-  iaddz(8)=4
-
-  end subroutine unusual_hex_nodes6p
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine usual_hex_nodes(iaddx,iaddy,iaddz)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! check that the parameter file is correct
+  if(NGNOD /= 8) stop 'elements should have 8 control nodes'
+
+! corner nodes
+  iaddx(1)=0
+  iaddy(1)=0
+  iaddz(1)=0
+
+  iaddx(2)=2
+  iaddy(2)=0
+  iaddz(2)=0
+
+  iaddx(3)=2
+  iaddy(3)=2
+  iaddz(3)=0
+
+  iaddx(4)=0
+  iaddy(4)=2
+  iaddz(4)=0
+
+  iaddx(5)=0
+  iaddy(5)=0
+  iaddz(5)=2
+
+  iaddx(6)=2
+  iaddy(6)=0
+  iaddz(6)=2
+
+  iaddx(7)=2
+  iaddy(7)=2
+  iaddz(7)=2
+
+  iaddx(8)=0
+  iaddy(8)=2
+  iaddz(8)=2
+
+  end subroutine usual_hex_nodes
+
+  subroutine unusual_hex_nodes1(iaddx,iaddy,iaddz)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+  iaddx(1)=0
+  iaddy(1)=0
+  iaddz(1)=0
+
+  iaddx(2)=4
+  iaddy(2)=0
+  iaddz(2)=0
+
+  iaddx(3)=4
+  iaddy(3)=4
+  iaddz(3)=0
+
+  iaddx(4)=0
+  iaddy(4)=4
+  iaddz(4)=0
+
+  iaddx(5)=2
+  iaddy(5)=0
+  iaddz(5)=2
+
+  iaddx(6)=4
+  iaddy(6)=0
+  iaddz(6)=2
+
+  iaddx(7)=4
+  iaddy(7)=4
+  iaddz(7)=2
+
+  iaddx(8)=2
+  iaddy(8)=4
+  iaddz(8)=2
+
+  end subroutine unusual_hex_nodes1
+
+  subroutine unusual_hex_nodes1p(iaddx,iaddy,iaddz)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+  iaddx(1)=0
+  iaddy(1)=0
+  iaddz(1)=0
+
+  iaddx(2)=4
+  iaddy(2)=0
+  iaddz(2)=0
+
+  iaddx(3)=4
+  iaddy(3)=4
+  iaddz(3)=0
+
+  iaddx(4)=0
+  iaddy(4)=4
+  iaddz(4)=0
+
+  iaddx(5)=0
+  iaddy(5)=0
+  iaddz(5)=2
+
+  iaddx(6)=2
+  iaddy(6)=0
+  iaddz(6)=2
+
+  iaddx(7)=2
+  iaddy(7)=4
+  iaddz(7)=2
+
+  iaddx(8)=0
+  iaddy(8)=4
+  iaddz(8)=2
+
+  end subroutine unusual_hex_nodes1p
+
+  subroutine unusual_hex_nodes2(iaddx,iaddy,iaddz)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+  iaddx(1)=0
+  iaddy(1)=0
+  iaddz(1)=0
+
+  iaddx(2)=2
+  iaddy(2)=0
+  iaddz(2)=2
+
+  iaddx(3)=2
+  iaddy(3)=4
+  iaddz(3)=2
+
+  iaddx(4)=0
+  iaddy(4)=4
+  iaddz(4)=0
+
+  iaddx(5)=0
+  iaddy(5)=0
+  iaddz(5)=4
+
+  iaddx(6)=2
+  iaddy(6)=0
+  iaddz(6)=4
+
+  iaddx(7)=2
+  iaddy(7)=4
+  iaddz(7)=4
+
+  iaddx(8)=0
+  iaddy(8)=4
+  iaddz(8)=4
+
+  end subroutine unusual_hex_nodes2
+
+  subroutine unusual_hex_nodes2p(iaddx,iaddy,iaddz)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+  iaddx(1)=0
+  iaddy(1)=0
+  iaddz(1)=0
+
+  iaddx(2)=2
+  iaddy(2)=0
+  iaddz(2)=-2
+
+  iaddx(3)=2
+  iaddy(3)=4
+  iaddz(3)=-2
+
+  iaddx(4)=0
+  iaddy(4)=4
+  iaddz(4)=0
+
+  iaddx(5)=0
+  iaddy(5)=0
+  iaddz(5)=2
+
+  iaddx(6)=2
+  iaddy(6)=0
+  iaddz(6)=2
+
+  iaddx(7)=2
+  iaddy(7)=4
+  iaddz(7)=2
+
+  iaddx(8)=0
+  iaddy(8)=4
+  iaddz(8)=2
+
+  end subroutine unusual_hex_nodes2p
+
+  subroutine unusual_hex_nodes3(iaddx,iaddy,iaddz)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+  iaddx(1)=0
+  iaddy(1)=0
+  iaddz(1)=0
+
+  iaddx(2)=2
+  iaddy(2)=0
+  iaddz(2)=0
+
+  iaddx(3)=2
+  iaddy(3)=4
+  iaddz(3)=0
+
+  iaddx(4)=0
+  iaddy(4)=4
+  iaddz(4)=0
+
+  iaddx(5)=0
+  iaddy(5)=0
+  iaddz(5)=2
+
+  iaddx(6)=2
+  iaddy(6)=0
+  iaddz(6)=2
+
+  iaddx(7)=2
+  iaddy(7)=4
+  iaddz(7)=2
+
+  iaddx(8)=0
+  iaddy(8)=4
+  iaddz(8)=2
+
+  end subroutine unusual_hex_nodes3
+
+  subroutine unusual_hex_nodes4(iaddx,iaddy,iaddz)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+  iaddx(1)=0
+  iaddy(1)=0
+  iaddz(1)=0
+
+  iaddx(2)=2
+  iaddy(2)=0
+  iaddz(2)=0
+
+  iaddx(3)=2
+  iaddy(3)=4
+  iaddz(3)=0
+
+  iaddx(4)=0
+  iaddy(4)=4
+  iaddz(4)=0
+
+  iaddx(5)=0
+  iaddy(5)=0
+  iaddz(5)=2
+
+  iaddx(6)=2
+  iaddy(6)=0
+  iaddz(6)=2
+
+  iaddx(7)=2
+  iaddy(7)=2
+  iaddz(7)=2
+
+  iaddx(8)=0
+  iaddy(8)=2
+  iaddz(8)=2
+
+  end subroutine unusual_hex_nodes4
+
+  subroutine unusual_hex_nodes4p(iaddx,iaddy,iaddz)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+  iaddx(1)=0
+  iaddy(1)=0
+  iaddz(1)=0
+
+  iaddx(2)=2
+  iaddy(2)=0
+  iaddz(2)=0
+
+  iaddx(3)=2
+  iaddy(3)=4
+  iaddz(3)=0
+
+  iaddx(4)=0
+  iaddy(4)=4
+  iaddz(4)=0
+
+  iaddx(5)=0
+  iaddy(5)=2
+  iaddz(5)=2
+
+  iaddx(6)=2
+  iaddy(6)=2
+  iaddz(6)=2
+
+  iaddx(7)=2
+  iaddy(7)=4
+  iaddz(7)=2
+
+  iaddx(8)=0
+  iaddy(8)=4
+  iaddz(8)=2
+
+  end subroutine unusual_hex_nodes4p
+
+  subroutine unusual_hex_nodes6(iaddx,iaddy,iaddz)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+  iaddx(1)=0
+  iaddy(1)=0
+  iaddz(1)=0
+
+  iaddx(2)=2
+  iaddy(2)=0
+  iaddz(2)=0
+
+  iaddx(3)=2
+  iaddy(3)=2
+  iaddz(3)=-2
+
+  iaddx(4)=0
+  iaddy(4)=2
+  iaddz(4)=-2
+
+  iaddx(5)=0
+  iaddy(5)=0
+  iaddz(5)=2
+
+  iaddx(6)=2
+  iaddy(6)=0
+  iaddz(6)=2
+
+  iaddx(7)=2
+  iaddy(7)=2
+  iaddz(7)=2
+
+  iaddx(8)=0
+  iaddy(8)=2
+  iaddz(8)=2
+
+  end subroutine unusual_hex_nodes6
+
+  subroutine unusual_hex_nodes6p(iaddx,iaddy,iaddz)
+
+  implicit none
+
+  include "constants.h"
+
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+  iaddx(1)=0
+  iaddy(1)=0
+  iaddz(1)=0
+
+  iaddx(2)=2
+  iaddy(2)=0
+  iaddz(2)=0
+
+  iaddx(3)=2
+  iaddy(3)=2
+  iaddz(3)=2
+
+  iaddx(4)=0
+  iaddy(4)=2
+  iaddz(4)=2
+
+  iaddx(5)=0
+  iaddy(5)=0
+  iaddz(5)=4
+
+  iaddx(6)=2
+  iaddy(6)=0
+  iaddz(6)=4
+
+  iaddx(7)=2
+  iaddy(7)=2
+  iaddz(7)=4
+
+  iaddx(8)=0
+  iaddy(8)=2
+  iaddz(8)=4
+
+  end subroutine unusual_hex_nodes6p
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/lagrange_poly.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/lagrange_poly.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/lagrange_poly.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,110 +1,110 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine lagrange_any(xi,NGLL,xigll,h,hprime)
-
-! subroutine to compute the Lagrange interpolants based upon the GLL points
-! and their first derivatives at any point xi in [-1,1]
-
-  implicit none
-
-  integer, intent(in) :: NGLL
-  double precision, intent(in) :: xi,xigll(NGLL)
-  double precision, intent(out) :: h(NGLL),hprime(NGLL)
-
-  integer dgr,i,j
-  double precision prod1,prod2
-
-  do dgr=1,NGLL
-
-    prod1 = 1.0d0
-    prod2 = 1.0d0
-    do i=1,NGLL
-      if(i /= dgr) then
-        prod1 = prod1*(xi-xigll(i))
-        prod2 = prod2*(xigll(dgr)-xigll(i))
-      endif
-    enddo
-    h(dgr)=prod1/prod2
-
-    hprime(dgr)=0.0d0
-    do i=1,NGLL
-      if(i /= dgr) then
-        prod1=1.0d0
-        do j=1,NGLL
-          if(j /= dgr .and. j /= i) prod1 = prod1*(xi-xigll(j))
-        enddo
-        hprime(dgr) = hprime(dgr)+prod1
-      endif
-    enddo
-    hprime(dgr) = hprime(dgr)/prod2
-
-  enddo
-
-  end subroutine lagrange_any
-
-!
-!=====================================================================
-!
-
-! subroutine to compute the derivative of the Lagrange interpolants
-! at the GLL points at any given GLL point
-
-  double precision function lagrange_deriv_GLL(I,j,ZGLL,NZ)
-
-!------------------------------------------------------------------------
-!
-!     Compute the value of the derivative of the I-th
-!     Lagrange interpolant through the
-!     NZ Gauss-Lobatto Legendre points ZGLL at point ZGLL(j)
-!
-!------------------------------------------------------------------------
-
-  implicit none
-
-  integer i,j,nz
-  double precision zgll(0:nz-1)
-
-  integer degpoly
-
-  double precision, external :: pnleg,pndleg
-
-  degpoly = nz - 1
-  if (i == 0 .and. j == 0) then
-    lagrange_deriv_GLL = - dble(degpoly)*(dble(degpoly)+1.d0) / 4.d0
-  else if (i == degpoly .and. j == degpoly) then
-    lagrange_deriv_GLL = dble(degpoly)*(dble(degpoly)+1.d0) / 4.d0
-  else if (i == j) then
-    lagrange_deriv_GLL = 0.d0
-  else
-    lagrange_deriv_GLL = pnleg(zgll(j),degpoly) / &
-      (pnleg(zgll(i),degpoly)*(zgll(j)-zgll(i))) &
-      + (1.d0-zgll(j)*zgll(j))*pndleg(zgll(j),degpoly) / (dble(degpoly)* &
-      (dble(degpoly)+1.d0)*pnleg(zgll(i),degpoly)*(zgll(j)-zgll(i))*(zgll(j)-zgll(i)))
-  endif
-
-  end function lagrange_deriv_GLL
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine lagrange_any(xi,NGLL,xigll,h,hprime)
+
+! subroutine to compute the Lagrange interpolants based upon the GLL points
+! and their first derivatives at any point xi in [-1,1]
+
+  implicit none
+
+  integer, intent(in) :: NGLL
+  double precision, intent(in) :: xi,xigll(NGLL)
+  double precision, intent(out) :: h(NGLL),hprime(NGLL)
+
+  integer dgr,i,j
+  double precision prod1,prod2
+
+  do dgr=1,NGLL
+
+    prod1 = 1.0d0
+    prod2 = 1.0d0
+    do i=1,NGLL
+      if(i /= dgr) then
+        prod1 = prod1*(xi-xigll(i))
+        prod2 = prod2*(xigll(dgr)-xigll(i))
+      endif
+    enddo
+    h(dgr)=prod1/prod2
+
+    hprime(dgr)=0.0d0
+    do i=1,NGLL
+      if(i /= dgr) then
+        prod1=1.0d0
+        do j=1,NGLL
+          if(j /= dgr .and. j /= i) prod1 = prod1*(xi-xigll(j))
+        enddo
+        hprime(dgr) = hprime(dgr)+prod1
+      endif
+    enddo
+    hprime(dgr) = hprime(dgr)/prod2
+
+  enddo
+
+  end subroutine lagrange_any
+
+!
+!=====================================================================
+!
+
+! subroutine to compute the derivative of the Lagrange interpolants
+! at the GLL points at any given GLL point
+
+  double precision function lagrange_deriv_GLL(I,j,ZGLL,NZ)
+
+!------------------------------------------------------------------------
+!
+!     Compute the value of the derivative of the I-th
+!     Lagrange interpolant through the
+!     NZ Gauss-Lobatto Legendre points ZGLL at point ZGLL(j)
+!
+!------------------------------------------------------------------------
+
+  implicit none
+
+  integer i,j,nz
+  double precision zgll(0:nz-1)
+
+  integer degpoly
+
+  double precision, external :: pnleg,pndleg
+
+  degpoly = nz - 1
+  if (i == 0 .and. j == 0) then
+    lagrange_deriv_GLL = - dble(degpoly)*(dble(degpoly)+1.d0) / 4.d0
+  else if (i == degpoly .and. j == degpoly) then
+    lagrange_deriv_GLL = dble(degpoly)*(dble(degpoly)+1.d0) / 4.d0
+  else if (i == j) then
+    lagrange_deriv_GLL = 0.d0
+  else
+    lagrange_deriv_GLL = pnleg(zgll(j),degpoly) / &
+      (pnleg(zgll(i),degpoly)*(zgll(j)-zgll(i))) &
+      + (1.d0-zgll(j)*zgll(j))*pndleg(zgll(j),degpoly) / (dble(degpoly)* &
+      (dble(degpoly)+1.d0)*pnleg(zgll(i),degpoly)*(zgll(j)-zgll(i))*(zgll(j)-zgll(i)))
+  endif
+
+  end function lagrange_deriv_GLL
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_topo_bathy_file.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_topo_bathy_file.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_topo_bathy_file.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,55 +1,55 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine read_topo_bathy_file(itopo_bathy,NX_TOPO,NY_TOPO,topo_file)
-!
-!---- read topography and bathymetry file once and for all
-!
-  implicit none
-
-  include "constants.h"
-
-  integer NX_TOPO,NY_TOPO
-
-! use integer array to store topography values
-  integer itopo_bathy(NX_TOPO,NY_TOPO)
-
-  character(len=100) topo_file
-
-  integer ix,iy
-
-  itopo_bathy(:,:) = 0
-
-  open(unit=13,file=topo_file,status='old',action='read')
-  do iy=1,NY_TOPO
-    do ix=1,NX_TOPO
-      read(13,*) itopo_bathy(ix,iy)
-    enddo
-  enddo
-  close(13)
-
-  end subroutine read_topo_bathy_file
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine read_topo_bathy_file(itopo_bathy,NX_TOPO,NY_TOPO,topo_file)
+!
+!---- read topography and bathymetry file once and for all
+!
+  implicit none
+
+  include "constants.h"
+
+  integer NX_TOPO,NY_TOPO
+
+! use integer array to store topography values
+  integer itopo_bathy(NX_TOPO,NY_TOPO)
+
+  character(len=100) topo_file
+
+  integer ix,iy
+
+  itopo_bathy(:,:) = 0
+
+  open(unit=13,file=topo_file,status='old',action='read')
+  do iy=1,NY_TOPO
+    do ix=1,NX_TOPO
+      read(13,*) itopo_bathy(ix,iy)
+    enddo
+  enddo
+  close(13)
+
+  end subroutine read_topo_bathy_file
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_value_parameters.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_value_parameters.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_value_parameters.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,290 +1,290 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! read values from parameter file, ignoring white lines and comments
-
-  subroutine read_value_integer(value_to_read, name)
-
-  implicit none
-
-  integer value_to_read
-  character(len=*) name
-  character(len=100) string_read
-  integer ierr
-  common /param_err_common/ ierr
-
-  call param_read(string_read, len(string_read), name, len(name), ierr)
-  if (ierr .ne. 0) return
-  read(string_read,*) value_to_read
-
-  end subroutine read_value_integer
-
-!--------------------
-
-  subroutine read_value_double_precision(value_to_read, name)
-
-  implicit none
-
-  double precision value_to_read
-  character(len=*) name
-  character(len=100) string_read
-  integer ierr
-  common /param_err_common/ ierr
-
-  call param_read(string_read, len(string_read), name, len(name), ierr)
-  if (ierr .ne. 0) return
-  read(string_read,*) value_to_read
-
-  end subroutine read_value_double_precision
-
-!--------------------
-
-  subroutine read_value_logical(value_to_read, name)
-
-  implicit none
-
-  logical value_to_read
-  character(len=*) name
-  character(len=100) string_read
-  integer ierr
-  common /param_err_common/ ierr
-
-  call param_read(string_read, len(string_read), name, len(name), ierr)
-  if (ierr .ne. 0) return
-  read(string_read,*) value_to_read
-
-  end subroutine read_value_logical
-
-!--------------------
-
-  subroutine read_value_string(value_to_read, name)
-
-  implicit none
-
-  character(len=*) value_to_read
-  character(len=*) name
-  character(len=100) string_read
-  integer ierr
-  common /param_err_common/ ierr
-
-  call param_read(string_read, len(string_read), name, len(name), ierr)
-  if (ierr .ne. 0) return
-  value_to_read = string_read
-
-  end subroutine read_value_string
-
-!--------------------
-
-  subroutine open_parameter_file()
-
-  include 'constants.h'
-  integer ierr
-  common /param_err_common/ ierr
-  character(len=50) filename
-  filename = IN_DATA_FILES_PATH(1:len_trim(IN_DATA_FILES_PATH))//'Par_file'
-
-  call param_open(filename, len(filename), ierr);
-  if (ierr .ne. 0) return
-
-  end subroutine open_parameter_file
-
-!--------------------
-
-  subroutine close_parameter_file()
-
-  call param_close();
-
-  end subroutine close_parameter_file
-
-!--------------------
-
-  integer function err_occurred()
-
-  integer ierr
-  common /param_err_common/ ierr
-
-  err_occurred = ierr
-
-  end function err_occurred
-
-!--------------------
-
-
-!
-! unused routines:
-!
-
-
-!  subroutine read_value_integer(value_to_read, name)
-!
-!  implicit none
-!
-!  integer value_to_read
-!  character(len=*) name
-!  character(len=256) string_read
-!
-!  call unused_string(name)
-!
-!  call read_next_line(string_read)
-!  read(string_read,*) value_to_read
-!
-!  end subroutine read_value_integer
-!
-!!--------------------
-!
-!  subroutine read_value_double_precision(value_to_read, name)
-!
-!  implicit none
-!
-!  double precision value_to_read
-!  character(len=*) name
-!  character(len=256) string_read
-!
-!  call unused_string(name)
-!
-!  call read_next_line(string_read)
-!  read(string_read,*) value_to_read
-!
-!  end subroutine read_value_double_precision
-!
-!!--------------------
-!
-!  subroutine read_value_logical(value_to_read, name)
-!
-!  implicit none
-!
-!  logical value_to_read
-!  character(len=*) name
-!  character(len=256) string_read
-!
-!  call unused_string(name)
-!
-!  call read_next_line(string_read)
-!  read(string_read,*) value_to_read
-!
-!  end subroutine read_value_logical
-!
-!!--------------------
-!
-!  subroutine read_value_string(value_to_read, name)
-!
-!  implicit none
-!
-!  character(len=*) value_to_read
-!  character(len=*) name
-!  character(len=256) string_read
-!
-!  call unused_string(name)
-!
-!  call read_next_line(string_read)
-!  value_to_read = string_read
-!
-!  end subroutine read_value_string
-!
-!!--------------------
-!
-!  subroutine read_next_line(string_read)
-!
-!  implicit none
-!
-!  include "constants.h"
-!
-!  character(len=256) string_read
-!
-!  integer index_equal_sign,ios
-!
-!  do
-!    read(unit=IIN,fmt="(a256)",iostat=ios) string_read
-!    if(ios /= 0) stop 'error while reading parameter file'
-!
-!! suppress leading white spaces, if any
-!    string_read = adjustl(string_read)
-!
-!! suppress trailing carriage return (ASCII code 13) if any (e.g. if input text file coming from Windows/DOS)
-!    if(index(string_read,achar(13)) > 0) string_read = string_read(1:index(string_read,achar(13))-1)
-!
-!! exit loop when we find the first line that is not a comment or a white line
-!    if(len_trim(string_read) == 0) cycle
-!    if(string_read(1:1) /= '#') exit
-!
-!  enddo
-!
-!! suppress trailing white spaces, if any
-!  string_read = string_read(1:len_trim(string_read))
-!
-!! suppress trailing comments, if any
-!  if(index(string_read,'#') > 0) string_read = string_read(1:index(string_read,'#')-1)
-!
-!! suppress leading junk (up to the first equal sign, included)
-!  index_equal_sign = index(string_read,'=')
-!  if(index_equal_sign <= 1 .or. index_equal_sign == len_trim(string_read)) stop 'incorrect syntax detected in Par_file'
-!  string_read = string_read(index_equal_sign + 1:len_trim(string_read))
-!
-!! suppress leading and trailing white spaces again, if any, after having suppressed the leading junk
-!  string_read = adjustl(string_read)
-!  string_read = string_read(1:len_trim(string_read))
-!
-!  end subroutine read_next_line
-!
-!!--------------------
-!
-!  subroutine open_parameter_file
-!
-!  include "constants.h"
-!
-!  open(unit=IIN,file='in_data_files/Par_file',status='old',action='read')
-!
-!  end subroutine open_parameter_file
-!
-!!--------------------
-!
-!  subroutine close_parameter_file
-!
-!  include "constants.h"
-!
-!  close(IIN)
-!
-!  end subroutine close_parameter_file
-!
-!!--------------------
-!
-!  integer function err_occurred()
-!
-!  err_occurred = 0
-!
-!  end function err_occurred
-!
-!!--------------------
-!
-!! dummy subroutine to avoid warnings about variable not used in other subroutines
-!  subroutine unused_string(s)
-!
-!  character(len=*) s
-!
-!  if (len(s) == 1) continue
-!
-!  end subroutine unused_string
-!
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! read values from parameter file, ignoring white lines and comments
+
+  subroutine read_value_integer(value_to_read, name)
+
+  implicit none
+
+  integer value_to_read
+  character(len=*) name
+  character(len=100) string_read
+  integer ierr
+  common /param_err_common/ ierr
+
+  call param_read(string_read, len(string_read), name, len(name), ierr)
+  if (ierr .ne. 0) return
+  read(string_read,*) value_to_read
+
+  end subroutine read_value_integer
+
+!--------------------
+
+  subroutine read_value_double_precision(value_to_read, name)
+
+  implicit none
+
+  double precision value_to_read
+  character(len=*) name
+  character(len=100) string_read
+  integer ierr
+  common /param_err_common/ ierr
+
+  call param_read(string_read, len(string_read), name, len(name), ierr)
+  if (ierr .ne. 0) return
+  read(string_read,*) value_to_read
+
+  end subroutine read_value_double_precision
+
+!--------------------
+
+  subroutine read_value_logical(value_to_read, name)
+
+  implicit none
+
+  logical value_to_read
+  character(len=*) name
+  character(len=100) string_read
+  integer ierr
+  common /param_err_common/ ierr
+
+  call param_read(string_read, len(string_read), name, len(name), ierr)
+  if (ierr .ne. 0) return
+  read(string_read,*) value_to_read
+
+  end subroutine read_value_logical
+
+!--------------------
+
+  subroutine read_value_string(value_to_read, name)
+
+  implicit none
+
+  character(len=*) value_to_read
+  character(len=*) name
+  character(len=100) string_read
+  integer ierr
+  common /param_err_common/ ierr
+
+  call param_read(string_read, len(string_read), name, len(name), ierr)
+  if (ierr .ne. 0) return
+  value_to_read = string_read
+
+  end subroutine read_value_string
+
+!--------------------
+
+  subroutine open_parameter_file()
+
+  include 'constants.h'
+  integer ierr
+  common /param_err_common/ ierr
+  character(len=50) filename
+  filename = IN_DATA_FILES_PATH(1:len_trim(IN_DATA_FILES_PATH))//'Par_file'
+
+  call param_open(filename, len(filename), ierr);
+  if (ierr .ne. 0) return
+
+  end subroutine open_parameter_file
+
+!--------------------
+
+  subroutine close_parameter_file()
+
+  call param_close();
+
+  end subroutine close_parameter_file
+
+!--------------------
+
+  integer function err_occurred()
+
+  integer ierr
+  common /param_err_common/ ierr
+
+  err_occurred = ierr
+
+  end function err_occurred
+
+!--------------------
+
+
+!
+! unused routines:
+!
+
+
+!  subroutine read_value_integer(value_to_read, name)
+!
+!  implicit none
+!
+!  integer value_to_read
+!  character(len=*) name
+!  character(len=256) string_read
+!
+!  call unused_string(name)
+!
+!  call read_next_line(string_read)
+!  read(string_read,*) value_to_read
+!
+!  end subroutine read_value_integer
+!
+!!--------------------
+!
+!  subroutine read_value_double_precision(value_to_read, name)
+!
+!  implicit none
+!
+!  double precision value_to_read
+!  character(len=*) name
+!  character(len=256) string_read
+!
+!  call unused_string(name)
+!
+!  call read_next_line(string_read)
+!  read(string_read,*) value_to_read
+!
+!  end subroutine read_value_double_precision
+!
+!!--------------------
+!
+!  subroutine read_value_logical(value_to_read, name)
+!
+!  implicit none
+!
+!  logical value_to_read
+!  character(len=*) name
+!  character(len=256) string_read
+!
+!  call unused_string(name)
+!
+!  call read_next_line(string_read)
+!  read(string_read,*) value_to_read
+!
+!  end subroutine read_value_logical
+!
+!!--------------------
+!
+!  subroutine read_value_string(value_to_read, name)
+!
+!  implicit none
+!
+!  character(len=*) value_to_read
+!  character(len=*) name
+!  character(len=256) string_read
+!
+!  call unused_string(name)
+!
+!  call read_next_line(string_read)
+!  value_to_read = string_read
+!
+!  end subroutine read_value_string
+!
+!!--------------------
+!
+!  subroutine read_next_line(string_read)
+!
+!  implicit none
+!
+!  include "constants.h"
+!
+!  character(len=256) string_read
+!
+!  integer index_equal_sign,ios
+!
+!  do
+!    read(unit=IIN,fmt="(a256)",iostat=ios) string_read
+!    if(ios /= 0) stop 'error while reading parameter file'
+!
+!! suppress leading white spaces, if any
+!    string_read = adjustl(string_read)
+!
+!! suppress trailing carriage return (ASCII code 13) if any (e.g. if input text file coming from Windows/DOS)
+!    if(index(string_read,achar(13)) > 0) string_read = string_read(1:index(string_read,achar(13))-1)
+!
+!! exit loop when we find the first line that is not a comment or a white line
+!    if(len_trim(string_read) == 0) cycle
+!    if(string_read(1:1) /= '#') exit
+!
+!  enddo
+!
+!! suppress trailing white spaces, if any
+!  string_read = string_read(1:len_trim(string_read))
+!
+!! suppress trailing comments, if any
+!  if(index(string_read,'#') > 0) string_read = string_read(1:index(string_read,'#')-1)
+!
+!! suppress leading junk (up to the first equal sign, included)
+!  index_equal_sign = index(string_read,'=')
+!  if(index_equal_sign <= 1 .or. index_equal_sign == len_trim(string_read)) stop 'incorrect syntax detected in Par_file'
+!  string_read = string_read(index_equal_sign + 1:len_trim(string_read))
+!
+!! suppress leading and trailing white spaces again, if any, after having suppressed the leading junk
+!  string_read = adjustl(string_read)
+!  string_read = string_read(1:len_trim(string_read))
+!
+!  end subroutine read_next_line
+!
+!!--------------------
+!
+!  subroutine open_parameter_file
+!
+!  include "constants.h"
+!
+!  open(unit=IIN,file='in_data_files/Par_file',status='old',action='read')
+!
+!  end subroutine open_parameter_file
+!
+!!--------------------
+!
+!  subroutine close_parameter_file
+!
+!  include "constants.h"
+!
+!  close(IIN)
+!
+!  end subroutine close_parameter_file
+!
+!!--------------------
+!
+!  integer function err_occurred()
+!
+!  err_occurred = 0
+!
+!  end function err_occurred
+!
+!!--------------------
+!
+!! dummy subroutine to avoid warnings about variable not used in other subroutines
+!  subroutine unused_string(s)
+!
+!  character(len=*) s
+!
+!  if (len(s) == 1) continue
+!
+!  end subroutine unused_string
+!

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/recompute_jacobian.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/recompute_jacobian.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/recompute_jacobian.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,158 +1,158 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! recompute 3D jacobian at a given point for a 8-node element
-
-  subroutine recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
-                   xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
-
-  implicit none
-
-  include "constants.h"
-
-  double precision x,y,z,xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  double precision xi,eta,gamma,jacobian
-
-! coordinates of the control points
-  double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
-
-! 3D shape functions and their derivatives at receiver
-  double precision shape3D(NGNOD)
-  double precision dershape3D(NDIM,NGNOD)
-
-  double precision xxi,yxi,zxi
-  double precision xeta,yeta,zeta
-  double precision xgamma,ygamma,zgamma
-  double precision ra1,ra2,rb1,rb2,rc1,rc2
-
-  integer ia
-
-! for 8-node element
-  double precision, parameter :: ONE_EIGHTH = 0.125d0
-
-! recompute jacobian for any (xi,eta,gamma) point, not necessarily a GLL point
-
-! check that the parameter file is correct
-  if(NGNOD /= 8) stop 'elements should have 8 control nodes'
-
-! ***
-! *** create the 3D shape functions and the Jacobian for an 8-node element
-! ***
-
-!--- case of an 8-node 3D element (Dhatt-Touzot p. 115)
-
-  ra1 = one + xi
-  ra2 = one - xi
-
-  rb1 = one + eta
-  rb2 = one - eta
-
-  rc1 = one + gamma
-  rc2 = one - gamma
-
-  shape3D(1) = ONE_EIGHTH*ra2*rb2*rc2
-  shape3D(2) = ONE_EIGHTH*ra1*rb2*rc2
-  shape3D(3) = ONE_EIGHTH*ra1*rb1*rc2
-  shape3D(4) = ONE_EIGHTH*ra2*rb1*rc2
-  shape3D(5) = ONE_EIGHTH*ra2*rb2*rc1
-  shape3D(6) = ONE_EIGHTH*ra1*rb2*rc1
-  shape3D(7) = ONE_EIGHTH*ra1*rb1*rc1
-  shape3D(8) = ONE_EIGHTH*ra2*rb1*rc1
-
-  dershape3D(1,1) = - ONE_EIGHTH*rb2*rc2
-  dershape3D(1,2) = ONE_EIGHTH*rb2*rc2
-  dershape3D(1,3) = ONE_EIGHTH*rb1*rc2
-  dershape3D(1,4) = - ONE_EIGHTH*rb1*rc2
-  dershape3D(1,5) = - ONE_EIGHTH*rb2*rc1
-  dershape3D(1,6) = ONE_EIGHTH*rb2*rc1
-  dershape3D(1,7) = ONE_EIGHTH*rb1*rc1
-  dershape3D(1,8) = - ONE_EIGHTH*rb1*rc1
-
-  dershape3D(2,1) = - ONE_EIGHTH*ra2*rc2
-  dershape3D(2,2) = - ONE_EIGHTH*ra1*rc2
-  dershape3D(2,3) = ONE_EIGHTH*ra1*rc2
-  dershape3D(2,4) = ONE_EIGHTH*ra2*rc2
-  dershape3D(2,5) = - ONE_EIGHTH*ra2*rc1
-  dershape3D(2,6) = - ONE_EIGHTH*ra1*rc1
-  dershape3D(2,7) = ONE_EIGHTH*ra1*rc1
-  dershape3D(2,8) = ONE_EIGHTH*ra2*rc1
-
-  dershape3D(3,1) = - ONE_EIGHTH*ra2*rb2
-  dershape3D(3,2) = - ONE_EIGHTH*ra1*rb2
-  dershape3D(3,3) = - ONE_EIGHTH*ra1*rb1
-  dershape3D(3,4) = - ONE_EIGHTH*ra2*rb1
-  dershape3D(3,5) = ONE_EIGHTH*ra2*rb2
-  dershape3D(3,6) = ONE_EIGHTH*ra1*rb2
-  dershape3D(3,7) = ONE_EIGHTH*ra1*rb1
-  dershape3D(3,8) = ONE_EIGHTH*ra2*rb1
-
-! compute coordinates and jacobian matrix
-  x=ZERO
-  y=ZERO
-  z=ZERO
-  xxi=ZERO
-  xeta=ZERO
-  xgamma=ZERO
-  yxi=ZERO
-  yeta=ZERO
-  ygamma=ZERO
-  zxi=ZERO
-  zeta=ZERO
-  zgamma=ZERO
-
-  do ia=1,NGNOD
-    x=x+shape3D(ia)*xelm(ia)
-    y=y+shape3D(ia)*yelm(ia)
-    z=z+shape3D(ia)*zelm(ia)
-
-    xxi=xxi+dershape3D(1,ia)*xelm(ia)
-    xeta=xeta+dershape3D(2,ia)*xelm(ia)
-    xgamma=xgamma+dershape3D(3,ia)*xelm(ia)
-    yxi=yxi+dershape3D(1,ia)*yelm(ia)
-    yeta=yeta+dershape3D(2,ia)*yelm(ia)
-    ygamma=ygamma+dershape3D(3,ia)*yelm(ia)
-    zxi=zxi+dershape3D(1,ia)*zelm(ia)
-    zeta=zeta+dershape3D(2,ia)*zelm(ia)
-    zgamma=zgamma+dershape3D(3,ia)*zelm(ia)
-  enddo
-
-  jacobian = xxi*(yeta*zgamma-ygamma*zeta) - xeta*(yxi*zgamma-ygamma*zxi) + xgamma*(yxi*zeta-yeta*zxi)
-
-  if(jacobian <= ZERO) stop '3D Jacobian undefined'
-
-! invert the relation (Fletcher p. 50 vol. 2)
-  xix=(yeta*zgamma-ygamma*zeta)/jacobian
-  xiy=(xgamma*zeta-xeta*zgamma)/jacobian
-  xiz=(xeta*ygamma-xgamma*yeta)/jacobian
-  etax=(ygamma*zxi-yxi*zgamma)/jacobian
-  etay=(xxi*zgamma-xgamma*zxi)/jacobian
-  etaz=(xgamma*yxi-xxi*ygamma)/jacobian
-  gammax=(yxi*zeta-yeta*zxi)/jacobian
-  gammay=(xeta*zxi-xxi*zeta)/jacobian
-  gammaz=(xxi*yeta-xeta*yxi)/jacobian
-
-  end subroutine recompute_jacobian
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! recompute 3D jacobian at a given point for a 8-node element
+
+  subroutine recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+                   xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+
+  implicit none
+
+  include "constants.h"
+
+  double precision x,y,z,xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+  double precision xi,eta,gamma,jacobian
+
+! coordinates of the control points
+  double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
+
+! 3D shape functions and their derivatives at receiver
+  double precision shape3D(NGNOD)
+  double precision dershape3D(NDIM,NGNOD)
+
+  double precision xxi,yxi,zxi
+  double precision xeta,yeta,zeta
+  double precision xgamma,ygamma,zgamma
+  double precision ra1,ra2,rb1,rb2,rc1,rc2
+
+  integer ia
+
+! for 8-node element
+  double precision, parameter :: ONE_EIGHTH = 0.125d0
+
+! recompute jacobian for any (xi,eta,gamma) point, not necessarily a GLL point
+
+! check that the parameter file is correct
+  if(NGNOD /= 8) stop 'elements should have 8 control nodes'
+
+! ***
+! *** create the 3D shape functions and the Jacobian for an 8-node element
+! ***
+
+!--- case of an 8-node 3D element (Dhatt-Touzot p. 115)
+
+  ra1 = one + xi
+  ra2 = one - xi
+
+  rb1 = one + eta
+  rb2 = one - eta
+
+  rc1 = one + gamma
+  rc2 = one - gamma
+
+  shape3D(1) = ONE_EIGHTH*ra2*rb2*rc2
+  shape3D(2) = ONE_EIGHTH*ra1*rb2*rc2
+  shape3D(3) = ONE_EIGHTH*ra1*rb1*rc2
+  shape3D(4) = ONE_EIGHTH*ra2*rb1*rc2
+  shape3D(5) = ONE_EIGHTH*ra2*rb2*rc1
+  shape3D(6) = ONE_EIGHTH*ra1*rb2*rc1
+  shape3D(7) = ONE_EIGHTH*ra1*rb1*rc1
+  shape3D(8) = ONE_EIGHTH*ra2*rb1*rc1
+
+  dershape3D(1,1) = - ONE_EIGHTH*rb2*rc2
+  dershape3D(1,2) = ONE_EIGHTH*rb2*rc2
+  dershape3D(1,3) = ONE_EIGHTH*rb1*rc2
+  dershape3D(1,4) = - ONE_EIGHTH*rb1*rc2
+  dershape3D(1,5) = - ONE_EIGHTH*rb2*rc1
+  dershape3D(1,6) = ONE_EIGHTH*rb2*rc1
+  dershape3D(1,7) = ONE_EIGHTH*rb1*rc1
+  dershape3D(1,8) = - ONE_EIGHTH*rb1*rc1
+
+  dershape3D(2,1) = - ONE_EIGHTH*ra2*rc2
+  dershape3D(2,2) = - ONE_EIGHTH*ra1*rc2
+  dershape3D(2,3) = ONE_EIGHTH*ra1*rc2
+  dershape3D(2,4) = ONE_EIGHTH*ra2*rc2
+  dershape3D(2,5) = - ONE_EIGHTH*ra2*rc1
+  dershape3D(2,6) = - ONE_EIGHTH*ra1*rc1
+  dershape3D(2,7) = ONE_EIGHTH*ra1*rc1
+  dershape3D(2,8) = ONE_EIGHTH*ra2*rc1
+
+  dershape3D(3,1) = - ONE_EIGHTH*ra2*rb2
+  dershape3D(3,2) = - ONE_EIGHTH*ra1*rb2
+  dershape3D(3,3) = - ONE_EIGHTH*ra1*rb1
+  dershape3D(3,4) = - ONE_EIGHTH*ra2*rb1
+  dershape3D(3,5) = ONE_EIGHTH*ra2*rb2
+  dershape3D(3,6) = ONE_EIGHTH*ra1*rb2
+  dershape3D(3,7) = ONE_EIGHTH*ra1*rb1
+  dershape3D(3,8) = ONE_EIGHTH*ra2*rb1
+
+! compute coordinates and jacobian matrix
+  x=ZERO
+  y=ZERO
+  z=ZERO
+  xxi=ZERO
+  xeta=ZERO
+  xgamma=ZERO
+  yxi=ZERO
+  yeta=ZERO
+  ygamma=ZERO
+  zxi=ZERO
+  zeta=ZERO
+  zgamma=ZERO
+
+  do ia=1,NGNOD
+    x=x+shape3D(ia)*xelm(ia)
+    y=y+shape3D(ia)*yelm(ia)
+    z=z+shape3D(ia)*zelm(ia)
+
+    xxi=xxi+dershape3D(1,ia)*xelm(ia)
+    xeta=xeta+dershape3D(2,ia)*xelm(ia)
+    xgamma=xgamma+dershape3D(3,ia)*xelm(ia)
+    yxi=yxi+dershape3D(1,ia)*yelm(ia)
+    yeta=yeta+dershape3D(2,ia)*yelm(ia)
+    ygamma=ygamma+dershape3D(3,ia)*yelm(ia)
+    zxi=zxi+dershape3D(1,ia)*zelm(ia)
+    zeta=zeta+dershape3D(2,ia)*zelm(ia)
+    zgamma=zgamma+dershape3D(3,ia)*zelm(ia)
+  enddo
+
+  jacobian = xxi*(yeta*zgamma-ygamma*zeta) - xeta*(yxi*zgamma-ygamma*zxi) + xgamma*(yxi*zeta-yeta*zxi)
+
+  if(jacobian <= ZERO) stop '3D Jacobian undefined'
+
+! invert the relation (Fletcher p. 50 vol. 2)
+  xix=(yeta*zgamma-ygamma*zeta)/jacobian
+  xiy=(xgamma*zeta-xeta*zgamma)/jacobian
+  xiz=(xeta*ygamma-xgamma*yeta)/jacobian
+  etax=(ygamma*zxi-yxi*zgamma)/jacobian
+  etay=(xxi*zgamma-xgamma*zxi)/jacobian
+  etaz=(xgamma*yxi-xxi*ygamma)/jacobian
+  gammax=(yxi*zeta-yeta*zxi)/jacobian
+  gammay=(xeta*zxi-xxi*zeta)/jacobian
+  gammaz=(xxi*yeta-xeta*yxi)/jacobian
+
+  end subroutine recompute_jacobian
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_header_file.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_header_file.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_header_file.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,145 +1,145 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! save header file OUTPUT_FILES/values_from_mesher.h
-
-  subroutine save_header_file(NSPEC_AB,NGLOB_AB,NPROC, &
-             ATTENUATION,ANISOTROPY,NSTEP,DT, &
-             SIMULATION_TYPE,static_memory_size,nfaces_surface_glob_ext_mesh)
-
-  implicit none
-
-  include "constants.h"
-
-  integer NSPEC_AB,NGLOB_AB,NPROC,NSTEP,SIMULATION_TYPE
-
-  logical ATTENUATION,ANISOTROPY
-
-  double precision DT
-
-  double precision :: static_memory_size
-
-  character(len=256) HEADER_FILE
-
-  integer :: nfaces_surface_glob_ext_mesh
-
-! copy number of elements and points in an include file for the solver
-  call get_value_string(HEADER_FILE, 'solver.HEADER_FILE', &
-       OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/values_from_mesher.h')
-
-  open(unit=IOUT,file=HEADER_FILE,status='unknown')
-  write(IOUT,*)
-  write(IOUT,*) '!'
-  write(IOUT,*) '! purely informative use'
-  write(IOUT,*) '!'
-  write(IOUT,*) '! mesh statistics:'
-  write(IOUT,*) '! ---------------'
-  write(IOUT,*) '!'
-  write(IOUT,*) '! note: '
-  write(IOUT,*) '!    the values are only approximate and differ for different processes'
-  write(IOUT,*) '!    because the CUBIT + SCOTCH mesh has'
-  write(IOUT,*) '!    a different number of mesh elements and points in each slice'
-  write(IOUT,*) '!'
-  write(IOUT,*) '! number of processors = ',NPROC
-  write(IOUT,*) '!'
-  write(IOUT,*) '! number of ES nodes = ',real(NPROC)/8.
-  write(IOUT,*) '! percentage of total 640 ES nodes = ',100.*(real(NPROC)/8.)/640.,' %'
-  write(IOUT,*) '! total memory available on these ES nodes (Gb) = ',16.*real(NPROC)/8.
-  write(IOUT,*) '!'
-  write(IOUT,*) '! min vector length = ',NGLLSQUARE
-  write(IOUT,*) '! min critical vector length = ',NGLLSQUARE_NDIM
-  write(IOUT,*) '!'
-  write(IOUT,*) '! master process: total points per AB slice = ',NGLOB_AB
-  write(IOUT,*) '! total elements per AB slice = (will be read in external file)'
-  write(IOUT,*) '! total points per AB slice = (will be read in external file)'
-  write(IOUT,*) '!'
-  write(IOUT,*) '! total for full mesh:'
-  write(IOUT,*) '! -------------------'
-  write(IOUT,*) '!'
-  write(IOUT,*) '!'
-  write(IOUT,*) '! number of time steps = ',NSTEP
-  write(IOUT,*) '!'
-  write(IOUT,*) '! time step = ',DT
-  write(IOUT,*) '!'
-  write(IOUT,*) '! attenuation uses:'
-  if(ATTENUATION) then
-    write(IOUT,*) '!  NSPEC_ATTENUATION = ', NSPEC_AB
-  else
-    write(IOUT,*) '!  NSPEC_ATTENUATION = ', 1
-  endif
-  write(IOUT,*) '! '
-  write(IOUT,*) '! anisotropy uses:'
-  if(ANISOTROPY) then
-    write(IOUT,*) '!  NSPEC_ANISO = ',NSPEC_AB
-  else
-    write(IOUT,*) '!  NSPEC_ANISO = ', 1
-  endif
-  write(IOUT,*) '! '
-  write(IOUT,*) '! adjoint uses:'
-  if (SIMULATION_TYPE == 3) then
-    write(IOUT,*) '!  NSPEC_ADJOINT = ', NSPEC_AB
-    write(IOUT,*) '!  NGLOB_ADJOINT = ', NGLOB_AB
-  else
-    write(IOUT,*) '!  NSPEC_ADJOINT = ', 1
-    write(IOUT,*) '!  NGLOB_ADJOINT = ', 1
-  endif
-  write(IOUT,*) '! '
-  write(IOUT,*) '! approximate least memory needed by the solver:'
-  write(IOUT,*) '! ----------------------------------------------'
-  write(IOUT,*) '!'
-  write(IOUT,*) '! size of static arrays for the biggest slice = ',static_memory_size/1048576.d0,' MB'
-  write(IOUT,*) '!                                             = ',static_memory_size/1073741824.d0,' GB'
-  write(IOUT,*) '!'
-  write(IOUT,*) '!   (should be below to 80% of 1.5 GB = 1.2 GB on pangu'
-  write(IOUT,*) '!    at Caltech, and below and typically equal to 85% of 2 GB = 1.7 GB'
-  write(IOUT,*) '!    on Marenostrum in Barcelona)'
-  write(IOUT,*) '!   (if significantly more, the job will not run by lack of memory)'
-  write(IOUT,*) '!   (if significantly less, you waste a significant amount of memory)'
-  write(IOUT,*) '!'
-  write(IOUT,*)
-  close(IOUT)
-
-! copy number of surface elements in an include file for the movies
-  if( nfaces_surface_glob_ext_mesh > 0 ) then
-
-    call get_value_string(HEADER_FILE, 'solver.HEADER_FILE', &
-         OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/surface_from_mesher.h')
-
-    open(unit=IOUT,file=HEADER_FILE,status='unknown')
-    write(IOUT,*) '!'
-    write(IOUT,*) '! this is the parameter file for static compilation for movie creation'
-    write(IOUT,*) '!'
-    write(IOUT,*) '! number of elements containing surface faces '
-    write(IOUT,*) '! ---------------'
-    write(IOUT,*)
-    write(IOUT,*) 'integer,parameter :: NSPEC_SURFACE_EXT_MESH = ',nfaces_surface_glob_ext_mesh
-    write(IOUT,*)
-    close(IOUT)
-
-  endif
-
-  end subroutine save_header_file
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! save header file OUTPUT_FILES/values_from_mesher.h
+
+  subroutine save_header_file(NSPEC_AB,NGLOB_AB,NPROC, &
+             ATTENUATION,ANISOTROPY,NSTEP,DT, &
+             SIMULATION_TYPE,static_memory_size,nfaces_surface_glob_ext_mesh)
+
+  implicit none
+
+  include "constants.h"
+
+  integer NSPEC_AB,NGLOB_AB,NPROC,NSTEP,SIMULATION_TYPE
+
+  logical ATTENUATION,ANISOTROPY
+
+  double precision DT
+
+  double precision :: static_memory_size
+
+  character(len=256) HEADER_FILE
+
+  integer :: nfaces_surface_glob_ext_mesh
+
+! copy number of elements and points in an include file for the solver
+  call get_value_string(HEADER_FILE, 'solver.HEADER_FILE', &
+       OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/values_from_mesher.h')
+
+  open(unit=IOUT,file=HEADER_FILE,status='unknown')
+  write(IOUT,*)
+  write(IOUT,*) '!'
+  write(IOUT,*) '! purely informative use'
+  write(IOUT,*) '!'
+  write(IOUT,*) '! mesh statistics:'
+  write(IOUT,*) '! ---------------'
+  write(IOUT,*) '!'
+  write(IOUT,*) '! note: '
+  write(IOUT,*) '!    the values are only approximate and differ for different processes'
+  write(IOUT,*) '!    because the CUBIT + SCOTCH mesh has'
+  write(IOUT,*) '!    a different number of mesh elements and points in each slice'
+  write(IOUT,*) '!'
+  write(IOUT,*) '! number of processors = ',NPROC
+  write(IOUT,*) '!'
+  write(IOUT,*) '! number of ES nodes = ',real(NPROC)/8.
+  write(IOUT,*) '! percentage of total 640 ES nodes = ',100.*(real(NPROC)/8.)/640.,' %'
+  write(IOUT,*) '! total memory available on these ES nodes (Gb) = ',16.*real(NPROC)/8.
+  write(IOUT,*) '!'
+  write(IOUT,*) '! min vector length = ',NGLLSQUARE
+  write(IOUT,*) '! min critical vector length = ',NGLLSQUARE_NDIM
+  write(IOUT,*) '!'
+  write(IOUT,*) '! master process: total points per AB slice = ',NGLOB_AB
+  write(IOUT,*) '! total elements per AB slice = (will be read in external file)'
+  write(IOUT,*) '! total points per AB slice = (will be read in external file)'
+  write(IOUT,*) '!'
+  write(IOUT,*) '! total for full mesh:'
+  write(IOUT,*) '! -------------------'
+  write(IOUT,*) '!'
+  write(IOUT,*) '!'
+  write(IOUT,*) '! number of time steps = ',NSTEP
+  write(IOUT,*) '!'
+  write(IOUT,*) '! time step = ',DT
+  write(IOUT,*) '!'
+  write(IOUT,*) '! attenuation uses:'
+  if(ATTENUATION) then
+    write(IOUT,*) '!  NSPEC_ATTENUATION = ', NSPEC_AB
+  else
+    write(IOUT,*) '!  NSPEC_ATTENUATION = ', 1
+  endif
+  write(IOUT,*) '! '
+  write(IOUT,*) '! anisotropy uses:'
+  if(ANISOTROPY) then
+    write(IOUT,*) '!  NSPEC_ANISO = ',NSPEC_AB
+  else
+    write(IOUT,*) '!  NSPEC_ANISO = ', 1
+  endif
+  write(IOUT,*) '! '
+  write(IOUT,*) '! adjoint uses:'
+  if (SIMULATION_TYPE == 3) then
+    write(IOUT,*) '!  NSPEC_ADJOINT = ', NSPEC_AB
+    write(IOUT,*) '!  NGLOB_ADJOINT = ', NGLOB_AB
+  else
+    write(IOUT,*) '!  NSPEC_ADJOINT = ', 1
+    write(IOUT,*) '!  NGLOB_ADJOINT = ', 1
+  endif
+  write(IOUT,*) '! '
+  write(IOUT,*) '! approximate least memory needed by the solver:'
+  write(IOUT,*) '! ----------------------------------------------'
+  write(IOUT,*) '!'
+  write(IOUT,*) '! size of static arrays for the biggest slice = ',static_memory_size/1048576.d0,' MB'
+  write(IOUT,*) '!                                             = ',static_memory_size/1073741824.d0,' GB'
+  write(IOUT,*) '!'
+  write(IOUT,*) '!   (should be below to 80% of 1.5 GB = 1.2 GB on pangu'
+  write(IOUT,*) '!    at Caltech, and below and typically equal to 85% of 2 GB = 1.7 GB'
+  write(IOUT,*) '!    on Marenostrum in Barcelona)'
+  write(IOUT,*) '!   (if significantly more, the job will not run by lack of memory)'
+  write(IOUT,*) '!   (if significantly less, you waste a significant amount of memory)'
+  write(IOUT,*) '!'
+  write(IOUT,*)
+  close(IOUT)
+
+! copy number of surface elements in an include file for the movies
+  if( nfaces_surface_glob_ext_mesh > 0 ) then
+
+    call get_value_string(HEADER_FILE, 'solver.HEADER_FILE', &
+         OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/surface_from_mesher.h')
+
+    open(unit=IOUT,file=HEADER_FILE,status='unknown')
+    write(IOUT,*) '!'
+    write(IOUT,*) '! this is the parameter file for static compilation for movie creation'
+    write(IOUT,*) '!'
+    write(IOUT,*) '! number of elements containing surface faces '
+    write(IOUT,*) '! ---------------'
+    write(IOUT,*)
+    write(IOUT,*) 'integer,parameter :: NSPEC_SURFACE_EXT_MESH = ',nfaces_surface_glob_ext_mesh
+    write(IOUT,*)
+    close(IOUT)
+
+  endif
+
+  end subroutine save_header_file
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/smooth_vol_data.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/smooth_vol_data.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/smooth_vol_data.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -200,7 +200,7 @@
     print*,"  element size   : ",element_size
     print*,"  smoothing sigma_h , sigma_v: ",sigma_h,sigma_v
     ! scalelength: approximately S ~ sigma * sqrt(8.0) for a gaussian smoothing
-    print*,"  smoothing scalelengths horizontal, vertical : ",sigma_h*sqrt(8.0),sigma_v*sqrt(8.0)    
+    print*,"  smoothing scalelengths horizontal, vertical : ",sigma_h*sqrt(8.0),sigma_v*sqrt(8.0)
     print*,"  in dir : ",trim(indir)
     print*,"  out dir: ",trim(outdir)
   endif

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/utm_geo.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/utm_geo.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/utm_geo.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,259 +1,259 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!=====================================================================
-!
-!  UTM (Universal Transverse Mercator) projection from the USGS
-!
-!=====================================================================
-
-  subroutine utm_geo(rlon,rlat,rx,ry,UTM_PROJECTION_ZONE,iway,SUPPRESS_UTM_PROJECTION)
-
-! convert geodetic longitude and latitude to UTM, and back
-! use iway = ILONGLAT2UTM for long/lat to UTM, IUTM2LONGLAT for UTM to lat/long
-! a list of UTM zones of the world is available at www.dmap.co.uk/utmworld.htm
-
-  implicit none
-
-  include "constants.h"
-
-!
-!-----CAMx v2.03
-!
-!     UTM_GEO performs UTM to geodetic (long/lat) translation, and back.
-!
-!     This is a Fortran version of the BASIC program "Transverse Mercator
-!     Conversion", Copyright 1986, Norman J. Berls (Stefan Musarra, 2/94)
-!     Based on algorithm taken from "Map Projections Used by the USGS"
-!     by John P. Snyder, Geological Survey Bulletin 1532, USDI.
-!
-!     Input/Output arguments:
-!
-!        rlon                  Longitude (deg, negative for West)
-!        rlat                  Latitude (deg)
-!        rx                    UTM easting (m)
-!        ry                    UTM northing (m)
-!        UTM_PROJECTION_ZONE  UTM zone
-!        iway                  Conversion type
-!                              ILONGLAT2UTM = geodetic to UTM
-!                              IUTM2LONGLAT = UTM to geodetic
-!
-
-  integer UTM_PROJECTION_ZONE,iway
-  double precision rx,ry,rlon,rlat
-  logical SUPPRESS_UTM_PROJECTION
-
-  double precision, parameter :: degrad=PI/180.d0, raddeg=180.d0/PI
-  double precision, parameter :: semimaj=6378206.4d0, semimin=6356583.8d0
-  double precision, parameter :: scfa=0.9996d0
-
-! some extracts about UTM:
-!
-! There are 60 longitudinal projection zones numbered 1 to 60 starting at 180°W.
-! Each of these zones is 6 degrees wide, apart from a few exceptions around Norway and Svalbard.
-! There are 20 latitudinal zones spanning the latitudes 80°S to 84°N and denoted
-! by the letters C to X, ommitting the letter O.
-! Each of these is 8 degrees south-north, apart from zone X which is 12 degrees south-north.
-!
-! To change the UTM zone and the hemisphere in which the
-! calculations are carried out, need to change the fortran code and recompile. The UTM zone is described
-! actually by the central meridian of that zone, i.e. the longitude at the midpoint of the zone, 3 degrees
-! from either zone boundary.
-! To change hemisphere need to change the "north" variable:
-!  - north=0 for northern hemisphere and
-!  - north=10000000 (10000km) for southern hemisphere. values must be in metres i.e. north=10000000.
-!
-! Note that the UTM grids are actually Mercators which
-! employ the standard UTM scale factor 0.9996 and set the
-! Easting Origin to 500,000;
-! the Northing origin in the southern
-! hemisphere is kept at 0 rather than set to 10,000,000
-! and this gives a uniform scale across the equator if the
-! normal convention of selecting the Base Latitude (origin)
-! at the equator (0 deg.) is followed.  Northings are
-! positive in the northern hemisphere and negative in the
-! southern hemisphere.
-  double precision, parameter :: north=0.d0
-  double precision, parameter :: east=500000.d0
-
-  double precision e2,e4,e6,ep2,xx,yy,dlat,dlon,zone,cm,cmr,delam
-  double precision f1,f2,f3,f4,rm,rn,t,c,a,e1,u,rlat1,dlat1,c1,t1,rn1,r1,d
-  double precision rx_save,ry_save,rlon_save,rlat_save
-
-  ! checks if conversion to utm has to be done
-  if(SUPPRESS_UTM_PROJECTION) then
-    if (iway == ILONGLAT2UTM) then
-      rx = rlon
-      ry = rlat
-    else
-      rlon = rx
-      rlat = ry
-    endif
-    return
-  endif
-
-! save original parameters
-  rlon_save = rlon
-  rlat_save = rlat
-  rx_save = rx
-  ry_save = ry
-
-  xx = 0.d0
-  yy = 0.d0
-  dlat = 0.d0
-  dlon = 0.d0
-
-! define parameters of reference ellipsoid
-  e2=1.0-(semimin/semimaj)**2.0
-  e4=e2*e2
-  e6=e2*e4
-  ep2=e2/(1.-e2)
-
-  if (iway == IUTM2LONGLAT) then
-    xx = rx
-    yy = ry
-  else
-    dlon = rlon
-    dlat = rlat
-  endif
-!
-!----- Set Zone parameters
-!
-  zone = dble(UTM_PROJECTION_ZONE)
-  ! sets central meridian for this zone
-  cm = zone*6.0 - 183.0
-  cmr = cm*degrad
-!
-!---- Lat/Lon to UTM conversion
-!
-  if (iway == ILONGLAT2UTM) then
-
-  rlon = degrad*dlon
-  rlat = degrad*dlat
-
-  delam = dlon - cm
-  if (delam < -180.) delam = delam + 360.
-  if (delam > 180.) delam = delam - 360.
-  delam = delam*degrad
-
-  f1 = (1. - e2/4. - 3.*e4/64. - 5.*e6/256)*rlat
-  f2 = 3.*e2/8. + 3.*e4/32. + 45.*e6/1024.
-  f2 = f2*sin(2.*rlat)
-  f3 = 15.*e4/256.*45.*e6/1024.
-  f3 = f3*sin(4.*rlat)
-  f4 = 35.*e6/3072.
-  f4 = f4*sin(6.*rlat)
-  rm = semimaj*(f1 - f2 + f3 - f4)
-  if (dlat == 90. .or. dlat == -90.) then
-    xx = 0.
-    yy = scfa*rm
-  else
-    rn = semimaj/sqrt(1. - e2*sin(rlat)**2)
-    t = tan(rlat)**2
-    c = ep2*cos(rlat)**2
-    a = cos(rlat)*delam
-
-    f1 = (1. - t + c)*a**3/6.
-    f2 = 5. - 18.*t + t**2 + 72.*c - 58.*ep2
-    f2 = f2*a**5/120.
-    xx = scfa*rn*(a + f1 + f2)
-    f1 = a**2/2.
-    f2 = 5. - t + 9.*c + 4.*c**2
-    f2 = f2*a**4/24.
-    f3 = 61. - 58.*t + t**2 + 600.*c - 330.*ep2
-    f3 = f3*a**6/720.
-    yy = scfa*(rm + rn*tan(rlat)*(f1 + f2 + f3))
-  endif
-  xx = xx + east
-  yy = yy + north
-
-!
-!---- UTM to Lat/Lon conversion
-!
-  else
-
-  xx = xx - east
-  yy = yy - north
-  e1 = sqrt(1. - e2)
-  e1 = (1. - e1)/(1. + e1)
-  rm = yy/scfa
-  u = 1. - e2/4. - 3.*e4/64. - 5.*e6/256.
-  u = rm/(semimaj*u)
-
-  f1 = 3.*e1/2. - 27.*e1**3./32.
-  f1 = f1*sin(2.*u)
-  f2 = 21.*e1**2/16. - 55.*e1**4/32.
-  f2 = f2*sin(4.*u)
-  f3 = 151.*e1**3./96.
-  f3 = f3*sin(6.*u)
-  rlat1 = u + f1 + f2 + f3
-  dlat1 = rlat1*raddeg
-  if (dlat1 >= 90. .or. dlat1 <= -90.) then
-    dlat1 = dmin1(dlat1,dble(90.) )
-    dlat1 = dmax1(dlat1,dble(-90.) )
-    dlon = cm
-  else
-    c1 = ep2*cos(rlat1)**2.
-    t1 = tan(rlat1)**2.
-    f1 = 1. - e2*sin(rlat1)**2.
-    rn1 = semimaj/sqrt(f1)
-    r1 = semimaj*(1. - e2)/sqrt(f1**3)
-    d = xx/(rn1*scfa)
-
-    f1 = rn1*tan(rlat1)/r1
-    f2 = d**2/2.
-    f3 = 5.*3.*t1 + 10.*c1 - 4.*c1**2 - 9.*ep2
-    f3 = f3*d**2*d**2/24.
-    f4 = 61. + 90.*t1 + 298.*c1 + 45.*t1**2. - 252.*ep2 - 3.*c1**2
-    f4 = f4*(d**2)**3./720.
-    rlat = rlat1 - f1*(f2 - f3 + f4)
-    dlat = rlat*raddeg
-
-    f1 = 1. + 2.*t1 + c1
-    f1 = f1*d**2*d/6.
-    f2 = 5. - 2.*c1 + 28.*t1 - 3.*c1**2 + 8.*ep2 + 24.*t1**2.
-    f2 = f2*(d**2)**2*d/120.
-    rlon = cmr + (d - f1 + f2)/cos(rlat1)
-    dlon = rlon*raddeg
-    if (dlon < -180.) dlon = dlon + 360.
-    if (dlon > 180.) dlon = dlon - 360.
-  endif
-  endif
-
-  if (iway == IUTM2LONGLAT) then
-    rlon = dlon
-    rlat = dlat
-    rx = rx_save
-    ry = ry_save
-  else
-    rx = xx
-    ry = yy
-    rlon = rlon_save
-    rlat = rlat_save
-  endif
-
-  end subroutine utm_geo
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!=====================================================================
+!
+!  UTM (Universal Transverse Mercator) projection from the USGS
+!
+!=====================================================================
+
+  subroutine utm_geo(rlon,rlat,rx,ry,UTM_PROJECTION_ZONE,iway,SUPPRESS_UTM_PROJECTION)
+
+! convert geodetic longitude and latitude to UTM, and back
+! use iway = ILONGLAT2UTM for long/lat to UTM, IUTM2LONGLAT for UTM to lat/long
+! a list of UTM zones of the world is available at www.dmap.co.uk/utmworld.htm
+
+  implicit none
+
+  include "constants.h"
+
+!
+!-----CAMx v2.03
+!
+!     UTM_GEO performs UTM to geodetic (long/lat) translation, and back.
+!
+!     This is a Fortran version of the BASIC program "Transverse Mercator
+!     Conversion", Copyright 1986, Norman J. Berls (Stefan Musarra, 2/94)
+!     Based on algorithm taken from "Map Projections Used by the USGS"
+!     by John P. Snyder, Geological Survey Bulletin 1532, USDI.
+!
+!     Input/Output arguments:
+!
+!        rlon                  Longitude (deg, negative for West)
+!        rlat                  Latitude (deg)
+!        rx                    UTM easting (m)
+!        ry                    UTM northing (m)
+!        UTM_PROJECTION_ZONE  UTM zone
+!        iway                  Conversion type
+!                              ILONGLAT2UTM = geodetic to UTM
+!                              IUTM2LONGLAT = UTM to geodetic
+!
+
+  integer UTM_PROJECTION_ZONE,iway
+  double precision rx,ry,rlon,rlat
+  logical SUPPRESS_UTM_PROJECTION
+
+  double precision, parameter :: degrad=PI/180.d0, raddeg=180.d0/PI
+  double precision, parameter :: semimaj=6378206.4d0, semimin=6356583.8d0
+  double precision, parameter :: scfa=0.9996d0
+
+! some extracts about UTM:
+!
+! There are 60 longitudinal projection zones numbered 1 to 60 starting at 180°W.
+! Each of these zones is 6 degrees wide, apart from a few exceptions around Norway and Svalbard.
+! There are 20 latitudinal zones spanning the latitudes 80°S to 84°N and denoted
+! by the letters C to X, ommitting the letter O.
+! Each of these is 8 degrees south-north, apart from zone X which is 12 degrees south-north.
+!
+! To change the UTM zone and the hemisphere in which the
+! calculations are carried out, need to change the fortran code and recompile. The UTM zone is described
+! actually by the central meridian of that zone, i.e. the longitude at the midpoint of the zone, 3 degrees
+! from either zone boundary.
+! To change hemisphere need to change the "north" variable:
+!  - north=0 for northern hemisphere and
+!  - north=10000000 (10000km) for southern hemisphere. values must be in metres i.e. north=10000000.
+!
+! Note that the UTM grids are actually Mercators which
+! employ the standard UTM scale factor 0.9996 and set the
+! Easting Origin to 500,000;
+! the Northing origin in the southern
+! hemisphere is kept at 0 rather than set to 10,000,000
+! and this gives a uniform scale across the equator if the
+! normal convention of selecting the Base Latitude (origin)
+! at the equator (0 deg.) is followed.  Northings are
+! positive in the northern hemisphere and negative in the
+! southern hemisphere.
+  double precision, parameter :: north=0.d0
+  double precision, parameter :: east=500000.d0
+
+  double precision e2,e4,e6,ep2,xx,yy,dlat,dlon,zone,cm,cmr,delam
+  double precision f1,f2,f3,f4,rm,rn,t,c,a,e1,u,rlat1,dlat1,c1,t1,rn1,r1,d
+  double precision rx_save,ry_save,rlon_save,rlat_save
+
+  ! checks if conversion to utm has to be done
+  if(SUPPRESS_UTM_PROJECTION) then
+    if (iway == ILONGLAT2UTM) then
+      rx = rlon
+      ry = rlat
+    else
+      rlon = rx
+      rlat = ry
+    endif
+    return
+  endif
+
+! save original parameters
+  rlon_save = rlon
+  rlat_save = rlat
+  rx_save = rx
+  ry_save = ry
+
+  xx = 0.d0
+  yy = 0.d0
+  dlat = 0.d0
+  dlon = 0.d0
+
+! define parameters of reference ellipsoid
+  e2=1.0-(semimin/semimaj)**2.0
+  e4=e2*e2
+  e6=e2*e4
+  ep2=e2/(1.-e2)
+
+  if (iway == IUTM2LONGLAT) then
+    xx = rx
+    yy = ry
+  else
+    dlon = rlon
+    dlat = rlat
+  endif
+!
+!----- Set Zone parameters
+!
+  zone = dble(UTM_PROJECTION_ZONE)
+  ! sets central meridian for this zone
+  cm = zone*6.0 - 183.0
+  cmr = cm*degrad
+!
+!---- Lat/Lon to UTM conversion
+!
+  if (iway == ILONGLAT2UTM) then
+
+  rlon = degrad*dlon
+  rlat = degrad*dlat
+
+  delam = dlon - cm
+  if (delam < -180.) delam = delam + 360.
+  if (delam > 180.) delam = delam - 360.
+  delam = delam*degrad
+
+  f1 = (1. - e2/4. - 3.*e4/64. - 5.*e6/256)*rlat
+  f2 = 3.*e2/8. + 3.*e4/32. + 45.*e6/1024.
+  f2 = f2*sin(2.*rlat)
+  f3 = 15.*e4/256.*45.*e6/1024.
+  f3 = f3*sin(4.*rlat)
+  f4 = 35.*e6/3072.
+  f4 = f4*sin(6.*rlat)
+  rm = semimaj*(f1 - f2 + f3 - f4)
+  if (dlat == 90. .or. dlat == -90.) then
+    xx = 0.
+    yy = scfa*rm
+  else
+    rn = semimaj/sqrt(1. - e2*sin(rlat)**2)
+    t = tan(rlat)**2
+    c = ep2*cos(rlat)**2
+    a = cos(rlat)*delam
+
+    f1 = (1. - t + c)*a**3/6.
+    f2 = 5. - 18.*t + t**2 + 72.*c - 58.*ep2
+    f2 = f2*a**5/120.
+    xx = scfa*rn*(a + f1 + f2)
+    f1 = a**2/2.
+    f2 = 5. - t + 9.*c + 4.*c**2
+    f2 = f2*a**4/24.
+    f3 = 61. - 58.*t + t**2 + 600.*c - 330.*ep2
+    f3 = f3*a**6/720.
+    yy = scfa*(rm + rn*tan(rlat)*(f1 + f2 + f3))
+  endif
+  xx = xx + east
+  yy = yy + north
+
+!
+!---- UTM to Lat/Lon conversion
+!
+  else
+
+  xx = xx - east
+  yy = yy - north
+  e1 = sqrt(1. - e2)
+  e1 = (1. - e1)/(1. + e1)
+  rm = yy/scfa
+  u = 1. - e2/4. - 3.*e4/64. - 5.*e6/256.
+  u = rm/(semimaj*u)
+
+  f1 = 3.*e1/2. - 27.*e1**3./32.
+  f1 = f1*sin(2.*u)
+  f2 = 21.*e1**2/16. - 55.*e1**4/32.
+  f2 = f2*sin(4.*u)
+  f3 = 151.*e1**3./96.
+  f3 = f3*sin(6.*u)
+  rlat1 = u + f1 + f2 + f3
+  dlat1 = rlat1*raddeg
+  if (dlat1 >= 90. .or. dlat1 <= -90.) then
+    dlat1 = dmin1(dlat1,dble(90.) )
+    dlat1 = dmax1(dlat1,dble(-90.) )
+    dlon = cm
+  else
+    c1 = ep2*cos(rlat1)**2.
+    t1 = tan(rlat1)**2.
+    f1 = 1. - e2*sin(rlat1)**2.
+    rn1 = semimaj/sqrt(f1)
+    r1 = semimaj*(1. - e2)/sqrt(f1**3)
+    d = xx/(rn1*scfa)
+
+    f1 = rn1*tan(rlat1)/r1
+    f2 = d**2/2.
+    f3 = 5.*3.*t1 + 10.*c1 - 4.*c1**2 - 9.*ep2
+    f3 = f3*d**2*d**2/24.
+    f4 = 61. + 90.*t1 + 298.*c1 + 45.*t1**2. - 252.*ep2 - 3.*c1**2
+    f4 = f4*(d**2)**3./720.
+    rlat = rlat1 - f1*(f2 - f3 + f4)
+    dlat = rlat*raddeg
+
+    f1 = 1. + 2.*t1 + c1
+    f1 = f1*d**2*d/6.
+    f2 = 5. - 2.*c1 + 28.*t1 - 3.*c1**2 + 8.*ep2 + 24.*t1**2.
+    f2 = f2*(d**2)**2*d/120.
+    rlon = cmr + (d - f1 + f2)/cos(rlat1)
+    dlon = rlon*raddeg
+    if (dlon < -180.) dlon = dlon + 360.
+    if (dlon > 180.) dlon = dlon - 360.
+  endif
+  endif
+
+  if (iway == IUTM2LONGLAT) then
+    rlon = dlon
+    rlat = dlat
+    rx = rx_save
+    ry = ry_save
+  else
+    rx = xx
+    ry = yy
+    rlon = rlon_save
+    rlat = rlat_save
+  endif
+
+  end subroutine utm_geo
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -128,17 +128,17 @@
   endif
 
   end subroutine assemble_MPI_vector_ext_mesh
-  
+
 !
 !-------------------------------------------------------------------------------------------------
 !
-  
+
   subroutine assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,array_val, &
-       buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
-       num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-       nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,my_neighbours_ext_mesh, &
-       request_send_vector_ext_mesh,request_recv_vector_ext_mesh &
-       )
+                                           buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+                                           num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                                           nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                                           my_neighbours_ext_mesh, &
+                                           request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
 
     ! sends data
 
@@ -195,17 +195,17 @@
     endif
 
   end subroutine assemble_MPI_vector_ext_mesh_s
-  
+
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine assemble_MPI_vector_ext_mesh_send_cuda(NPROC, &
-                          buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
-                          num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                          nibool_interfaces_ext_mesh, &
-                          my_neighbours_ext_mesh, &
-                          request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+  subroutine assemble_MPI_vector_send_cuda(NPROC, &
+                                          buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+                                          num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                                          nibool_interfaces_ext_mesh, &
+                                          my_neighbours_ext_mesh, &
+                                          request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
 
     ! sends data
     ! note: array to assemble already filled into buffer_send_vector_ext_mesh array
@@ -226,9 +226,10 @@
 
     integer iinterface
 
-    ! here we have to assemble all the contributions between partitions using MPI
+    ! note: preparation of the contribution between partitions using MPI
+    !          already done in transfer_boun_accel routine
 
-    ! assemble only if more than one partition
+    ! send only if more than one partition
     if(NPROC > 1) then
 
        ! send messages
@@ -249,7 +250,7 @@
 
     endif
 
-  end subroutine assemble_MPI_vector_ext_mesh_send_cuda
+  end subroutine assemble_MPI_vector_send_cuda
 
 !
 !-------------------------------------------------------------------------------------------------
@@ -315,11 +316,12 @@
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine assemble_MPI_vector_ext_mesh_write_cuda(NPROC,NGLOB_AB,array_val, Mesh_pointer, &
-            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-            request_send_vector_ext_mesh,request_recv_vector_ext_mesh,&
-            FORWARD_OR_ADJOINT )
+  subroutine assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,array_val, Mesh_pointer, &
+                                            buffer_recv_vector_ext_mesh, &
+                                            num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh, &
+                                            FORWARD_OR_ADJOINT )
 
 ! waits for data to receive and assembles
 
@@ -344,7 +346,7 @@
 
   integer iinterface ! ipoin
   integer FORWARD_OR_ADJOINT
-  
+
 ! here we have to assemble all the contributions between partitions using MPI
 
 ! assemble only if more than one partition
@@ -356,10 +358,11 @@
   enddo
 
 ! adding contributions of neighbours
-  call transfer_and_assemble_accel_to_device(Mesh_pointer, array_val, buffer_recv_vector_ext_mesh, &
-       num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh,&
-       ibool_interfaces_ext_mesh,FORWARD_OR_ADJOINT)
-  
+  call transfer_asmbl_accel_to_device(Mesh_pointer, array_val, buffer_recv_vector_ext_mesh, &
+                                    num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, &
+                                    nibool_interfaces_ext_mesh,&
+                                    ibool_interfaces_ext_mesh,FORWARD_OR_ADJOINT)
+
   ! This step is done via previous function transfer_and_assemble...
   ! do iinterface = 1, num_interfaces_ext_mesh
   !   do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
@@ -367,7 +370,7 @@
   !          array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
   !   enddo
   ! enddo
-  
+
 ! wait for communications completion (send)
   do iinterface = 1, num_interfaces_ext_mesh
     call wait_req(request_send_vector_ext_mesh(iinterface))
@@ -375,13 +378,13 @@
 
   endif
 
-  end subroutine assemble_MPI_vector_ext_mesh_write_cuda
+  end subroutine assemble_MPI_vector_write_cuda
 
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine assemble_MPI_scalar_ext_mesh_send_cuda(NPROC, &
+  subroutine assemble_MPI_scalar_send_cuda(NPROC, &
                         buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
                         num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                         nibool_interfaces_ext_mesh, &
@@ -410,8 +413,8 @@
 ! sends only if more than one partition
   if(NPROC > 1) then
 
-    ! note: partition border copy into the buffer has already been done 
-    !          by routine transfer_boundary_potential_from_device() 
+    ! note: partition border copy into the buffer has already been done
+    !          by routine transfer_boun_pot_from_device()
 
     ! send messages
     do iinterface = 1, num_interfaces_ext_mesh
@@ -434,13 +437,13 @@
 
   endif
 
-  end subroutine assemble_MPI_scalar_ext_mesh_send_cuda
+  end subroutine assemble_MPI_scalar_send_cuda
 
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine assemble_MPI_scalar_ext_mesh_write_cuda(NPROC,NGLOB_AB,array_val, &
+  subroutine assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,array_val, &
                         Mesh_pointer, &
                         buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
                         max_nibool_interfaces_ext_mesh, &
@@ -457,7 +460,7 @@
   integer :: NPROC
   integer :: NGLOB_AB
   integer(kind=8) :: Mesh_pointer
-  
+
   integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
 
 ! array to assemble
@@ -484,7 +487,7 @@
     enddo
 
     ! adding contributions of neighbours
-    call transfer_and_assemble_potential_to_device(Mesh_pointer, array_val, buffer_recv_scalar_ext_mesh, &
+    call transfer_asmbl_pot_to_device(Mesh_pointer, array_val, buffer_recv_scalar_ext_mesh, &
                 num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh,&
                 ibool_interfaces_ext_mesh,FORWARD_OR_ADJOINT)
 
@@ -504,6 +507,6 @@
 
   endif
 
-  end subroutine assemble_MPI_scalar_ext_mesh_write_cuda
+  end subroutine assemble_MPI_scalar_write_cuda
 
-  
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/comp_source_time_function.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/comp_source_time_function.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/comp_source_time_function.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,95 +1,95 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  double precision function comp_source_time_function(t,hdur)
-
-  implicit none
-
-  include "constants.h"
-
-  double precision t,hdur
-
-  double precision, external :: netlib_specfun_erf
-
-  ! quasi Heaviside, small Gaussian moment-rate tensor with hdur
-  comp_source_time_function = 0.5d0*(1.0d0 + netlib_specfun_erf(t/hdur))
-
-  end function comp_source_time_function
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  double precision function comp_source_time_function_gauss(t,hdur)
-
-  implicit none
-
-  include "constants.h"
-
-  double precision :: t,hdur
-  double precision :: hdur_decay
-  double precision,parameter :: SOURCE_DECAY_STRONG = 2.0d0/SOURCE_DECAY_MIMIC_TRIANGLE
-
-  ! note: hdur given is hdur_gaussian = hdur/SOURCE_DECAY_MIMIC_TRIANGLE
-  !           and SOURCE_DECAY_MIMIC_TRIANGLE ~ 1.68
-  hdur_decay = hdur
-
-  ! this here uses a stronger gaussian decay rate (empirical value) to avoid non-zero onset times;
-  ! however, it should mimik a triangle source time function...
-  !hdur_decay = hdur  / SOURCE_DECAY_STRONG
-
-  ! note: a nonzero time to start the simulation with would lead to more high-frequency noise
-  !          due to the (spatial) discretization of the point source on the mesh
-
-  ! gaussian
-  comp_source_time_function_gauss = exp(-(t/hdur_decay)**2)/(sqrt(PI)*hdur_decay)
-
-  end function comp_source_time_function_gauss
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  double precision function comp_source_time_function_rickr(t,f0)
-
-  implicit none
-
-  include "constants.h"
-
-  double precision t,f0
-
-  ! ricker
-  comp_source_time_function_rickr = (1.d0 - 2.d0*PI*PI*f0*f0*t*t ) &
-                                    * exp( -PI*PI*f0*f0*t*t )
-
-  !!! another source time function they have called 'ricker' in some old papers,
-  !!! e.g., 'Finite-Frequency Kernels Based on Adjoint Methods' by Liu & Tromp, BSSA (2006)
-  !!! in order to benchmark those simulations, the following formula is needed.
-  ! comp_source_time_function_rickr = -2.d0*PI*PI*f0*f0*f0*t * exp(-PI*PI*f0*f0*t*t)
-
-  end function comp_source_time_function_rickr
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  double precision function comp_source_time_function(t,hdur)
+
+  implicit none
+
+  include "constants.h"
+
+  double precision t,hdur
+
+  double precision, external :: netlib_specfun_erf
+
+  ! quasi Heaviside, small Gaussian moment-rate tensor with hdur
+  comp_source_time_function = 0.5d0*(1.0d0 + netlib_specfun_erf(t/hdur))
+
+  end function comp_source_time_function
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  double precision function comp_source_time_function_gauss(t,hdur)
+
+  implicit none
+
+  include "constants.h"
+
+  double precision :: t,hdur
+  double precision :: hdur_decay
+  double precision,parameter :: SOURCE_DECAY_STRONG = 2.0d0/SOURCE_DECAY_MIMIC_TRIANGLE
+
+  ! note: hdur given is hdur_gaussian = hdur/SOURCE_DECAY_MIMIC_TRIANGLE
+  !           and SOURCE_DECAY_MIMIC_TRIANGLE ~ 1.68
+  hdur_decay = hdur
+
+  ! this here uses a stronger gaussian decay rate (empirical value) to avoid non-zero onset times;
+  ! however, it should mimik a triangle source time function...
+  !hdur_decay = hdur  / SOURCE_DECAY_STRONG
+
+  ! note: a nonzero time to start the simulation with would lead to more high-frequency noise
+  !          due to the (spatial) discretization of the point source on the mesh
+
+  ! gaussian
+  comp_source_time_function_gauss = exp(-(t/hdur_decay)**2)/(sqrt(PI)*hdur_decay)
+
+  end function comp_source_time_function_gauss
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  double precision function comp_source_time_function_rickr(t,f0)
+
+  implicit none
+
+  include "constants.h"
+
+  double precision t,f0
+
+  ! ricker
+  comp_source_time_function_rickr = (1.d0 - 2.d0*PI*PI*f0*f0*t*t ) &
+                                    * exp( -PI*PI*f0*f0*t*t )
+
+  !!! another source time function they have called 'ricker' in some old papers,
+  !!! e.g., 'Finite-Frequency Kernels Based on Adjoint Methods' by Liu & Tromp, BSSA (2006)
+  !!! in order to benchmark those simulations, the following formula is needed.
+  ! comp_source_time_function_rickr = -2.d0*PI*PI*f0*f0*f0*t * exp(-PI*PI*f0*f0*t*t)
+
+  end function comp_source_time_function_rickr
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -40,7 +40,8 @@
 
   use specfem_par,only: PRINT_SOURCE_TIME_FUNCTION,stf_used_total, &
                         xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,&
-                        station_name,network_name,adj_source_file,nrec_local,number_receiver_global
+                        station_name,network_name,adj_source_file,nrec_local,number_receiver_global, &
+                        nsources_local
   implicit none
 
   include "constants.h"
@@ -75,7 +76,7 @@
 !adjoint simulations
   integer:: SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT
   logical:: GPU_MODE
-  integer(kind=8) :: Mesh_pointer    
+  integer(kind=8) :: Mesh_pointer
   integer:: nrec
   integer,dimension(nrec) :: islice_selected_rec,ispec_selected_rec
   integer:: nadj_rec_local
@@ -92,7 +93,7 @@
   integer :: isource,iglob,ispec,i,j,k,ier
   integer :: irec_local,irec
   double precision, dimension(NSOURCES) :: stf_pre_compute
-  
+
 ! adjoint sources in SU format
   integer :: it_start,it_end
   real(kind=CUSTOM_REAL) :: adj_temp(NSTEP)
@@ -112,7 +113,7 @@
   endif
 
 ! forward simulations
-  if (SIMULATION_TYPE == 1) then
+  if (SIMULATION_TYPE == 1 .and. nsources_local > 0) then
 
 !way 2
     if(GPU_MODE) then
@@ -121,21 +122,21 @@
           if(USE_FORCE_POINT_SOURCE) then
             ! precomputes source time function factor
             stf_pre_compute(isource) = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr( &
-                  dble(it-1)*DT-t0-tshift_cmt(isource),hdur(isource))                
+                  dble(it-1)*DT-t0-tshift_cmt(isource),hdur(isource))
           else
             stf_pre_compute(isource) = comp_source_time_function_gauss( &
-                  dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))                      
+                  dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
           endif
         enddo
-        stf_used_total = stf_used_total + sum(stf_pre_compute(:))        
+        stf_used_total = stf_used_total + sum(stf_pre_compute(:))
         ! only implements SIMTYPE=1 and NOISE_TOM=0
         ! write(*,*) "fortran dt = ", dt
         ! change dt -> DT
-        call compute_add_sources_acoustic_cuda(Mesh_pointer, phase_is_inner, &
+        call compute_add_sources_ac_cuda(Mesh_pointer, phase_is_inner, &
                                               NSOURCES, SIMULATION_TYPE, &
                                               USE_FORCE_POINT_SOURCE, stf_pre_compute, myrank)
       endif
-      
+
     else ! .NOT. GPU_MODE
 
       ! adds acoustic sources
@@ -225,7 +226,7 @@
         endif ! myrank
 
       enddo ! NSOURCES
-    endif ! GPU_MODE    
+    endif ! GPU_MODE
   endif
 
 ! NOTE: adjoint sources and backward wavefield timing:
@@ -331,7 +332,7 @@
            enddo
            close(IIN_SU1)
         endif !if (.not. SU_FORMAT)
-        
+
         deallocate(adj_sourcearray)
 
       endif ! if(ibool_read_adj_arrays)
@@ -339,11 +340,11 @@
       if( it < NSTEP ) then
         ! receivers act as sources
         if( GPU_MODE) then
-          call add_sources_acoustic_sim_type_2_or_3_cuda(Mesh_pointer, adj_sourcearrays, &
+          call add_sources_ac_sim_2_or_3_cuda(Mesh_pointer, adj_sourcearrays, &
                  size(adj_sourcearrays), phase_is_inner, myrank, nrec, &
                  NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC),&
                  islice_selected_rec, nadj_rec_local, NTSTEP_BETWEEN_READ_ADJSRC)
-        else      
+        else
           irec_local = 0
           do irec = 1,nrec
             ! add the source (only if this proc carries the source)
@@ -353,14 +354,14 @@
               ! adds source array
               ispec = ispec_selected_rec(irec)
               if( ispec_is_acoustic(ispec) ) then
-              
+
                 ! checks if element is in phase_is_inner run
                 if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
                   do k = 1,NGLLZ
                     do j = 1,NGLLY
                       do i = 1,NGLLX
                         iglob = ibool(i,j,k,ispec)
-                        ! beware, for acoustic medium, a pressure source would be taking the negative 
+                        ! beware, for acoustic medium, a pressure source would be taking the negative
                         ! and divide by Kappa of the fluid;
                         ! this would have to be done when constructing the adjoint source.
                         !
@@ -387,31 +388,31 @@
 !           thus indexing is NSTEP - it , instead of NSTEP - it - 1
 
 ! adjoint simulations
-  if (SIMULATION_TYPE == 3) then
+  if (SIMULATION_TYPE == 3 .and. nsources_local > 0) then
 
     ! on GPU
     if(GPU_MODE) then
-      if( NSOURCES > 0 ) then    
+      if( NSOURCES > 0 ) then
         do isource = 1,NSOURCES
           if(USE_FORCE_POINT_SOURCE) then
             ! precomputes source time function factors
             stf_pre_compute(isource) = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr( &
-                  dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur(isource))          
+                  dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur(isource))
           else
             stf_pre_compute(isource) = comp_source_time_function_gauss( &
-                  dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))   
-          endif        
+                  dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+          endif
         enddo
         stf_used_total = stf_used_total + sum(stf_pre_compute(:))
-      
+
         ! only implements SIMTYPE=3
-        call compute_add_sources_acoustic_sim3_cuda(Mesh_pointer, phase_is_inner, &
+        call compute_add_sources_ac_s3_cuda(Mesh_pointer, phase_is_inner, &
                                     NSOURCES, SIMULATION_TYPE, &
                                     USE_FORCE_POINT_SOURCE, stf_pre_compute, myrank)
       endif
 
     else ! .NOT. GPU_MODE
-      
+
       ! adds acoustic sources
       do isource = 1,NSOURCES
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -40,13 +40,17 @@
                         xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,&
                         station_name,network_name,adj_source_file, &
                         LOCAL_PATH,wgllwgll_xy, &
-                        num_free_surface_faces,free_surface_ispec,free_surface_ijk,free_surface_jacobian2Dw, &
+                        num_free_surface_faces,free_surface_ispec, &
+                        free_surface_ijk,free_surface_jacobian2Dw, &
                         noise_sourcearray,irec_master_noise, &
-                        normal_x_noise,normal_y_noise,normal_z_noise,mask_noise,noise_surface_movie, &
-                        nrec_local,number_receiver_global
+                        normal_x_noise,normal_y_noise,normal_z_noise, &
+                        mask_noise,noise_surface_movie, &
+                        nrec_local,number_receiver_global, &
+                        nsources_local
 
   use specfem_par_movie,only: &
-                        store_val_ux_external_mesh,store_val_uy_external_mesh,store_val_uz_external_mesh
+                        store_val_ux_external_mesh,store_val_uy_external_mesh, &
+                        store_val_uz_external_mesh
 
   implicit none
 
@@ -95,7 +99,7 @@
   real(kind=CUSTOM_REAL),dimension(:,:,:,:,:),allocatable:: adj_sourcearray
   real(kind=CUSTOM_REAL) stf_used,stf_used_total_all,time_source
   ! for GPU_MODE
-  real(kind=SIZE_DOUBLE), dimension(NSOURCES) :: stf_pre_compute
+  double precision, dimension(NSOURCES) :: stf_pre_compute
   integer :: isource,iglob,i,j,k,ispec
   integer :: irec_local,irec, ier
 
@@ -109,7 +113,7 @@
   integer(kind=4) :: i4head(nheader/4)  ! 4-byte-integer
   real(kind=4)    :: r4head(nheader/4)  ! 4-byte-real
   equivalence (i2head,i4head,r4head)    ! share the same 240-byte-memory
-  double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY),hgammar(NGLLZ), hpgammar(NGLLZ)
+  double precision :: hxir(NGLLX),hpxir(NGLLX),hetar(NGLLY),hpetar(NGLLY),hgammar(NGLLZ),hpgammar(NGLLZ)
 
 ! plotting source time function
   if(PRINT_SOURCE_TIME_FUNCTION .and. .not. phase_is_inner ) then
@@ -118,89 +122,93 @@
   endif
 
   ! forward simulations
-  if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0) then
+  if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then
 
-     if(GPU_MODE) then
+    if(GPU_MODE) then
+      do isource = 1,NSOURCES
+        stf_pre_compute(isource) = comp_source_time_function( &
+                                        dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+      enddo
+      ! only implements SIMTYPE=1 and NOISE_TOM=0
+      ! write(*,*) "fortran dt = ", dt
+      ! change dt -> DT
+      call compute_add_sources_el_cuda(Mesh_pointer, &
+                                      !NSPEC_AB, NGLOB_AB,
+                                      phase_is_inner,NSOURCES, &
+                                      !it, DT, t0, &
+                                      !SIMULATION_TYPE, NSTEP, NOISE_TOMOGRAPHY,&
+                                      !USE_FORCE_POINT_SOURCE, &
+                                      stf_pre_compute, myrank)
 
-        do isource = 1,NSOURCES
-           stf_pre_compute(isource) = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian)
-        enddo
-        ! only implements SIMTYPE=1 and NOISE_TOM=0
-        ! write(*,*) "fortran dt = ", dt
-        ! change dt -> DT
-        call compute_add_sources_elastic_cuda(Mesh_pointer, NSPEC_AB, NGLOB_AB, phase_is_inner,&
-             NSOURCES, it, DT, t0, SIMULATION_TYPE, NSTEP, NOISE_TOMOGRAPHY,&
-             USE_FORCE_POINT_SOURCE, stf_pre_compute, myrank)
+    else ! .NOT. GPU_MODE
 
-     else ! .NOT. GPU_MODE
+      do isource = 1,NSOURCES
 
-        do isource = 1,NSOURCES
+         !   add the source (only if this proc carries the source)
+         if(myrank == islice_selected_source(isource)) then
 
-           !   add the source (only if this proc carries the source)
-           if(myrank == islice_selected_source(isource)) then
+            ispec = ispec_selected_source(isource)
 
-              ispec = ispec_selected_source(isource)
+            if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
 
-              if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+               if( ispec_is_elastic(ispec) ) then
 
-                 if( ispec_is_elastic(ispec) ) then
+                  if(USE_FORCE_POINT_SOURCE) then
 
-                    if(USE_FORCE_POINT_SOURCE) then
+                     ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+                     iglob = ibool(nint(xi_source(isource)), &
+                          nint(eta_source(isource)), &
+                          nint(gamma_source(isource)), &
+                          ispec_selected_source(isource))
 
-                       ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
-                       iglob = ibool(nint(xi_source(isource)), &
-                            nint(eta_source(isource)), &
-                            nint(gamma_source(isource)), &
-                            ispec_selected_source(isource))
+                     f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
 
-                       f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
+                     !if (it == 1 .and. myrank == 0) then
+                     !  write(IMAIN,*) 'using a source of dominant frequency ',f0
+                     !  write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+                     !  write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+                     !endif
 
-                       !if (it == 1 .and. myrank == 0) then
-                       !  write(IMAIN,*) 'using a source of dominant frequency ',f0
-                       !  write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
-                       !  write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
-                       !endif
+                     ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
+                     stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
 
-                       ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
-                       stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
+                     ! we use a force in a single direction along one of the components:
+                     !  x/y/z or E/N/Z-direction would correspond to 1/2/3 = COMPONENT_FORCE_SOURCE
+                     ! e.g. nu_source(:,3) here would be a source normal to the surface (z-direction).
+                     accel(:,iglob) = accel(:,iglob)  &
+                          + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
 
-                       ! we use a force in a single direction along one of the components:
-                       !  x/y/z or E/N/Z-direction would correspond to 1/2/3 = COMPONENT_FORCE_SOURCE
-                       ! e.g. nu_source(:,3) here would be a source normal to the surface (z-direction).
-                       accel(:,iglob) = accel(:,iglob)  &
-                            + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
+                  else
 
-                    else
+                     stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
 
-                       stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+                     !     distinguish between single and double precision for reals
+                     if(CUSTOM_REAL == SIZE_REAL) then
+                        stf_used = sngl(stf)
+                     else
+                        stf_used = stf
+                     endif
 
-                       !     distinguish between single and double precision for reals
-                       if(CUSTOM_REAL == SIZE_REAL) then
-                          stf_used = sngl(stf)
-                       else
-                          stf_used = stf
-                       endif
+                     !     add source array
+                     do k=1,NGLLZ
+                        do j=1,NGLLY
+                           do i=1,NGLLX
+                              iglob = ibool(i,j,k,ispec)
+                              accel(:,iglob) = accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
+                           enddo
+                        enddo
+                     enddo
 
-                       !     add source array
-                       do k=1,NGLLZ
-                          do j=1,NGLLY
-                             do i=1,NGLLX
-                                iglob = ibool(i,j,k,ispec)
-                                accel(:,iglob) = accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
-                             enddo
-                          enddo
-                       enddo
+                  endif ! USE_FORCE_POINT_SOURCE
 
-                    endif ! USE_FORCE_POINT_SOURCE
+                  stf_used_total = stf_used_total + stf_used
 
-                    stf_used_total = stf_used_total + stf_used
+               endif ! ispec_is_elastic
+            endif ! ispec_is_inner
+         endif ! myrank
 
-                 endif ! ispec_is_elastic
-              endif ! ispec_is_inner
-           endif ! myrank
-
-        enddo ! NSOURCES
-     endif ! GPU_MODE
+      enddo ! NSOURCES
+    endif ! GPU_MODE
   endif ! forward
 
 ! NOTE: adjoint sources and backward wavefield timing:
@@ -350,10 +358,10 @@
            enddo ! nrec
         else ! GPU_MODE == .true.
            call add_sources_sim_type_2_or_3(Mesh_pointer, adj_sourcearrays, &
-                size(adj_sourcearrays), ispec_is_inner,&
-                phase_is_inner, ispec_selected_rec,ibool,myrank, nrec, &
-                NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC),&
-                islice_selected_rec, nadj_rec_local, NTSTEP_BETWEEN_READ_ADJSRC)       
+                                          size(adj_sourcearrays), ispec_is_inner,&
+                                          phase_is_inner, ispec_selected_rec,ibool,myrank, nrec, &
+                                          NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC),&
+                                          islice_selected_rec, nadj_rec_local, NTSTEP_BETWEEN_READ_ADJSRC)
         endif ! GPU_MODE
      endif ! it
 
@@ -364,84 +372,87 @@
 !           thus indexing is NSTEP - it , instead of NSTEP - it - 1
 
 ! adjoint simulations
-  if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0) then
+  if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then
 
-     if(GPU_MODE) then
-        do isource = 1,NSOURCES
-           stf_pre_compute(isource) = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
-        enddo
-        call add_sourcearrays_adjoint_cuda(Mesh_pointer, USE_FORCE_POINT_SOURCE,&
-             stf_pre_compute, NSOURCES,phase_is_inner,myrank)
-     else ! .NOT. GPU_MODE
+    if(GPU_MODE) then
+      do isource = 1,NSOURCES
+        stf_pre_compute(isource) = comp_source_time_function( &
+                                          dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+      enddo
 
-        ! backward source reconstruction
-        do isource = 1,NSOURCES
+      call compute_add_sources_el_s3_cuda(Mesh_pointer, USE_FORCE_POINT_SOURCE,&
+                                          stf_pre_compute, NSOURCES,phase_is_inner,myrank)
 
-           ! add the source (only if this proc carries the source)
-           if(myrank == islice_selected_source(isource)) then
+    else ! .NOT. GPU_MODE
 
-              ispec = ispec_selected_source(isource)
+      ! backward source reconstruction
+      do isource = 1,NSOURCES
 
-              if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+         ! add the source (only if this proc carries the source)
+         if(myrank == islice_selected_source(isource)) then
 
-                 if( ispec_is_elastic(ispec) ) then
+            ispec = ispec_selected_source(isource)
 
-                    if(USE_FORCE_POINT_SOURCE) then
+            if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
 
-                       ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
-                       iglob = ibool(nint(xi_source(isource)), &
-                            nint(eta_source(isource)), &
-                            nint(gamma_source(isource)), &
-                            ispec_selected_source(isource))
+               if( ispec_is_elastic(ispec) ) then
 
-                       f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
+                  if(USE_FORCE_POINT_SOURCE) then
 
-                       !if (it == 1 .and. myrank == 0) then
-                       !   write(IMAIN,*) 'using a source of dominant frequency ',f0
-                       !   write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
-                       !   write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
-                       !endif
+                     ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+                     iglob = ibool(nint(xi_source(isource)), &
+                          nint(eta_source(isource)), &
+                          nint(gamma_source(isource)), &
+                          ispec_selected_source(isource))
 
-                       ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
-                       stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),f0)
+                     f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
 
-                       ! e.g. we use nu_source(:,3) here if we want a source normal to the surface.
-                       ! note: time step is now at NSTEP-it
-                       b_accel(:,iglob) = b_accel(:,iglob)  &
-                            + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
+                     !if (it == 1 .and. myrank == 0) then
+                     !   write(IMAIN,*) 'using a source of dominant frequency ',f0
+                     !   write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+                     !   write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+                     !endif
 
-                    else
+                     ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
+                     stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),f0)
 
-                       ! see note above: time step corresponds now to NSTEP-it
-                       ! (also compare to it-1 for forward simulation)
-                       stf = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+                     ! e.g. we use nu_source(:,3) here if we want a source normal to the surface.
+                     ! note: time step is now at NSTEP-it
+                     b_accel(:,iglob) = b_accel(:,iglob)  &
+                          + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
 
-                       ! distinguish between single and double precision for reals
-                       if(CUSTOM_REAL == SIZE_REAL) then
-                          stf_used = sngl(stf)
-                       else
-                          stf_used = stf
-                       endif
+                  else
 
-                       !  add source array
-                       do k=1,NGLLZ
-                          do j=1,NGLLY
-                             do i=1,NGLLX
-                                iglob = ibool(i,j,k,ispec_selected_source(isource))
-                                b_accel(:,iglob) = b_accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
-                             enddo
-                          enddo
-                       enddo
-                    endif ! USE_FORCE_POINT_SOURCE
+                     ! see note above: time step corresponds now to NSTEP-it
+                     ! (also compare to it-1 for forward simulation)
+                     stf = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
 
-                    stf_used_total = stf_used_total + stf_used
+                     ! distinguish between single and double precision for reals
+                     if(CUSTOM_REAL == SIZE_REAL) then
+                        stf_used = sngl(stf)
+                     else
+                        stf_used = stf
+                     endif
 
-                 endif ! elastic
-              endif ! phase_inner
-           endif ! myrank
+                     !  add source array
+                     do k=1,NGLLZ
+                        do j=1,NGLLY
+                           do i=1,NGLLX
+                              iglob = ibool(i,j,k,ispec_selected_source(isource))
+                              b_accel(:,iglob) = b_accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
+                           enddo
+                        enddo
+                     enddo
+                  endif ! USE_FORCE_POINT_SOURCE
 
-        enddo ! NSOURCES
-     endif ! GPU_MODE
+                  stf_used_total = stf_used_total + stf_used
+
+               endif ! elastic
+            endif ! phase_inner
+         endif ! myrank
+
+      enddo ! NSOURCES
+    endif ! GPU_MODE
   endif ! adjoint
 
   ! master prints out source time function to file
@@ -468,7 +479,7 @@
                it,irec_master_noise, &
                NSPEC_AB,NGLOB_AB)
        else ! GPU_MODE == .true.
-          call add_source_master_rec_noise_cuda(Mesh_pointer, myrank, it, irec_master_noise, islice_selected_rec)
+          call add_source_master_rec_noise_cu(Mesh_pointer, myrank, it, irec_master_noise, islice_selected_rec)
        endif
     elseif ( NOISE_TOMOGRAPHY == 2 ) then
        ! second step of noise tomography, i.e., read the surface movie saved at every timestep

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -81,20 +81,20 @@
                         num_free_surface_faces,ispec_is_acoustic)
   else
     ! on GPU
-    call acoustic_enforce_free_surface_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
+    call acoustic_enforce_free_surf_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
   endif
 
   if(PML) then
     ! enforces free surface on PML elements
 
-    ! note: 
-    ! PML routines are not implemented as CUDA kernels, we just transfer the fields 
+    ! note:
+    ! PML routines are not implemented as CUDA kernels, we just transfer the fields
     ! from the GPU to the CPU and vice versa
 
-    ! transfers potentials to the CPU      
-    if(GPU_MODE) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
-    
+    ! transfers potentials to the CPU
+    if(GPU_MODE) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
     call PML_acoustic_enforce_free_srfc(NSPEC_AB,NGLOB_AB, &
                         potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
                         ibool,free_surface_ijk,free_surface_ispec, &
@@ -106,8 +106,8 @@
                         chi3_dot_dot,chi4_dot_dot)
 
     ! transfers potentials back to GPU
-    if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
+    if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
   endif
 
 ! distinguishes two runs: for points on MPI interfaces, and points within the partitions
@@ -147,15 +147,16 @@
     else
       ! on GPU
       ! includes code for SIMULATION_TYPE==3
-      call compute_forces_acoustic_cuda(Mesh_pointer, iphase, nspec_outer_acoustic, nspec_inner_acoustic, &
-             SIMULATION_TYPE)
+      call compute_forces_acoustic_cuda(Mesh_pointer, iphase, &
+                                      nspec_outer_acoustic, nspec_inner_acoustic, &
+                                      SIMULATION_TYPE)
     endif
 
 
    if(PML) then
       ! transfers potentials to CPU
-      if(GPU_MODE) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
+      if(GPU_MODE) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
 
       call compute_forces_acoustic_PML(NSPEC_AB,NGLOB_AB, &
                         ibool,ispec_is_inner,phase_is_inner, &
@@ -178,8 +179,8 @@
                         chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
 
       ! transfers potentials back to GPU
-      if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
+      if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
 
     endif ! PML
 
@@ -197,9 +198,9 @@
                         chi1_dot,chi2_t,chi2_t_dot,chi3_dot,chi4_dot,&
                         chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
         ! transfers potentials back to GPU
-        if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
-                        
+        if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
       else
        ! Stacey boundary conditions
         call compute_stacey_acoustic(NSPEC_AB,NGLOB_AB, &
@@ -215,7 +216,7 @@
     endif
 
 ! elastic coupling
-    if(ELASTIC_SIMULATION ) then    
+    if(ELASTIC_SIMULATION ) then
       if( .NOT. GPU_MODE ) then
         call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
                         ibool,displ,potential_dot_dot_acoustic, &
@@ -236,9 +237,9 @@
       else
         ! on GPU
         if( num_coupling_ac_el_faces > 0 ) &
-          call compute_coupling_acoustic_el_cuda(Mesh_pointer,phase_is_inner, &
+          call compute_coupling_ac_el_cuda(Mesh_pointer,phase_is_inner, &
                                               num_coupling_ac_el_faces,SIMULATION_TYPE)
-      
+
       endif
     endif
 
@@ -264,7 +265,7 @@
 ! assemble all the contributions between slices using MPI
     if( phase_is_inner .eqv. .false. ) then
       ! sends potential_dot_dot_acoustic values to corresponding MPI interface neighbors (non-blocking)
-      if(.NOT. GPU_MODE) then      
+      if(.NOT. GPU_MODE) then
         call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
                         buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
                         num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
@@ -273,7 +274,7 @@
                         request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
       else
         ! on GPU
-        call transfer_boundary_potential_from_device(NGLOB_AB, Mesh_pointer, &
+        call transfer_boun_pot_from_device(NGLOB_AB, Mesh_pointer, &
                                             potential_dot_dot_acoustic, &
                                             buffer_send_scalar_ext_mesh, &
                                             num_interfaces_ext_mesh, &
@@ -281,14 +282,14 @@
                                             nibool_interfaces_ext_mesh, &
                                             ibool_interfaces_ext_mesh, &
                                             1) ! <-- 1 == fwd accel
-        call assemble_MPI_scalar_ext_mesh_send_cuda(NPROC, &
+        call assemble_MPI_scalar_send_cuda(NPROC, &
                         buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
                         num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                         nibool_interfaces_ext_mesh,&
                         my_neighbours_ext_mesh, &
-                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)        
+                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
       endif
-                        
+
       ! adjoint simulations
       if( SIMULATION_TYPE == 3 ) then
         if(.NOT. GPU_MODE) then
@@ -300,7 +301,7 @@
                         b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
         else
           ! on GPU
-          call transfer_boundary_potential_from_device(NGLOB_AB, Mesh_pointer, &
+          call transfer_boun_pot_from_device(NGLOB_AB, Mesh_pointer, &
                                                   b_potential_dot_dot_acoustic, &
                                                   b_buffer_send_scalar_ext_mesh,&
                                                   num_interfaces_ext_mesh, &
@@ -308,21 +309,21 @@
                                                   nibool_interfaces_ext_mesh, &
                                                   ibool_interfaces_ext_mesh, &
                                                   3) ! <-- 3 == adjoint b_accel
-               
-          call assemble_MPI_scalar_ext_mesh_send_cuda(NPROC, &
+
+          call assemble_MPI_scalar_send_cuda(NPROC, &
                           b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, &
                           num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                           nibool_interfaces_ext_mesh,&
                           my_neighbours_ext_mesh, &
                           b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
-          
+
         endif
       endif
-      
+
     else
-      
+
       ! waits for send/receive requests to be completed and assembles values
-      if(.NOT. GPU_MODE) then                          
+      if(.NOT. GPU_MODE) then
         call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
                         buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
                         max_nibool_interfaces_ext_mesh, &
@@ -330,18 +331,18 @@
                         request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
       else
         ! on GPU
-        call assemble_MPI_scalar_ext_mesh_write_cuda(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
+        call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
                         Mesh_pointer,&
                         buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
                         max_nibool_interfaces_ext_mesh, &
                         nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
                         request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
-                        1)      
+                        1)
       endif
-                        
+
       ! adjoint simulations
       if( SIMULATION_TYPE == 3 ) then
-        if(.NOT. GPU_MODE) then          
+        if(.NOT. GPU_MODE) then
           call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
                         b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
                         max_nibool_interfaces_ext_mesh, &
@@ -349,14 +350,14 @@
                         b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
         else
           ! on GPU
-          call assemble_MPI_scalar_ext_mesh_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, &
+          call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, &
                         Mesh_pointer, &
                         b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
                         max_nibool_interfaces_ext_mesh, &
                         nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
                         b_request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
-                        3)          
-        endif      
+                        3)
+        endif
       endif
     endif !phase_is_inner
 
@@ -376,9 +377,9 @@
 
 
   if(PML) then
-    ! note: no need to transfer fields between CPU and GPU; 
+    ! note: no need to transfer fields between CPU and GPU;
     !           PML arrays are all handled on the CPU
-      
+
     ! divides local contributions with mass term
     call PML_acoustic_mass_update(NSPEC_AB,NGLOB_AB,&
                         ispec_is_acoustic,rmass_acoustic,ibool,&
@@ -410,7 +411,7 @@
 ! corrector:
 !   updates the chi_dot term which requires chi_dot_dot(t+delta)
  if( .NOT. GPU_MODE ) then
-    ! corrector  
+    ! corrector
     potential_dot_acoustic(:) = potential_dot_acoustic(:) + deltatover2*potential_dot_dot_acoustic(:)
 
     ! adjoint simulations
@@ -418,14 +419,14 @@
       b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) + b_deltatover2*b_potential_dot_dot_acoustic(:)
   else
     ! on GPU
-    call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,SIMULATION_TYPE,b_deltatover2)    
+    call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,SIMULATION_TYPE,b_deltatover2)
   endif
 
   ! updates potential_dot_acoustic and potential_dot_dot_acoustic inside PML region for plotting seismograms/movies
-  if(PML) then 
+  if(PML) then
     ! transfers potentials to CPU
-    if(GPU_MODE) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
+    if(GPU_MODE) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
 
     call PML_acoustic_update_potentials(NGLOB_AB,NSPEC_AB, &
                         ibool,ispec_is_acoustic, &
@@ -439,9 +440,9 @@
                         chi1_dot,chi2_t_dot,chi3_dot,chi4_dot,&
                         chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
 
-    ! transfers potentials to GPU                      
-    if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
+    ! transfers potentials to GPU
+    if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
 
   endif
 
@@ -461,15 +462,15 @@
                         num_free_surface_faces,ispec_is_acoustic)
   else
     ! on GPU
-    call acoustic_enforce_free_surface_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)    
+    call acoustic_enforce_free_surf_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
   endif
 
 
   if(PML) then
     ! enforces free surface on PML elements
-    if( GPU_MODE ) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
-    
+    if( GPU_MODE ) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
     call PML_acoustic_enforce_free_srfc(NSPEC_AB,NGLOB_AB, &
                         potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
                         ibool,free_surface_ijk,free_surface_ispec, &
@@ -481,9 +482,9 @@
                         chi1_dot_dot,chi2_t_dot_dot,&
                         chi3_dot_dot,chi4_dot_dot)
 
-    if( GPU_MODE ) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
-  endif  
+    if( GPU_MODE ) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+  endif
 
 end subroutine compute_forces_acoustic
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -50,172 +50,19 @@
 
 
 ! elastic term
-    if(USE_DEVILLE_PRODUCTS) then
-       if (NGLLX == 5) then
-          if(.NOT. GPU_MODE) then
-             call compute_forces_elastic_Dev_5points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION, &
-                  one_minus_sum_beta,factor_common, &
-                  alphaval,betaval,gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  R_xx,R_yy,R_xy,R_xz,R_yz, &
-                  epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                  epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  dsdx_top,dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
+    if( .NOT. GPU_MODE ) then
+      if(USE_DEVILLE_PRODUCTS) then
+        ! uses Deville (2002) optimizations
+        call compute_forces_elastic_Dev_sim1(iphase)
 
-          else ! GPU_MODE==.true.
-             ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
-             call compute_forces_elastic_cuda(Mesh_pointer, iphase, nspec_outer_elastic, &
-                  nspec_inner_elastic,COMPUTE_AND_STORE_STRAIN,SIMULATION_TYPE,ATTENUATION)   
-          endif
-      else if (NGLLX == 6) then
-      call compute_forces_elastic_Dev_6points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        alphaval,betaval,gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        R_xx,R_yy,R_xy,R_xz,R_yz, &
-                        epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                        epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        dsdx_top,dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
+        ! adjoint simulations: backward/reconstructed wavefield
+        if( SIMULATION_TYPE == 3 ) &
+          call compute_forces_elastic_Dev_sim3(iphase)
 
-      else if (NGLLX == 7) then
-      call compute_forces_elastic_Dev_7points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+      else
+        ! no optimizations used
+        call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        alphaval,betaval,gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        R_xx,R_yy,R_xy,R_xz,R_yz, &
-                        epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                        epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        dsdx_top,dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
-
-      else if (NGLLX == 8) then
-      call compute_forces_elastic_Dev_8points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        alphaval,betaval,gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        R_xx,R_yy,R_xy,R_xz,R_yz, &
-                        epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                        epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        dsdx_top,dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
-
-      else if (NGLLX == 9) then
-      call compute_forces_elastic_Dev_9points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        alphaval,betaval,gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        R_xx,R_yy,R_xy,R_xz,R_yz, &
-                        epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                        epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        dsdx_top,dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
-
-      else 
-      call compute_forces_elastic_Dev_10points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        alphaval,betaval,gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        R_xx,R_yy,R_xy,R_xz,R_yz, &
-                        epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                        epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        dsdx_top,dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
-      endif
-    else
-      call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                         hprime_xx,hprime_yy,hprime_zz, &
                         hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
                         wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
@@ -239,177 +86,12 @@
                         ispec2D_moho_top,ispec2D_moho_bot, &
                         num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
                         phase_ispec_inner_elastic  )
-    endif
- 
-    ! adjoint simulations: backward/reconstructed wavefield
-    ! GPU_MODE for SIM_TYPE==3 contained above in compute_forces_elastic_cuda
-    if( SIMULATION_TYPE == 3 ) then
-      if(USE_DEVILLE_PRODUCTS) then
-      if (NGLLX == 5) then
-        call compute_forces_elastic_Dev_5points(iphase, NSPEC_AB,NGLOB_AB, &
-                        b_displ,b_accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        b_alphaval,b_betaval,b_gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                        b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                        b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        b_dsdx_top,b_dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
-      else if (NGLLX == 6) then
-        call compute_forces_elastic_Dev_6points(iphase, NSPEC_AB,NGLOB_AB, &
-                        b_displ,b_accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        b_alphaval,b_betaval,b_gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                        b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                        b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        b_dsdx_top,b_dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
 
-      else if (NGLLX == 7) then
-        call compute_forces_elastic_Dev_7points(iphase, NSPEC_AB,NGLOB_AB, &
+        ! adjoint simulations: backward/reconstructed wavefield
+        if( SIMULATION_TYPE == 3 ) &
+          call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,&
                         b_displ,b_accel, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        b_alphaval,b_betaval,b_gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                        b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                        b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        b_dsdx_top,b_dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
-
-      else if (NGLLX == 8) then
-        call compute_forces_elastic_Dev_8points(iphase, NSPEC_AB,NGLOB_AB, &
-                        b_displ,b_accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        b_alphaval,b_betaval,b_gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                        b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                        b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        b_dsdx_top,b_dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
-
-      else if (NGLLX == 9) then
-        call compute_forces_elastic_Dev_9points(iphase, NSPEC_AB,NGLOB_AB, &
-                        b_displ,b_accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        b_alphaval,b_betaval,b_gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                        b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                        b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        b_dsdx_top,b_dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
-
-      else 
-        call compute_forces_elastic_Dev_10points(iphase, NSPEC_AB,NGLOB_AB, &
-                        b_displ,b_accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION, &
-                        one_minus_sum_beta,factor_common, &
-                        b_alphaval,b_betaval,b_gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                        b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                        b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        b_dsdx_top,b_dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic )
-      endif
-      else
-        call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,&
-                        b_displ,b_accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                         hprime_xx,hprime_yy,hprime_zz, &
                         hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
                         wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
@@ -435,9 +117,17 @@
                         phase_ispec_inner_elastic  )
 
       endif
-    endif
 
+    else
+      ! on GPU
+      ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
+      call compute_forces_elastic_cuda(Mesh_pointer, iphase, &
+                                      nspec_outer_elastic, &
+                                      nspec_inner_elastic, &
+                                      SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,ATTENUATION)
+    endif ! GPU_MODE
 
+
 ! adds elastic absorbing boundary term to acceleration (Stacey conditions)
     if(ABSORBING_CONDITIONS) &
       call compute_stacey_elastic(NSPEC_AB,NGLOB_AB,accel, &
@@ -452,8 +142,8 @@
                         GPU_MODE,Mesh_pointer)
 
 ! acoustic coupling
-    if( ACOUSTIC_SIMULATION ) then      
-      if( .NOT. GPU_MODE ) then    
+    if( ACOUSTIC_SIMULATION ) then
+      if( .NOT. GPU_MODE ) then
         call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
                         ibool,accel,potential_dot_dot_acoustic, &
                         num_coupling_ac_el_faces, &
@@ -474,9 +164,9 @@
       else
         ! on GPU
         if( num_coupling_ac_el_faces > 0 ) &
-          call compute_coupling_elastic_ac_cuda(Mesh_pointer,phase_is_inner, &
+          call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, &
                                               num_coupling_ac_el_faces,SIMULATION_TYPE)
-      
+
       endif
     endif
 
@@ -495,8 +185,9 @@
                         ispec_is_elastic,SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
                         nrec,islice_selected_rec,ispec_selected_rec, &
                         nadj_rec_local,adj_sourcearrays,b_accel, &
-                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY,GPU_MODE, Mesh_pointer )
-    
+                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, &
+                        GPU_MODE, Mesh_pointer )
+
     ! assemble all the contributions between slices using MPI
     if( phase_is_inner .eqv. .false. ) then
        ! sends accel values to corresponding MPI interface neighbors
@@ -508,18 +199,18 @@
                my_neighbours_ext_mesh, &
                request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
        else ! GPU_MODE==1
-          call transfer_boundary_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, accel,&
-               buffer_send_vector_ext_mesh,&
-               num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
-               nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,1) ! <-- 1 == fwd accel
-          call assemble_MPI_vector_ext_mesh_send_cuda(NPROC, &
+          call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, accel,&
+                        buffer_send_vector_ext_mesh,&
+                        num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
+                        nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,1) ! <-- 1 == fwd accel
+          call assemble_MPI_vector_send_cuda(NPROC, &
                buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
                num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                nibool_interfaces_ext_mesh,&
                my_neighbours_ext_mesh, &
                request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
        endif ! GPU_MODE
-       
+
        ! adjoint simulations
        if( SIMULATION_TYPE == 3 ) then
           if(.NOT. GPU_MODE) then
@@ -530,17 +221,17 @@
                   my_neighbours_ext_mesh, &
                   b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
           else ! GPU_MODE == 1
-             call transfer_boundary_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
-               b_buffer_send_vector_ext_mesh,&
-               num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
-               nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
-             call assemble_MPI_vector_ext_mesh_send_cuda(NPROC, &
+             call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
+                       b_buffer_send_vector_ext_mesh,&
+                       num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
+                       nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
+             call assemble_MPI_vector_send_cuda(NPROC, &
                   b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
                   num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                   nibool_interfaces_ext_mesh,&
                   my_neighbours_ext_mesh, &
                   b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
-             
+
           endif ! GPU
        endif !adjoint
 
@@ -548,31 +239,31 @@
       ! waits for send/receive requests to be completed and assembles values
       if(.NOT. GPU_MODE) then
          call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_AB,accel, &
-              buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-              max_nibool_interfaces_ext_mesh, &
-              nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-              request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                            max_nibool_interfaces_ext_mesh, &
+                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
       else ! GPU_MODE == 1
-         call assemble_MPI_vector_ext_mesh_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
-              buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-              max_nibool_interfaces_ext_mesh, &
-              nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-              request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
+         call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
+                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                            max_nibool_interfaces_ext_mesh, &
+                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
       endif
       ! adjoint simulations
       if( SIMULATION_TYPE == 3 ) then
          if(.NOT. GPU_MODE) then
             call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_accel, &
-                 b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                 max_nibool_interfaces_ext_mesh, &
-                 nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                 b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
+                             b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                             max_nibool_interfaces_ext_mesh, &
+                             nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                             b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
          else ! GPU_MODE == 1
-            call assemble_MPI_vector_ext_mesh_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
-              b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-              max_nibool_interfaces_ext_mesh, &
-              nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-              b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
+            call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
+                              b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                              max_nibool_interfaces_ext_mesh, &
+                              nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                              b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
          endif
       endif !adjoint
 
@@ -585,7 +276,7 @@
     !! DK DK May 2009: For adjoint runs below (SIMULATION_TYPE == 3) they should be used as well.
 
  enddo
- 
+
  ! multiplies with inverse of mass matrix (note: rmass has been inverted already)
  if(.NOT. GPU_MODE) then
     accel(1,:) = accel(1,:)*rmass(:)
@@ -597,7 +288,7 @@
        b_accel(2,:) = b_accel(2,:)*rmass(:)
        b_accel(3,:) = b_accel(3,:)*rmass(:)
     endif !adjoint
- else ! GPU_MODE == 1    
+ else ! GPU_MODE == 1
     call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS)
  endif
 
@@ -636,8 +327,8 @@
      veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
      ! adjoint simulations
      if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
-  else ! GPU_MODE == 1    
-    if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2)  
+  else ! GPU_MODE == 1
+    if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2)
   endif
 
 
@@ -746,3 +437,385 @@
 
 end subroutine elastic_ocean_load
 
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! distributes routines according to chosen NGLLX in constants.h
+
+!daniel
+! note:
+! i put it here rather than in compute_forces_elastic_Dev.f90 because compiler complains that:
+! " The storage extent of the dummy argument exceeds that of the actual argument. "
+
+subroutine compute_forces_elastic_Dev_sim1(iphase)
+
+! forward simulations
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+
+  implicit none
+
+  integer,intent(in) :: iphase
+
+  select case(NGLLX)
+
+  case (5)
+    call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+            xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+            hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+            wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+            kappastore,mustore,jacobian,ibool, &
+            ATTENUATION, &
+            one_minus_sum_beta,factor_common, &
+            alphaval,betaval,gammaval, &
+            NSPEC_ATTENUATION_AB, &
+            R_xx,R_yy,R_xy,R_xz,R_yz, &
+            epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+            epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+            ANISOTROPY,NSPEC_ANISO, &
+            c11store,c12store,c13store,c14store,c15store,c16store,&
+            c22store,c23store,c24store,c25store,c26store,c33store,&
+            c34store,c35store,c36store,c44store,c45store,c46store,&
+            c55store,c56store,c66store, &
+            SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+            NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+            is_moho_top,is_moho_bot, &
+            dsdx_top,dsdx_bot, &
+            ispec2D_moho_top,ispec2D_moho_bot, &
+            num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+            phase_ispec_inner_elastic )
+
+  case (6)
+    call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                    kappastore,mustore,jacobian,ibool, &
+                    ATTENUATION, &
+                    one_minus_sum_beta,factor_common, &
+                    alphaval,betaval,gammaval, &
+                    NSPEC_ATTENUATION_AB, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                    c55store,c56store,c66store, &
+                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                    is_moho_top,is_moho_bot, &
+                    dsdx_top,dsdx_bot, &
+                    ispec2D_moho_top,ispec2D_moho_bot, &
+                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                    phase_ispec_inner_elastic )
+
+  case (7)
+    call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                    kappastore,mustore,jacobian,ibool, &
+                    ATTENUATION, &
+                    one_minus_sum_beta,factor_common, &
+                    alphaval,betaval,gammaval, &
+                    NSPEC_ATTENUATION_AB, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                    c55store,c56store,c66store, &
+                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                    is_moho_top,is_moho_bot, &
+                    dsdx_top,dsdx_bot, &
+                    ispec2D_moho_top,ispec2D_moho_bot, &
+                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                    phase_ispec_inner_elastic )
+
+  case (8)
+    call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                    kappastore,mustore,jacobian,ibool, &
+                    ATTENUATION, &
+                    one_minus_sum_beta,factor_common, &
+                    alphaval,betaval,gammaval, &
+                    NSPEC_ATTENUATION_AB, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                    c55store,c56store,c66store, &
+                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                    is_moho_top,is_moho_bot, &
+                    dsdx_top,dsdx_bot, &
+                    ispec2D_moho_top,ispec2D_moho_bot, &
+                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                    phase_ispec_inner_elastic )
+
+  case (9)
+    call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                    kappastore,mustore,jacobian,ibool, &
+                    ATTENUATION, &
+                    one_minus_sum_beta,factor_common, &
+                    alphaval,betaval,gammaval, &
+                    NSPEC_ATTENUATION_AB, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                    c55store,c56store,c66store, &
+                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                    is_moho_top,is_moho_bot, &
+                    dsdx_top,dsdx_bot, &
+                    ispec2D_moho_top,ispec2D_moho_bot, &
+                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                    phase_ispec_inner_elastic )
+
+  case (10)
+    call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                    kappastore,mustore,jacobian,ibool, &
+                    ATTENUATION, &
+                    one_minus_sum_beta,factor_common, &
+                    alphaval,betaval,gammaval, &
+                    NSPEC_ATTENUATION_AB, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                    c55store,c56store,c66store, &
+                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                    is_moho_top,is_moho_bot, &
+                    dsdx_top,dsdx_bot, &
+                    ispec2D_moho_top,ispec2D_moho_bot, &
+                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                    phase_ispec_inner_elastic )
+
+  case default
+
+    stop 'error no Deville routine available for chosen NGLLX'
+
+  end select
+
+end subroutine compute_forces_elastic_Dev_sim1
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+subroutine compute_forces_elastic_Dev_sim3(iphase)
+
+! uses backward/reconstructed displacement and acceleration arrays
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+
+  implicit none
+
+  integer,intent(in) :: iphase
+
+  select case(NGLLX)
+
+  case (5)
+    call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case (6)
+    call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case (7)
+    call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case (8)
+    call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case (9)
+    call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case (10)
+    call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case default
+
+    stop 'error no Deville routine available for chosen NGLLX'
+
+  end select
+
+
+end subroutine compute_forces_elastic_Dev_sim3
+
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -25,7 +25,8 @@
 !=====================================================================
 
 
-subroutine compute_forces_elastic_Dev_5points( iphase ,NSPEC_AB,NGLOB_AB, &
+
+subroutine compute_forces_elastic_Dev_5p( iphase ,NSPEC_AB,NGLOB_AB, &
                                     displ,accel, &
                                     xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                                     hprime_xx,hprime_xxT, &
@@ -72,7 +73,8 @@
         kappastore,mustore,jacobian
 
 ! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(NGLLX,5) :: hprime_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(5,NGLLX) :: hprime_xxT,hprimewgll_xx
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
   real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -117,15 +119,15 @@
   integer :: ispec2D_moho_top, ispec2D_moho_bot
 
 ! local parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+  real(kind=CUSTOM_REAL), dimension(5,5,5) :: dummyx_loc,dummyy_loc,dummyz_loc, &
     newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+  real(kind=CUSTOM_REAL), dimension(5,5,5) :: &
     tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
 
   ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_5points,E2_m1_m2_5points,E3_m1_m2_5points
+  real(kind=CUSTOM_REAL), dimension(5,25) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
+  real(kind=CUSTOM_REAL), dimension(5,25) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
+  real(kind=CUSTOM_REAL), dimension(5,25) :: E1_m1_m2_5points,E2_m1_m2_5points,E3_m1_m2_5points
 
   equivalence(dummyx_loc,B1_m1_m2_5points)
   equivalence(dummyy_loc,B2_m1_m2_5points)
@@ -137,11 +139,11 @@
   equivalence(newtempy1,E2_m1_m2_5points)
   equivalence(newtempz1,E3_m1_m2_5points)
 
-  real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+  real(kind=CUSTOM_REAL), dimension(25,5) :: &
     A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(25,5) :: &
     C1_mxm_m2_m1_5points,C2_mxm_m2_m1_5points,C3_mxm_m2_m1_5points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(25,5) :: &
     E1_mxm_m2_m1_5points,E2_mxm_m2_m1_5points,E3_mxm_m2_m1_5points
 
   equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
@@ -649,35 +651,13 @@
 
   enddo  ! spectral element loop
 
-end subroutine compute_forces_elastic_Dev_5points
-!=====================================================================
+end subroutine compute_forces_elastic_Dev_5p
+
 !
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
 !=====================================================================
+!
 
-
-subroutine compute_forces_elastic_Dev_6points( iphase ,NSPEC_AB,NGLOB_AB, &
+subroutine compute_forces_elastic_Dev_6p( iphase ,NSPEC_AB,NGLOB_AB, &
                                     displ,accel, &
                                     xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                                     hprime_xx,hprime_xxT, &
@@ -724,7 +704,8 @@
         kappastore,mustore,jacobian
 
 ! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(NGLLX,6) :: hprime_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(6,NGLLX) :: hprime_xxT,hprimewgll_xx
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
   real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -769,15 +750,15 @@
   integer :: ispec2D_moho_top, ispec2D_moho_bot
 
 ! local parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+  real(kind=CUSTOM_REAL), dimension(6,6,6) :: dummyx_loc,dummyy_loc,dummyz_loc, &
     newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+  real(kind=CUSTOM_REAL), dimension(6,6,6) :: &
     tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
 
   ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_6points,B2_m1_m2_6points,B3_m1_m2_6points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_6points,C2_m1_m2_6points,C3_m1_m2_6points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_6points,E2_m1_m2_6points,E3_m1_m2_6points
+  real(kind=CUSTOM_REAL), dimension(6,36) :: B1_m1_m2_6points,B2_m1_m2_6points,B3_m1_m2_6points
+  real(kind=CUSTOM_REAL), dimension(6,36) :: C1_m1_m2_6points,C2_m1_m2_6points,C3_m1_m2_6points
+  real(kind=CUSTOM_REAL), dimension(6,36) :: E1_m1_m2_6points,E2_m1_m2_6points,E3_m1_m2_6points
 
   equivalence(dummyx_loc,B1_m1_m2_6points)
   equivalence(dummyy_loc,B2_m1_m2_6points)
@@ -789,11 +770,11 @@
   equivalence(newtempy1,E2_m1_m2_6points)
   equivalence(newtempz1,E3_m1_m2_6points)
 
-  real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+  real(kind=CUSTOM_REAL), dimension(36,6) :: &
     A1_mxm_m2_m1_6points,A2_mxm_m2_m1_6points,A3_mxm_m2_m1_6points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(36,6) :: &
     C1_mxm_m2_m1_6points,C2_mxm_m2_m1_6points,C3_mxm_m2_m1_6points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(36,6) :: &
     E1_mxm_m2_m1_6points,E2_mxm_m2_m1_6points,E3_mxm_m2_m1_6points
 
   equivalence(dummyx_loc,A1_mxm_m2_m1_6points)
@@ -881,19 +862,19 @@
                                   hprime_xx(i,3)*B1_m1_m2_6points(3,j) + &
                                   hprime_xx(i,4)*B1_m1_m2_6points(4,j) + &
                                   hprime_xx(i,5)*B1_m1_m2_6points(5,j) + &
-                                  hprime_xx(i,6)*B1_m1_m2_6points(6,j) 
+                                  hprime_xx(i,6)*B1_m1_m2_6points(6,j)
             C2_m1_m2_6points(i,j) = hprime_xx(i,1)*B2_m1_m2_6points(1,j) + &
                                   hprime_xx(i,2)*B2_m1_m2_6points(2,j) + &
                                   hprime_xx(i,3)*B2_m1_m2_6points(3,j) + &
                                   hprime_xx(i,4)*B2_m1_m2_6points(4,j) + &
                                   hprime_xx(i,5)*B2_m1_m2_6points(5,j) + &
-                                  hprime_xx(i,6)*B2_m1_m2_6points(6,j) 
+                                  hprime_xx(i,6)*B2_m1_m2_6points(6,j)
             C3_m1_m2_6points(i,j) = hprime_xx(i,1)*B3_m1_m2_6points(1,j) + &
                                   hprime_xx(i,2)*B3_m1_m2_6points(2,j) + &
                                   hprime_xx(i,3)*B3_m1_m2_6points(3,j) + &
                                   hprime_xx(i,4)*B3_m1_m2_6points(4,j) + &
                                   hprime_xx(i,5)*B3_m1_m2_6points(5,j) + &
-                                  hprime_xx(i,6)*B3_m1_m2_6points(6,j) 
+                                  hprime_xx(i,6)*B3_m1_m2_6points(6,j)
           enddo
         enddo
 
@@ -908,19 +889,19 @@
                             dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
                             dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
                             dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyx_loc(i,6,k)*hprime_xxT(6,j) 
+                            dummyx_loc(i,6,k)*hprime_xxT(6,j)
               tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
                             dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
                             dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
                             dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
                             dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyy_loc(i,6,k)*hprime_xxT(6,j) 
+                            dummyy_loc(i,6,k)*hprime_xxT(6,j)
               tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
                             dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
                             dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
                             dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
                             dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyz_loc(i,6,k)*hprime_xxT(6,j) 
+                            dummyz_loc(i,6,k)*hprime_xxT(6,j)
             enddo
           enddo
         enddo
@@ -933,19 +914,19 @@
                                       A1_mxm_m2_m1_6points(i,3)*hprime_xxT(3,j) + &
                                       A1_mxm_m2_m1_6points(i,4)*hprime_xxT(4,j) + &
                                       A1_mxm_m2_m1_6points(i,5)*hprime_xxT(5,j) + &
-                                      A1_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j) 
+                                      A1_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
             C2_mxm_m2_m1_6points(i,j) = A2_mxm_m2_m1_6points(i,1)*hprime_xxT(1,j) + &
                                       A2_mxm_m2_m1_6points(i,2)*hprime_xxT(2,j) + &
                                       A2_mxm_m2_m1_6points(i,3)*hprime_xxT(3,j) + &
                                       A2_mxm_m2_m1_6points(i,4)*hprime_xxT(4,j) + &
                                       A2_mxm_m2_m1_6points(i,5)*hprime_xxT(5,j) + &
-                                      A2_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j) 
+                                      A2_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
             C3_mxm_m2_m1_6points(i,j) = A3_mxm_m2_m1_6points(i,1)*hprime_xxT(1,j) + &
                                       A3_mxm_m2_m1_6points(i,2)*hprime_xxT(2,j) + &
                                       A3_mxm_m2_m1_6points(i,3)*hprime_xxT(3,j) + &
                                       A3_mxm_m2_m1_6points(i,4)*hprime_xxT(4,j) + &
                                       A3_mxm_m2_m1_6points(i,5)*hprime_xxT(5,j) + &
-                                      A3_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j) 
+                                      A3_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
           enddo
         enddo
 
@@ -1176,19 +1157,19 @@
                                   hprimewgll_xxT(i,3)*C1_m1_m2_6points(3,j) + &
                                   hprimewgll_xxT(i,4)*C1_m1_m2_6points(4,j) + &
                                   hprimewgll_xxT(i,5)*C1_m1_m2_6points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C1_m1_m2_6points(6,j) 
+                                  hprimewgll_xxT(i,6)*C1_m1_m2_6points(6,j)
             E2_m1_m2_6points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_6points(1,j) + &
                                   hprimewgll_xxT(i,2)*C2_m1_m2_6points(2,j) + &
                                   hprimewgll_xxT(i,3)*C2_m1_m2_6points(3,j) + &
                                   hprimewgll_xxT(i,4)*C2_m1_m2_6points(4,j) + &
                                   hprimewgll_xxT(i,5)*C2_m1_m2_6points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C2_m1_m2_6points(6,j) 
+                                  hprimewgll_xxT(i,6)*C2_m1_m2_6points(6,j)
             E3_m1_m2_6points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_6points(1,j) + &
                                   hprimewgll_xxT(i,2)*C3_m1_m2_6points(2,j) + &
                                   hprimewgll_xxT(i,3)*C3_m1_m2_6points(3,j) + &
                                   hprimewgll_xxT(i,4)*C3_m1_m2_6points(4,j) + &
                                   hprimewgll_xxT(i,5)*C3_m1_m2_6points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C3_m1_m2_6points(6,j) 
+                                  hprimewgll_xxT(i,6)*C3_m1_m2_6points(6,j)
           enddo
         enddo
 
@@ -1203,19 +1184,19 @@
                                tempx2(i,3,k)*hprimewgll_xx(3,j) + &
                                tempx2(i,4,k)*hprimewgll_xx(4,j) + &
                                tempx2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempx2(i,6,k)*hprimewgll_xx(6,j) 
+                               tempx2(i,6,k)*hprimewgll_xx(6,j)
               newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
                                tempy2(i,2,k)*hprimewgll_xx(2,j) + &
                                tempy2(i,3,k)*hprimewgll_xx(3,j) + &
                                tempy2(i,4,k)*hprimewgll_xx(4,j) + &
                                tempy2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempy2(i,6,k)*hprimewgll_xx(6,j) 
+                               tempy2(i,6,k)*hprimewgll_xx(6,j)
               newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
                                tempz2(i,2,k)*hprimewgll_xx(2,j) + &
                                tempz2(i,3,k)*hprimewgll_xx(3,j) + &
                                tempz2(i,4,k)*hprimewgll_xx(4,j) + &
                                tempz2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempz2(i,6,k)*hprimewgll_xx(6,j) 
+                               tempz2(i,6,k)*hprimewgll_xx(6,j)
             enddo
           enddo
         enddo
@@ -1228,19 +1209,19 @@
                                       C1_mxm_m2_m1_6points(i,3)*hprimewgll_xx(3,j) + &
                                       C1_mxm_m2_m1_6points(i,4)*hprimewgll_xx(4,j) + &
                                       C1_mxm_m2_m1_6points(i,5)*hprimewgll_xx(5,j) + &
-                                      C1_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j) 
+                                      C1_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
             E2_mxm_m2_m1_6points(i,j) = C2_mxm_m2_m1_6points(i,1)*hprimewgll_xx(1,j) + &
                                       C2_mxm_m2_m1_6points(i,2)*hprimewgll_xx(2,j) + &
                                       C2_mxm_m2_m1_6points(i,3)*hprimewgll_xx(3,j) + &
                                       C2_mxm_m2_m1_6points(i,4)*hprimewgll_xx(4,j) + &
                                       C2_mxm_m2_m1_6points(i,5)*hprimewgll_xx(5,j) + &
-                                      C2_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j) 
+                                      C2_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
             E3_mxm_m2_m1_6points(i,j) = C3_mxm_m2_m1_6points(i,1)*hprimewgll_xx(1,j) + &
                                       C3_mxm_m2_m1_6points(i,2)*hprimewgll_xx(2,j) + &
                                       C3_mxm_m2_m1_6points(i,3)*hprimewgll_xx(3,j) + &
                                       C3_mxm_m2_m1_6points(i,4)*hprimewgll_xx(4,j) + &
                                       C3_mxm_m2_m1_6points(i,5)*hprimewgll_xx(5,j) + &
-                                      C3_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j) 
+                                      C3_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
           enddo
         enddo
 
@@ -1319,35 +1300,13 @@
 
   enddo  ! spectral element loop
 
-end subroutine compute_forces_elastic_Dev_6points
-!=====================================================================
+end subroutine compute_forces_elastic_Dev_6p
+
 !
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
 !=====================================================================
+!
 
-
-subroutine compute_forces_elastic_Dev_7points( iphase ,NSPEC_AB,NGLOB_AB, &
+subroutine compute_forces_elastic_Dev_7p( iphase ,NSPEC_AB,NGLOB_AB, &
                                     displ,accel, &
                                     xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                                     hprime_xx,hprime_xxT, &
@@ -1394,7 +1353,8 @@
         kappastore,mustore,jacobian
 
 ! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(NGLLX,7) :: hprime_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(7,NGLLX) :: hprime_xxT,hprimewgll_xx
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
   real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -1439,15 +1399,15 @@
   integer :: ispec2D_moho_top, ispec2D_moho_bot
 
 ! local parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+  real(kind=CUSTOM_REAL), dimension(7,7,7) :: dummyx_loc,dummyy_loc,dummyz_loc, &
     newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+  real(kind=CUSTOM_REAL), dimension(7,7,7) :: &
     tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
 
   ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_7points,B2_m1_m2_7points,B3_m1_m2_7points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_7points,C2_m1_m2_7points,C3_m1_m2_7points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_7points,E2_m1_m2_7points,E3_m1_m2_7points
+  real(kind=CUSTOM_REAL), dimension(7,49) :: B1_m1_m2_7points,B2_m1_m2_7points,B3_m1_m2_7points
+  real(kind=CUSTOM_REAL), dimension(7,49) :: C1_m1_m2_7points,C2_m1_m2_7points,C3_m1_m2_7points
+  real(kind=CUSTOM_REAL), dimension(7,49) :: E1_m1_m2_7points,E2_m1_m2_7points,E3_m1_m2_7points
 
   equivalence(dummyx_loc,B1_m1_m2_7points)
   equivalence(dummyy_loc,B2_m1_m2_7points)
@@ -1459,11 +1419,11 @@
   equivalence(newtempy1,E2_m1_m2_7points)
   equivalence(newtempz1,E3_m1_m2_7points)
 
-  real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+  real(kind=CUSTOM_REAL), dimension(49,7) :: &
     A1_mxm_m2_m1_7points,A2_mxm_m2_m1_7points,A3_mxm_m2_m1_7points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(49,7) :: &
     C1_mxm_m2_m1_7points,C2_mxm_m2_m1_7points,C3_mxm_m2_m1_7points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(49,7) :: &
     E1_mxm_m2_m1_7points,E2_mxm_m2_m1_7points,E3_mxm_m2_m1_7points
 
   equivalence(dummyx_loc,A1_mxm_m2_m1_7points)
@@ -1552,21 +1512,21 @@
                                   hprime_xx(i,4)*B1_m1_m2_7points(4,j) + &
                                   hprime_xx(i,5)*B1_m1_m2_7points(5,j) + &
                                   hprime_xx(i,6)*B1_m1_m2_7points(6,j) + &
-                                  hprime_xx(i,7)*B1_m1_m2_7points(7,j) 
+                                  hprime_xx(i,7)*B1_m1_m2_7points(7,j)
             C2_m1_m2_7points(i,j) = hprime_xx(i,1)*B2_m1_m2_7points(1,j) + &
                                   hprime_xx(i,2)*B2_m1_m2_7points(2,j) + &
                                   hprime_xx(i,3)*B2_m1_m2_7points(3,j) + &
                                   hprime_xx(i,4)*B2_m1_m2_7points(4,j) + &
                                   hprime_xx(i,5)*B2_m1_m2_7points(5,j) + &
                                   hprime_xx(i,6)*B2_m1_m2_7points(6,j) + &
-                                  hprime_xx(i,7)*B2_m1_m2_7points(7,j) 
+                                  hprime_xx(i,7)*B2_m1_m2_7points(7,j)
             C3_m1_m2_7points(i,j) = hprime_xx(i,1)*B3_m1_m2_7points(1,j) + &
                                   hprime_xx(i,2)*B3_m1_m2_7points(2,j) + &
                                   hprime_xx(i,3)*B3_m1_m2_7points(3,j) + &
                                   hprime_xx(i,4)*B3_m1_m2_7points(4,j) + &
                                   hprime_xx(i,5)*B3_m1_m2_7points(5,j) + &
                                   hprime_xx(i,6)*B3_m1_m2_7points(6,j) + &
-                                  hprime_xx(i,7)*B3_m1_m2_7points(7,j) 
+                                  hprime_xx(i,7)*B3_m1_m2_7points(7,j)
           enddo
         enddo
 
@@ -1582,21 +1542,21 @@
                             dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
                             dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
                             dummyx_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyx_loc(i,7,k)*hprime_xxT(7,j) 
+                            dummyx_loc(i,7,k)*hprime_xxT(7,j)
               tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
                             dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
                             dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
                             dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
                             dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
                             dummyy_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyy_loc(i,7,k)*hprime_xxT(7,j) 
+                            dummyy_loc(i,7,k)*hprime_xxT(7,j)
               tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
                             dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
                             dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
                             dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
                             dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
                             dummyz_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyz_loc(i,7,k)*hprime_xxT(7,j) 
+                            dummyz_loc(i,7,k)*hprime_xxT(7,j)
             enddo
           enddo
         enddo
@@ -1610,21 +1570,21 @@
                                       A1_mxm_m2_m1_7points(i,4)*hprime_xxT(4,j) + &
                                       A1_mxm_m2_m1_7points(i,5)*hprime_xxT(5,j) + &
                                       A1_mxm_m2_m1_7points(i,6)*hprime_xxT(6,j) + &
-                                      A1_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j) 
+                                      A1_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
             C2_mxm_m2_m1_7points(i,j) = A2_mxm_m2_m1_7points(i,1)*hprime_xxT(1,j) + &
                                       A2_mxm_m2_m1_7points(i,2)*hprime_xxT(2,j) + &
                                       A2_mxm_m2_m1_7points(i,3)*hprime_xxT(3,j) + &
                                       A2_mxm_m2_m1_7points(i,4)*hprime_xxT(4,j) + &
                                       A2_mxm_m2_m1_7points(i,5)*hprime_xxT(5,j) + &
                                       A2_mxm_m2_m1_7points(i,6)*hprime_xxT(6,j) + &
-                                      A2_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j) 
+                                      A2_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
             C3_mxm_m2_m1_7points(i,j) = A3_mxm_m2_m1_7points(i,1)*hprime_xxT(1,j) + &
                                       A3_mxm_m2_m1_7points(i,2)*hprime_xxT(2,j) + &
                                       A3_mxm_m2_m1_7points(i,3)*hprime_xxT(3,j) + &
                                       A3_mxm_m2_m1_7points(i,4)*hprime_xxT(4,j) + &
                                       A3_mxm_m2_m1_7points(i,5)*hprime_xxT(5,j) + &
                                       A3_mxm_m2_m1_7points(i,6)*hprime_xxT(6,j) + &
-                                      A3_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j) 
+                                      A3_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
           enddo
         enddo
 
@@ -1856,21 +1816,21 @@
                                   hprimewgll_xxT(i,4)*C1_m1_m2_7points(4,j) + &
                                   hprimewgll_xxT(i,5)*C1_m1_m2_7points(5,j) + &
                                   hprimewgll_xxT(i,6)*C1_m1_m2_7points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C1_m1_m2_7points(7,j) 
+                                  hprimewgll_xxT(i,7)*C1_m1_m2_7points(7,j)
             E2_m1_m2_7points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_7points(1,j) + &
                                   hprimewgll_xxT(i,2)*C2_m1_m2_7points(2,j) + &
                                   hprimewgll_xxT(i,3)*C2_m1_m2_7points(3,j) + &
                                   hprimewgll_xxT(i,4)*C2_m1_m2_7points(4,j) + &
                                   hprimewgll_xxT(i,5)*C2_m1_m2_7points(5,j) + &
                                   hprimewgll_xxT(i,6)*C2_m1_m2_7points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C2_m1_m2_7points(7,j) 
+                                  hprimewgll_xxT(i,7)*C2_m1_m2_7points(7,j)
             E3_m1_m2_7points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_7points(1,j) + &
                                   hprimewgll_xxT(i,2)*C3_m1_m2_7points(2,j) + &
                                   hprimewgll_xxT(i,3)*C3_m1_m2_7points(3,j) + &
                                   hprimewgll_xxT(i,4)*C3_m1_m2_7points(4,j) + &
                                   hprimewgll_xxT(i,5)*C3_m1_m2_7points(5,j) + &
                                   hprimewgll_xxT(i,6)*C3_m1_m2_7points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C3_m1_m2_7points(7,j) 
+                                  hprimewgll_xxT(i,7)*C3_m1_m2_7points(7,j)
           enddo
         enddo
 
@@ -1886,21 +1846,21 @@
                                tempx2(i,4,k)*hprimewgll_xx(4,j) + &
                                tempx2(i,5,k)*hprimewgll_xx(5,j) + &
                                tempx2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempx2(i,7,k)*hprimewgll_xx(7,j) 
+                               tempx2(i,7,k)*hprimewgll_xx(7,j)
               newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
                                tempy2(i,2,k)*hprimewgll_xx(2,j) + &
                                tempy2(i,3,k)*hprimewgll_xx(3,j) + &
                                tempy2(i,4,k)*hprimewgll_xx(4,j) + &
                                tempy2(i,5,k)*hprimewgll_xx(5,j) + &
                                tempy2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempy2(i,7,k)*hprimewgll_xx(7,j) 
+                               tempy2(i,7,k)*hprimewgll_xx(7,j)
               newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
                                tempz2(i,2,k)*hprimewgll_xx(2,j) + &
                                tempz2(i,3,k)*hprimewgll_xx(3,j) + &
                                tempz2(i,4,k)*hprimewgll_xx(4,j) + &
                                tempz2(i,5,k)*hprimewgll_xx(5,j) + &
                                tempz2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempz2(i,7,k)*hprimewgll_xx(7,j) 
+                               tempz2(i,7,k)*hprimewgll_xx(7,j)
             enddo
           enddo
         enddo
@@ -1914,21 +1874,21 @@
                                       C1_mxm_m2_m1_7points(i,4)*hprimewgll_xx(4,j) + &
                                       C1_mxm_m2_m1_7points(i,5)*hprimewgll_xx(5,j) + &
                                       C1_mxm_m2_m1_7points(i,6)*hprimewgll_xx(6,j) + &
-                                      C1_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j) 
+                                      C1_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
             E2_mxm_m2_m1_7points(i,j) = C2_mxm_m2_m1_7points(i,1)*hprimewgll_xx(1,j) + &
                                       C2_mxm_m2_m1_7points(i,2)*hprimewgll_xx(2,j) + &
                                       C2_mxm_m2_m1_7points(i,3)*hprimewgll_xx(3,j) + &
                                       C2_mxm_m2_m1_7points(i,4)*hprimewgll_xx(4,j) + &
                                       C2_mxm_m2_m1_7points(i,5)*hprimewgll_xx(5,j) + &
                                       C2_mxm_m2_m1_7points(i,6)*hprimewgll_xx(6,j) + &
-                                      C2_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j) 
+                                      C2_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
             E3_mxm_m2_m1_7points(i,j) = C3_mxm_m2_m1_7points(i,1)*hprimewgll_xx(1,j) + &
                                       C3_mxm_m2_m1_7points(i,2)*hprimewgll_xx(2,j) + &
                                       C3_mxm_m2_m1_7points(i,3)*hprimewgll_xx(3,j) + &
                                       C3_mxm_m2_m1_7points(i,4)*hprimewgll_xx(4,j) + &
                                       C3_mxm_m2_m1_7points(i,5)*hprimewgll_xx(5,j) + &
                                       C3_mxm_m2_m1_7points(i,6)*hprimewgll_xx(6,j) + &
-                                      C3_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j) 
+                                      C3_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
           enddo
         enddo
 
@@ -2007,35 +1967,13 @@
 
   enddo  ! spectral element loop
 
-end subroutine compute_forces_elastic_Dev_7points
-!=====================================================================
+end subroutine compute_forces_elastic_Dev_7p
+
 !
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
 !=====================================================================
+!
 
-
-subroutine compute_forces_elastic_Dev_8points( iphase ,NSPEC_AB,NGLOB_AB, &
+subroutine compute_forces_elastic_Dev_8p( iphase ,NSPEC_AB,NGLOB_AB, &
                                     displ,accel, &
                                     xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                                     hprime_xx,hprime_xxT, &
@@ -2082,7 +2020,8 @@
         kappastore,mustore,jacobian
 
 ! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(NGLLX,8) :: hprime_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(8,NGLLX) :: hprime_xxT,hprimewgll_xx
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
   real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -2127,15 +2066,15 @@
   integer :: ispec2D_moho_top, ispec2D_moho_bot
 
 ! local parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+  real(kind=CUSTOM_REAL), dimension(8,8,8) :: dummyx_loc,dummyy_loc,dummyz_loc, &
     newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+  real(kind=CUSTOM_REAL), dimension(8,8,8) :: &
     tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
 
   ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_8points,B2_m1_m2_8points,B3_m1_m2_8points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_8points,C2_m1_m2_8points,C3_m1_m2_8points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_8points,E2_m1_m2_8points,E3_m1_m2_8points
+  real(kind=CUSTOM_REAL), dimension(8,64) :: B1_m1_m2_8points,B2_m1_m2_8points,B3_m1_m2_8points
+  real(kind=CUSTOM_REAL), dimension(8,64) :: C1_m1_m2_8points,C2_m1_m2_8points,C3_m1_m2_8points
+  real(kind=CUSTOM_REAL), dimension(8,64) :: E1_m1_m2_8points,E2_m1_m2_8points,E3_m1_m2_8points
 
   equivalence(dummyx_loc,B1_m1_m2_8points)
   equivalence(dummyy_loc,B2_m1_m2_8points)
@@ -2147,11 +2086,11 @@
   equivalence(newtempy1,E2_m1_m2_8points)
   equivalence(newtempz1,E3_m1_m2_8points)
 
-  real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+  real(kind=CUSTOM_REAL), dimension(64,8) :: &
     A1_mxm_m2_m1_8points,A2_mxm_m2_m1_8points,A3_mxm_m2_m1_8points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(64,8) :: &
     C1_mxm_m2_m1_8points,C2_mxm_m2_m1_8points,C3_mxm_m2_m1_8points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(64,8) :: &
     E1_mxm_m2_m1_8points,E2_mxm_m2_m1_8points,E3_mxm_m2_m1_8points
 
   equivalence(dummyx_loc,A1_mxm_m2_m1_8points)
@@ -2241,7 +2180,7 @@
                                   hprime_xx(i,5)*B1_m1_m2_8points(5,j) + &
                                   hprime_xx(i,6)*B1_m1_m2_8points(6,j) + &
                                   hprime_xx(i,7)*B1_m1_m2_8points(7,j) + &
-                                  hprime_xx(i,8)*B1_m1_m2_8points(8,j) 
+                                  hprime_xx(i,8)*B1_m1_m2_8points(8,j)
             C2_m1_m2_8points(i,j) = hprime_xx(i,1)*B2_m1_m2_8points(1,j) + &
                                   hprime_xx(i,2)*B2_m1_m2_8points(2,j) + &
                                   hprime_xx(i,3)*B2_m1_m2_8points(3,j) + &
@@ -2249,7 +2188,7 @@
                                   hprime_xx(i,5)*B2_m1_m2_8points(5,j) + &
                                   hprime_xx(i,6)*B2_m1_m2_8points(6,j) + &
                                   hprime_xx(i,7)*B2_m1_m2_8points(7,j) + &
-                                  hprime_xx(i,8)*B2_m1_m2_8points(8,j) 
+                                  hprime_xx(i,8)*B2_m1_m2_8points(8,j)
             C3_m1_m2_8points(i,j) = hprime_xx(i,1)*B3_m1_m2_8points(1,j) + &
                                   hprime_xx(i,2)*B3_m1_m2_8points(2,j) + &
                                   hprime_xx(i,3)*B3_m1_m2_8points(3,j) + &
@@ -2257,7 +2196,7 @@
                                   hprime_xx(i,5)*B3_m1_m2_8points(5,j) + &
                                   hprime_xx(i,6)*B3_m1_m2_8points(6,j) + &
                                   hprime_xx(i,7)*B3_m1_m2_8points(7,j) + &
-                                  hprime_xx(i,8)*B3_m1_m2_8points(8,j) 
+                                  hprime_xx(i,8)*B3_m1_m2_8points(8,j)
           enddo
         enddo
 
@@ -2274,7 +2213,7 @@
                             dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
                             dummyx_loc(i,6,k)*hprime_xxT(6,j) + &
                             dummyx_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyx_loc(i,8,k)*hprime_xxT(8,j) 
+                            dummyx_loc(i,8,k)*hprime_xxT(8,j)
               tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
                             dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
                             dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
@@ -2282,7 +2221,7 @@
                             dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
                             dummyy_loc(i,6,k)*hprime_xxT(6,j) + &
                             dummyy_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyy_loc(i,8,k)*hprime_xxT(8,j) 
+                            dummyy_loc(i,8,k)*hprime_xxT(8,j)
               tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
                             dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
                             dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
@@ -2290,7 +2229,7 @@
                             dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
                             dummyz_loc(i,6,k)*hprime_xxT(6,j) + &
                             dummyz_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyz_loc(i,8,k)*hprime_xxT(8,j) 
+                            dummyz_loc(i,8,k)*hprime_xxT(8,j)
             enddo
           enddo
         enddo
@@ -2305,7 +2244,7 @@
                                       A1_mxm_m2_m1_8points(i,5)*hprime_xxT(5,j) + &
                                       A1_mxm_m2_m1_8points(i,6)*hprime_xxT(6,j) + &
                                       A1_mxm_m2_m1_8points(i,7)*hprime_xxT(7,j) + &
-                                      A1_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j) 
+                                      A1_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
             C2_mxm_m2_m1_8points(i,j) = A2_mxm_m2_m1_8points(i,1)*hprime_xxT(1,j) + &
                                       A2_mxm_m2_m1_8points(i,2)*hprime_xxT(2,j) + &
                                       A2_mxm_m2_m1_8points(i,3)*hprime_xxT(3,j) + &
@@ -2313,7 +2252,7 @@
                                       A2_mxm_m2_m1_8points(i,5)*hprime_xxT(5,j) + &
                                       A2_mxm_m2_m1_8points(i,6)*hprime_xxT(6,j) + &
                                       A2_mxm_m2_m1_8points(i,7)*hprime_xxT(7,j) + &
-                                      A2_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j) 
+                                      A2_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
             C3_mxm_m2_m1_8points(i,j) = A3_mxm_m2_m1_8points(i,1)*hprime_xxT(1,j) + &
                                       A3_mxm_m2_m1_8points(i,2)*hprime_xxT(2,j) + &
                                       A3_mxm_m2_m1_8points(i,3)*hprime_xxT(3,j) + &
@@ -2321,7 +2260,7 @@
                                       A3_mxm_m2_m1_8points(i,5)*hprime_xxT(5,j) + &
                                       A3_mxm_m2_m1_8points(i,6)*hprime_xxT(6,j) + &
                                       A3_mxm_m2_m1_8points(i,7)*hprime_xxT(7,j) + &
-                                      A3_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j) 
+                                      A3_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
           enddo
         enddo
 
@@ -2554,7 +2493,7 @@
                                   hprimewgll_xxT(i,5)*C1_m1_m2_8points(5,j) + &
                                   hprimewgll_xxT(i,6)*C1_m1_m2_8points(6,j) + &
                                   hprimewgll_xxT(i,7)*C1_m1_m2_8points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C1_m1_m2_8points(8,j) 
+                                  hprimewgll_xxT(i,8)*C1_m1_m2_8points(8,j)
             E2_m1_m2_8points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_8points(1,j) + &
                                   hprimewgll_xxT(i,2)*C2_m1_m2_8points(2,j) + &
                                   hprimewgll_xxT(i,3)*C2_m1_m2_8points(3,j) + &
@@ -2562,7 +2501,7 @@
                                   hprimewgll_xxT(i,5)*C2_m1_m2_8points(5,j) + &
                                   hprimewgll_xxT(i,6)*C2_m1_m2_8points(6,j) + &
                                   hprimewgll_xxT(i,7)*C2_m1_m2_8points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C2_m1_m2_8points(8,j) 
+                                  hprimewgll_xxT(i,8)*C2_m1_m2_8points(8,j)
             E3_m1_m2_8points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_8points(1,j) + &
                                   hprimewgll_xxT(i,2)*C3_m1_m2_8points(2,j) + &
                                   hprimewgll_xxT(i,3)*C3_m1_m2_8points(3,j) + &
@@ -2570,7 +2509,7 @@
                                   hprimewgll_xxT(i,5)*C3_m1_m2_8points(5,j) + &
                                   hprimewgll_xxT(i,6)*C3_m1_m2_8points(6,j) + &
                                   hprimewgll_xxT(i,7)*C3_m1_m2_8points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C3_m1_m2_8points(8,j) 
+                                  hprimewgll_xxT(i,8)*C3_m1_m2_8points(8,j)
           enddo
         enddo
 
@@ -2587,7 +2526,7 @@
                                tempx2(i,5,k)*hprimewgll_xx(5,j) + &
                                tempx2(i,6,k)*hprimewgll_xx(6,j) + &
                                tempx2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempx2(i,8,k)*hprimewgll_xx(8,j) 
+                               tempx2(i,8,k)*hprimewgll_xx(8,j)
               newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
                                tempy2(i,2,k)*hprimewgll_xx(2,j) + &
                                tempy2(i,3,k)*hprimewgll_xx(3,j) + &
@@ -2595,7 +2534,7 @@
                                tempy2(i,5,k)*hprimewgll_xx(5,j) + &
                                tempy2(i,6,k)*hprimewgll_xx(6,j) + &
                                tempy2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempy2(i,8,k)*hprimewgll_xx(8,j) 
+                               tempy2(i,8,k)*hprimewgll_xx(8,j)
               newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
                                tempz2(i,2,k)*hprimewgll_xx(2,j) + &
                                tempz2(i,3,k)*hprimewgll_xx(3,j) + &
@@ -2603,7 +2542,7 @@
                                tempz2(i,5,k)*hprimewgll_xx(5,j) + &
                                tempz2(i,6,k)*hprimewgll_xx(6,j) + &
                                tempz2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempz2(i,8,k)*hprimewgll_xx(8,j) 
+                               tempz2(i,8,k)*hprimewgll_xx(8,j)
             enddo
           enddo
         enddo
@@ -2618,7 +2557,7 @@
                                       C1_mxm_m2_m1_8points(i,5)*hprimewgll_xx(5,j) + &
                                       C1_mxm_m2_m1_8points(i,6)*hprimewgll_xx(6,j) + &
                                       C1_mxm_m2_m1_8points(i,7)*hprimewgll_xx(7,j) + &
-                                      C1_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j) 
+                                      C1_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
             E2_mxm_m2_m1_8points(i,j) = C2_mxm_m2_m1_8points(i,1)*hprimewgll_xx(1,j) + &
                                       C2_mxm_m2_m1_8points(i,2)*hprimewgll_xx(2,j) + &
                                       C2_mxm_m2_m1_8points(i,3)*hprimewgll_xx(3,j) + &
@@ -2626,7 +2565,7 @@
                                       C2_mxm_m2_m1_8points(i,5)*hprimewgll_xx(5,j) + &
                                       C2_mxm_m2_m1_8points(i,6)*hprimewgll_xx(6,j) + &
                                       C2_mxm_m2_m1_8points(i,7)*hprimewgll_xx(7,j) + &
-                                      C2_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j) 
+                                      C2_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
             E3_mxm_m2_m1_8points(i,j) = C3_mxm_m2_m1_8points(i,1)*hprimewgll_xx(1,j) + &
                                       C3_mxm_m2_m1_8points(i,2)*hprimewgll_xx(2,j) + &
                                       C3_mxm_m2_m1_8points(i,3)*hprimewgll_xx(3,j) + &
@@ -2634,7 +2573,7 @@
                                       C3_mxm_m2_m1_8points(i,5)*hprimewgll_xx(5,j) + &
                                       C3_mxm_m2_m1_8points(i,6)*hprimewgll_xx(6,j) + &
                                       C3_mxm_m2_m1_8points(i,7)*hprimewgll_xx(7,j) + &
-                                      C3_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j) 
+                                      C3_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
           enddo
         enddo
 
@@ -2713,35 +2652,13 @@
 
   enddo  ! spectral element loop
 
-end subroutine compute_forces_elastic_Dev_8points
-!=====================================================================
+end subroutine compute_forces_elastic_Dev_8p
+
 !
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
 !=====================================================================
+!
 
-
-subroutine compute_forces_elastic_Dev_9points( iphase ,NSPEC_AB,NGLOB_AB, &
+subroutine compute_forces_elastic_Dev_9p( iphase ,NSPEC_AB,NGLOB_AB, &
                                     displ,accel, &
                                     xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                                     hprime_xx,hprime_xxT, &
@@ -2788,7 +2705,8 @@
         kappastore,mustore,jacobian
 
 ! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(NGLLX,9) :: hprime_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(9,NGLLX) :: hprime_xxT,hprimewgll_xx
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
   real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -2833,15 +2751,15 @@
   integer :: ispec2D_moho_top, ispec2D_moho_bot
 
 ! local parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+  real(kind=CUSTOM_REAL), dimension(9,9,9) :: dummyx_loc,dummyy_loc,dummyz_loc, &
     newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+  real(kind=CUSTOM_REAL), dimension(9,9,9) :: &
     tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
 
   ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_9points,B2_m1_m2_9points,B3_m1_m2_9points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_9points,C2_m1_m2_9points,C3_m1_m2_9points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_9points,E2_m1_m2_9points,E3_m1_m2_9points
+  real(kind=CUSTOM_REAL), dimension(9,81) :: B1_m1_m2_9points,B2_m1_m2_9points,B3_m1_m2_9points
+  real(kind=CUSTOM_REAL), dimension(9,81) :: C1_m1_m2_9points,C2_m1_m2_9points,C3_m1_m2_9points
+  real(kind=CUSTOM_REAL), dimension(9,81) :: E1_m1_m2_9points,E2_m1_m2_9points,E3_m1_m2_9points
 
   equivalence(dummyx_loc,B1_m1_m2_9points)
   equivalence(dummyy_loc,B2_m1_m2_9points)
@@ -2853,11 +2771,11 @@
   equivalence(newtempy1,E2_m1_m2_9points)
   equivalence(newtempz1,E3_m1_m2_9points)
 
-  real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+  real(kind=CUSTOM_REAL), dimension(81,9) :: &
     A1_mxm_m2_m1_9points,A2_mxm_m2_m1_9points,A3_mxm_m2_m1_9points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(81,9) :: &
     C1_mxm_m2_m1_9points,C2_mxm_m2_m1_9points,C3_mxm_m2_m1_9points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(81,9) :: &
     E1_mxm_m2_m1_9points,E2_mxm_m2_m1_9points,E3_mxm_m2_m1_9points
 
   equivalence(dummyx_loc,A1_mxm_m2_m1_9points)
@@ -2948,7 +2866,7 @@
                                   hprime_xx(i,6)*B1_m1_m2_9points(6,j) + &
                                   hprime_xx(i,7)*B1_m1_m2_9points(7,j) + &
                                   hprime_xx(i,8)*B1_m1_m2_9points(8,j) + &
-                                  hprime_xx(i,9)*B1_m1_m2_9points(9,j) 
+                                  hprime_xx(i,9)*B1_m1_m2_9points(9,j)
             C2_m1_m2_9points(i,j) = hprime_xx(i,1)*B2_m1_m2_9points(1,j) + &
                                   hprime_xx(i,2)*B2_m1_m2_9points(2,j) + &
                                   hprime_xx(i,3)*B2_m1_m2_9points(3,j) + &
@@ -2957,7 +2875,7 @@
                                   hprime_xx(i,6)*B2_m1_m2_9points(6,j) + &
                                   hprime_xx(i,7)*B2_m1_m2_9points(7,j) + &
                                   hprime_xx(i,8)*B2_m1_m2_9points(8,j) + &
-                                  hprime_xx(i,9)*B2_m1_m2_9points(9,j) 
+                                  hprime_xx(i,9)*B2_m1_m2_9points(9,j)
             C3_m1_m2_9points(i,j) = hprime_xx(i,1)*B3_m1_m2_9points(1,j) + &
                                   hprime_xx(i,2)*B3_m1_m2_9points(2,j) + &
                                   hprime_xx(i,3)*B3_m1_m2_9points(3,j) + &
@@ -2966,7 +2884,7 @@
                                   hprime_xx(i,6)*B3_m1_m2_9points(6,j) + &
                                   hprime_xx(i,7)*B3_m1_m2_9points(7,j) + &
                                   hprime_xx(i,8)*B3_m1_m2_9points(8,j) + &
-                                  hprime_xx(i,9)*B3_m1_m2_9points(9,j) 
+                                  hprime_xx(i,9)*B3_m1_m2_9points(9,j)
           enddo
         enddo
 
@@ -2984,7 +2902,7 @@
                             dummyx_loc(i,6,k)*hprime_xxT(6,j) + &
                             dummyx_loc(i,7,k)*hprime_xxT(7,j) + &
                             dummyx_loc(i,8,k)*hprime_xxT(8,j) + &
-                            dummyx_loc(i,9,k)*hprime_xxT(9,j) 
+                            dummyx_loc(i,9,k)*hprime_xxT(9,j)
               tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
                             dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
                             dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
@@ -2993,7 +2911,7 @@
                             dummyy_loc(i,6,k)*hprime_xxT(6,j) + &
                             dummyy_loc(i,7,k)*hprime_xxT(7,j) + &
                             dummyy_loc(i,8,k)*hprime_xxT(8,j) + &
-                            dummyy_loc(i,9,k)*hprime_xxT(9,j) 
+                            dummyy_loc(i,9,k)*hprime_xxT(9,j)
               tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
                             dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
                             dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
@@ -3002,7 +2920,7 @@
                             dummyz_loc(i,6,k)*hprime_xxT(6,j) + &
                             dummyz_loc(i,7,k)*hprime_xxT(7,j) + &
                             dummyz_loc(i,8,k)*hprime_xxT(8,j) + &
-                            dummyz_loc(i,9,k)*hprime_xxT(9,j) 
+                            dummyz_loc(i,9,k)*hprime_xxT(9,j)
             enddo
           enddo
         enddo
@@ -3018,7 +2936,7 @@
                                       A1_mxm_m2_m1_9points(i,6)*hprime_xxT(6,j) + &
                                       A1_mxm_m2_m1_9points(i,7)*hprime_xxT(7,j) + &
                                       A1_mxm_m2_m1_9points(i,8)*hprime_xxT(8,j) + &
-                                      A1_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j) 
+                                      A1_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
             C2_mxm_m2_m1_9points(i,j) = A2_mxm_m2_m1_9points(i,1)*hprime_xxT(1,j) + &
                                       A2_mxm_m2_m1_9points(i,2)*hprime_xxT(2,j) + &
                                       A2_mxm_m2_m1_9points(i,3)*hprime_xxT(3,j) + &
@@ -3027,7 +2945,7 @@
                                       A2_mxm_m2_m1_9points(i,6)*hprime_xxT(6,j) + &
                                       A2_mxm_m2_m1_9points(i,7)*hprime_xxT(7,j) + &
                                       A2_mxm_m2_m1_9points(i,8)*hprime_xxT(8,j) + &
-                                      A2_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j) 
+                                      A2_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
             C3_mxm_m2_m1_9points(i,j) = A3_mxm_m2_m1_9points(i,1)*hprime_xxT(1,j) + &
                                       A3_mxm_m2_m1_9points(i,2)*hprime_xxT(2,j) + &
                                       A3_mxm_m2_m1_9points(i,3)*hprime_xxT(3,j) + &
@@ -3036,7 +2954,7 @@
                                       A3_mxm_m2_m1_9points(i,6)*hprime_xxT(6,j) + &
                                       A3_mxm_m2_m1_9points(i,7)*hprime_xxT(7,j) + &
                                       A3_mxm_m2_m1_9points(i,8)*hprime_xxT(8,j) + &
-                                      A3_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j) 
+                                      A3_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
           enddo
         enddo
 
@@ -3270,7 +3188,7 @@
                                   hprimewgll_xxT(i,6)*C1_m1_m2_9points(6,j) + &
                                   hprimewgll_xxT(i,7)*C1_m1_m2_9points(7,j) + &
                                   hprimewgll_xxT(i,8)*C1_m1_m2_9points(8,j) + &
-                                  hprimewgll_xxT(i,9)*C1_m1_m2_9points(9,j) 
+                                  hprimewgll_xxT(i,9)*C1_m1_m2_9points(9,j)
             E2_m1_m2_9points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_9points(1,j) + &
                                   hprimewgll_xxT(i,2)*C2_m1_m2_9points(2,j) + &
                                   hprimewgll_xxT(i,3)*C2_m1_m2_9points(3,j) + &
@@ -3279,7 +3197,7 @@
                                   hprimewgll_xxT(i,6)*C2_m1_m2_9points(6,j) + &
                                   hprimewgll_xxT(i,7)*C2_m1_m2_9points(7,j) + &
                                   hprimewgll_xxT(i,8)*C2_m1_m2_9points(8,j) + &
-                                  hprimewgll_xxT(i,9)*C2_m1_m2_9points(9,j) 
+                                  hprimewgll_xxT(i,9)*C2_m1_m2_9points(9,j)
             E3_m1_m2_9points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_9points(1,j) + &
                                   hprimewgll_xxT(i,2)*C3_m1_m2_9points(2,j) + &
                                   hprimewgll_xxT(i,3)*C3_m1_m2_9points(3,j) + &
@@ -3288,7 +3206,7 @@
                                   hprimewgll_xxT(i,6)*C3_m1_m2_9points(6,j) + &
                                   hprimewgll_xxT(i,7)*C3_m1_m2_9points(7,j) + &
                                   hprimewgll_xxT(i,8)*C3_m1_m2_9points(8,j) + &
-                                  hprimewgll_xxT(i,9)*C3_m1_m2_9points(9,j) 
+                                  hprimewgll_xxT(i,9)*C3_m1_m2_9points(9,j)
           enddo
         enddo
 
@@ -3306,7 +3224,7 @@
                                tempx2(i,6,k)*hprimewgll_xx(6,j) + &
                                tempx2(i,7,k)*hprimewgll_xx(7,j) + &
                                tempx2(i,8,k)*hprimewgll_xx(8,j) + &
-                               tempx2(i,9,k)*hprimewgll_xx(9,j) 
+                               tempx2(i,9,k)*hprimewgll_xx(9,j)
               newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
                                tempy2(i,2,k)*hprimewgll_xx(2,j) + &
                                tempy2(i,3,k)*hprimewgll_xx(3,j) + &
@@ -3315,7 +3233,7 @@
                                tempy2(i,6,k)*hprimewgll_xx(6,j) + &
                                tempy2(i,7,k)*hprimewgll_xx(7,j) + &
                                tempy2(i,8,k)*hprimewgll_xx(8,j) + &
-                               tempy2(i,9,k)*hprimewgll_xx(9,j) 
+                               tempy2(i,9,k)*hprimewgll_xx(9,j)
               newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
                                tempz2(i,2,k)*hprimewgll_xx(2,j) + &
                                tempz2(i,3,k)*hprimewgll_xx(3,j) + &
@@ -3324,7 +3242,7 @@
                                tempz2(i,6,k)*hprimewgll_xx(6,j) + &
                                tempz2(i,7,k)*hprimewgll_xx(7,j) + &
                                tempz2(i,8,k)*hprimewgll_xx(8,j) + &
-                               tempz2(i,9,k)*hprimewgll_xx(9,j) 
+                               tempz2(i,9,k)*hprimewgll_xx(9,j)
             enddo
           enddo
         enddo
@@ -3340,7 +3258,7 @@
                                       C1_mxm_m2_m1_9points(i,6)*hprimewgll_xx(6,j) + &
                                       C1_mxm_m2_m1_9points(i,7)*hprimewgll_xx(7,j) + &
                                       C1_mxm_m2_m1_9points(i,8)*hprimewgll_xx(8,j) + &
-                                      C1_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j) 
+                                      C1_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
             E2_mxm_m2_m1_9points(i,j) = C2_mxm_m2_m1_9points(i,1)*hprimewgll_xx(1,j) + &
                                       C2_mxm_m2_m1_9points(i,2)*hprimewgll_xx(2,j) + &
                                       C2_mxm_m2_m1_9points(i,3)*hprimewgll_xx(3,j) + &
@@ -3349,7 +3267,7 @@
                                       C2_mxm_m2_m1_9points(i,6)*hprimewgll_xx(6,j) + &
                                       C2_mxm_m2_m1_9points(i,7)*hprimewgll_xx(7,j) + &
                                       C2_mxm_m2_m1_9points(i,8)*hprimewgll_xx(8,j) + &
-                                      C2_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j) 
+                                      C2_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
             E3_mxm_m2_m1_9points(i,j) = C3_mxm_m2_m1_9points(i,1)*hprimewgll_xx(1,j) + &
                                       C3_mxm_m2_m1_9points(i,2)*hprimewgll_xx(2,j) + &
                                       C3_mxm_m2_m1_9points(i,3)*hprimewgll_xx(3,j) + &
@@ -3358,7 +3276,7 @@
                                       C3_mxm_m2_m1_9points(i,6)*hprimewgll_xx(6,j) + &
                                       C3_mxm_m2_m1_9points(i,7)*hprimewgll_xx(7,j) + &
                                       C3_mxm_m2_m1_9points(i,8)*hprimewgll_xx(8,j) + &
-                                      C3_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j) 
+                                      C3_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
           enddo
         enddo
 
@@ -3437,35 +3355,13 @@
 
   enddo  ! spectral element loop
 
-end subroutine compute_forces_elastic_Dev_9points
-!=====================================================================
+end subroutine compute_forces_elastic_Dev_9p
+
 !
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
 !=====================================================================
+!
 
-
-subroutine compute_forces_elastic_Dev_10points( iphase ,NSPEC_AB,NGLOB_AB, &
+subroutine compute_forces_elastic_Dev_10p( iphase ,NSPEC_AB,NGLOB_AB, &
                                     displ,accel, &
                                     xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                                     hprime_xx,hprime_xxT, &
@@ -3512,7 +3408,8 @@
         kappastore,mustore,jacobian
 
 ! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(NGLLX,10) :: hprime_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(10,NGLLX) :: hprime_xxT,hprimewgll_xx
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
   real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -3557,15 +3454,15 @@
   integer :: ispec2D_moho_top, ispec2D_moho_bot
 
 ! local parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+  real(kind=CUSTOM_REAL), dimension(10,10,10) :: dummyx_loc,dummyy_loc,dummyz_loc, &
     newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+  real(kind=CUSTOM_REAL), dimension(10,10,10) :: &
     tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
 
   ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_10points,B2_m1_m2_10points,B3_m1_m2_10points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_10points,C2_m1_m2_10points,C3_m1_m2_10points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_10points,E2_m1_m2_10points,E3_m1_m2_10points
+  real(kind=CUSTOM_REAL), dimension(10,100) :: B1_m1_m2_10points,B2_m1_m2_10points,B3_m1_m2_10points
+  real(kind=CUSTOM_REAL), dimension(10,100) :: C1_m1_m2_10points,C2_m1_m2_10points,C3_m1_m2_10points
+  real(kind=CUSTOM_REAL), dimension(10,100) :: E1_m1_m2_10points,E2_m1_m2_10points,E3_m1_m2_10points
 
   equivalence(dummyx_loc,B1_m1_m2_10points)
   equivalence(dummyy_loc,B2_m1_m2_10points)
@@ -3577,11 +3474,11 @@
   equivalence(newtempy1,E2_m1_m2_10points)
   equivalence(newtempz1,E3_m1_m2_10points)
 
-  real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+  real(kind=CUSTOM_REAL), dimension(100,10) :: &
     A1_mxm_m2_m1_10points,A2_mxm_m2_m1_10points,A3_mxm_m2_m1_10points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(100,10) :: &
     C1_mxm_m2_m1_10points,C2_mxm_m2_m1_10points,C3_mxm_m2_m1_10points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+  real(kind=CUSTOM_REAL), dimension(100,10) :: &
     E1_mxm_m2_m1_10points,E2_mxm_m2_m1_10points,E3_mxm_m2_m1_10points
 
   equivalence(dummyx_loc,A1_mxm_m2_m1_10points)
@@ -3673,7 +3570,7 @@
                                   hprime_xx(i,7)*B1_m1_m2_10points(7,j) + &
                                   hprime_xx(i,8)*B1_m1_m2_10points(8,j) + &
                                   hprime_xx(i,9)*B1_m1_m2_10points(9,j) + &
-                                  hprime_xx(i,10)*B1_m1_m2_10points(10,j) 
+                                  hprime_xx(i,10)*B1_m1_m2_10points(10,j)
             C2_m1_m2_10points(i,j) = hprime_xx(i,1)*B2_m1_m2_10points(1,j) + &
                                   hprime_xx(i,2)*B2_m1_m2_10points(2,j) + &
                                   hprime_xx(i,3)*B2_m1_m2_10points(3,j) + &
@@ -3683,7 +3580,7 @@
                                   hprime_xx(i,7)*B2_m1_m2_10points(7,j) + &
                                   hprime_xx(i,8)*B2_m1_m2_10points(8,j) + &
                                   hprime_xx(i,9)*B2_m1_m2_10points(9,j) + &
-                                  hprime_xx(i,10)*B2_m1_m2_10points(10,j) 
+                                  hprime_xx(i,10)*B2_m1_m2_10points(10,j)
             C3_m1_m2_10points(i,j) = hprime_xx(i,1)*B3_m1_m2_10points(1,j) + &
                                   hprime_xx(i,2)*B3_m1_m2_10points(2,j) + &
                                   hprime_xx(i,3)*B3_m1_m2_10points(3,j) + &
@@ -3693,7 +3590,7 @@
                                   hprime_xx(i,7)*B3_m1_m2_10points(7,j) + &
                                   hprime_xx(i,8)*B3_m1_m2_10points(8,j) + &
                                   hprime_xx(i,9)*B3_m1_m2_10points(9,j) + &
-                                  hprime_xx(i,10)*B3_m1_m2_10points(10,j) 
+                                  hprime_xx(i,10)*B3_m1_m2_10points(10,j)
           enddo
         enddo
 
@@ -3712,7 +3609,7 @@
                             dummyx_loc(i,7,k)*hprime_xxT(7,j) + &
                             dummyx_loc(i,8,k)*hprime_xxT(8,j) + &
                             dummyx_loc(i,9,k)*hprime_xxT(9,j) + &
-                            dummyx_loc(i,10,k)*hprime_xxT(10,j) 
+                            dummyx_loc(i,10,k)*hprime_xxT(10,j)
               tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
                             dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
                             dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
@@ -3749,7 +3646,7 @@
                                       A1_mxm_m2_m1_10points(i,7)*hprime_xxT(7,j) + &
                                       A1_mxm_m2_m1_10points(i,8)*hprime_xxT(8,j) + &
                                       A1_mxm_m2_m1_10points(i,9)*hprime_xxT(9,j) + &
-                                      A1_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j) 
+                                      A1_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
             C2_mxm_m2_m1_10points(i,j) = A2_mxm_m2_m1_10points(i,1)*hprime_xxT(1,j) + &
                                       A2_mxm_m2_m1_10points(i,2)*hprime_xxT(2,j) + &
                                       A2_mxm_m2_m1_10points(i,3)*hprime_xxT(3,j) + &
@@ -3759,7 +3656,7 @@
                                       A2_mxm_m2_m1_10points(i,7)*hprime_xxT(7,j) + &
                                       A2_mxm_m2_m1_10points(i,8)*hprime_xxT(8,j) + &
                                       A2_mxm_m2_m1_10points(i,9)*hprime_xxT(9,j) + &
-                                      A2_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j) 
+                                      A2_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
             C3_mxm_m2_m1_10points(i,j) = A3_mxm_m2_m1_10points(i,1)*hprime_xxT(1,j) + &
                                       A3_mxm_m2_m1_10points(i,2)*hprime_xxT(2,j) + &
                                       A3_mxm_m2_m1_10points(i,3)*hprime_xxT(3,j) + &
@@ -3769,7 +3666,7 @@
                                       A3_mxm_m2_m1_10points(i,7)*hprime_xxT(7,j) + &
                                       A3_mxm_m2_m1_10points(i,8)*hprime_xxT(8,j) + &
                                       A3_mxm_m2_m1_10points(i,9)*hprime_xxT(9,j) + &
-                                      A3_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j) 
+                                      A3_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
           enddo
         enddo
 
@@ -4004,7 +3901,7 @@
                                   hprimewgll_xxT(i,7)*C1_m1_m2_10points(7,j) + &
                                   hprimewgll_xxT(i,8)*C1_m1_m2_10points(8,j) + &
                                   hprimewgll_xxT(i,9)*C1_m1_m2_10points(9,j) + &
-                                  hprimewgll_xxT(i,10)*C1_m1_m2_10points(10,j) 
+                                  hprimewgll_xxT(i,10)*C1_m1_m2_10points(10,j)
             E2_m1_m2_10points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_10points(1,j) + &
                                   hprimewgll_xxT(i,2)*C2_m1_m2_10points(2,j) + &
                                   hprimewgll_xxT(i,3)*C2_m1_m2_10points(3,j) + &
@@ -4024,7 +3921,7 @@
                                   hprimewgll_xxT(i,7)*C3_m1_m2_10points(7,j) + &
                                   hprimewgll_xxT(i,8)*C3_m1_m2_10points(8,j) + &
                                   hprimewgll_xxT(i,9)*C3_m1_m2_10points(9,j) + &
-                                  hprimewgll_xxT(i,10)*C3_m1_m2_10points(10,j) 
+                                  hprimewgll_xxT(i,10)*C3_m1_m2_10points(10,j)
           enddo
         enddo
 
@@ -4053,7 +3950,7 @@
                                tempy2(i,7,k)*hprimewgll_xx(7,j) + &
                                tempy2(i,8,k)*hprimewgll_xx(8,j) + &
                                tempy2(i,9,k)*hprimewgll_xx(9,j) + &
-                               tempy2(i,10,k)*hprimewgll_xx(10,j) 
+                               tempy2(i,10,k)*hprimewgll_xx(10,j)
               newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
                                tempz2(i,2,k)*hprimewgll_xx(2,j) + &
                                tempz2(i,3,k)*hprimewgll_xx(3,j) + &
@@ -4080,7 +3977,7 @@
                                       C1_mxm_m2_m1_10points(i,7)*hprimewgll_xx(7,j) + &
                                       C1_mxm_m2_m1_10points(i,8)*hprimewgll_xx(8,j) + &
                                       C1_mxm_m2_m1_10points(i,9)*hprimewgll_xx(9,j) + &
-                                      C1_mxm_m2_m1_10points(i,10)*hprimewgll_xx(10,j) 
+                                      C1_mxm_m2_m1_10points(i,10)*hprimewgll_xx(10,j)
             E2_mxm_m2_m1_10points(i,j) = C2_mxm_m2_m1_10points(i,1)*hprimewgll_xx(1,j) + &
                                       C2_mxm_m2_m1_10points(i,2)*hprimewgll_xx(2,j) + &
                                       C2_mxm_m2_m1_10points(i,3)*hprimewgll_xx(3,j) + &
@@ -4179,4 +4076,4 @@
 
   enddo  ! spectral element loop
 
-end subroutine compute_forces_elastic_Dev_10points
+end subroutine compute_forces_elastic_Dev_10p

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -64,8 +64,8 @@
 
 ! displacement and acceleration
   real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,accel
-  
 
+
 ! arrays with mesh parameters per slice
   integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
@@ -124,11 +124,11 @@
   ! newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
   ! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NUM_THREADS) :: &
   ! tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-  
+
   real(kind=CUSTOM_REAL), dimension(:,:,:,:),allocatable :: &
        dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3,&
        newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3,&
-       tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3  
+       tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
 
   ! real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB,NUM_THREADS) :: accel_omp
   ! real(kind=CUSTOM_REAL), dimension(:,:,:),allocatable :: accel_omp
@@ -160,7 +160,7 @@
   double precision end_time
   double precision accumulate_time_start
   double precision accumulate_time_stop
-  
+
   ! local anisotropy parameters
   real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
                         c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
@@ -171,14 +171,14 @@
   integer thread_id
   integer NUM_THREADS
   integer omp_get_num_threads ! function
-  
+
   imodulo_N_SLS = mod(N_SLS,3)
   ! NUM_THREADS = 12
   NUM_THREADS = OMP_GET_MAX_THREADS()
-  
-  
+
+
   ! allocate(accel_omp(NDIM,NGLOB_AB,NUM_THREADS))
-  
+
   ! allocate local arrays
   allocate(dummyx_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
   allocate(dummyy_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
@@ -209,7 +209,7 @@
  endif
  ! "start" timer
  start_time = omp_get_wtime()
-  
+
   !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(&
   !$OMP R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3,&
   !$OMP factor_loc,alphaval_loc,betaval_loc,gammaval_loc,&
@@ -228,17 +228,17 @@
   !$OMP ispec,iglob,ispec_p,&
   !$OMP i,j,k,&
   !$OMP thread_id)
-  
+
   thread_id = OMP_get_thread_num()+1
   ! thread_id = 1
 
-  
-  
+
+
   ! accel_omp(:,:,thread_id) = 0.0
   !$OMP DO
   do ispec_p = 1,num_elements
-     
-     
+
+
      ! returns element id from stored element list
         ispec = phase_ispec_inner_elastic(ispec_p,iphase)
 
@@ -268,7 +268,7 @@
     ! pages 386 and 389 and Figure 8.3.1
         ! call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
         do j=1,m2
-           do i=1,m1            
+           do i=1,m1
               tempx1(i,j,1,thread_id) = &
                    hprime_xx(i,1)*dummyx_loc(1,j,1,thread_id) + &
                    hprime_xx(i,2)*dummyx_loc(2,j,1,thread_id) + &
@@ -314,7 +314,7 @@
             enddo
           enddo
         enddo
-        
+
         ! call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
         do j=1,m1
            do i=1,m2
@@ -631,7 +631,7 @@
                    tempz3(i,1,5,thread_id)*hprimewgll_xx(5,j)
            enddo
         enddo
-        
+
         do k=1,NGLLZ
            do j=1,NGLLY
               do i=1,NGLLX
@@ -651,28 +651,28 @@
                  ! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id)&
                  !      - fac1*newtempz1(i,j,k,thread_id) - fac2*newtempz2(i,j,k,thread_id)&
                  !      - fac3*newtempz3(i,j,k,thread_id)
-                 
+
                  !$OMP ATOMIC
                  accel(1,iglob) = accel(1,iglob) - (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
                  !$OMP ATOMIC
                  accel(2,iglob) = accel(2,iglob) - (fac1*newtempy1(i,j,k,thread_id) + fac2*newtempy2(i,j,k,thread_id) + fac3*newtempy3(i,j,k,thread_id))
                  !$OMP ATOMIC
                  accel(3,iglob) = accel(3,iglob) - (fac1*newtempz1(i,j,k,thread_id) + fac2*newtempz2(i,j,k,thread_id) + fac3*newtempz3(i,j,k,thread_id))
-                 
+
                  ! accel(1,iglob) = accel(1,iglob) - &
                  ! (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
                  ! accel(2,iglob) = accel(2,iglob) - &
                  ! (fac1*newtempy1(i,j,k,thread_id) + fac2*newtempy2(i,j,k,thread_id) + fac3*newtempy3(i,j,k,thread_id))
                  ! accel(3,iglob) = accel(3,iglob) - &
                  ! (fac1*newtempz1(i,j,k,thread_id) + fac2*newtempz2(i,j,k,thread_id) + fac3*newtempz3(i,j,k,thread_id))
-                 
+
                  ! accel_omp(1,iglob,thread_id) = accel_omp(1,iglob,thread_id) - fac1*newtempx1(i,j,k,thread_id) - &
                  !                   fac2*newtempx2(i,j,k,thread_id) - fac3*newtempx3(i,j,k,thread_id)
                  ! accel_omp(2,iglob,thread_id) = accel_omp(2,iglob,thread_id) - fac1*newtempy1(i,j,k,thread_id) - &
                  !                   fac2*newtempy2(i,j,k,thread_id) - fac3*newtempy3(i,j,k,thread_id)
                  ! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id) - fac1*newtempz1(i,j,k,thread_id) - &
                  !      fac2*newtempz2(i,j,k,thread_id) - fac3*newtempz3(i,j,k,thread_id)
-                 
+
               !  update memory variables based upon the Runge-Kutta scheme
               if(ATTENUATION) then
 
@@ -731,32 +731,32 @@
 
   enddo  ! spectral element loop
   !$OMP END DO
-  
-  
-  ! accel(:,:) = accel(:,:) + accel_omp(:,:,thread_id)    
+
+
+  ! accel(:,:) = accel(:,:) + accel_omp(:,:,thread_id)
   ! do i=1,NGLOB_AB
   ! accel(1,i) = accel(1,i) + accel_omp(1,i,thread_id)
   ! accel(2,i) = accel(1,i) + accel_omp(2,i,thread_id)
-  ! accel(3,i) = accel(1,i) + accel_omp(3,i,thread_id)     
+  ! accel(3,i) = accel(1,i) + accel_omp(3,i,thread_id)
   ! enddo
-    
+
   !$OMP END PARALLEL
 
   ! accumulate_time_start = omp_get_wtime()
-  
+
   ! do i=1,NUM_THREADS
   !    !    ! parallel vector add
   !    accel(:,:) = accel(:,:) + accel_omp(:,:,i)
   ! end do
   ! accumulate_time_stop = omp_get_wtime()
-  
+
   ! "stop" timer
   end_time = omp_get_wtime()
 
   write(*,*) "Total Elapsed time: ", (end_time-start_time) , "seconds. (Threads=",NUM_THREADS,")"
   ! write(*,*) "Accumulate Elapsed time: ", (accumulate_time_stop-accumulate_time_start) , "seconds"
-  
-  
+
+
   deallocate(dummyx_loc)
   deallocate(dummyy_loc)
   deallocate(dummyz_loc)
@@ -778,7 +778,7 @@
   deallocate(tempz1)
   deallocate(tempz2)
   deallocate(tempz3)
-  
+
 ! accel(:,:) = accel_omp(:,:,1)
-  
+
 end subroutine compute_forces_elastic_Dev_openmp

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -48,7 +48,7 @@
   if ( APPROXIMATE_HESS_KL ) then
     call compute_kernels_hessian()
   endif
-  
+
   end subroutine compute_kernels
 
 
@@ -72,7 +72,7 @@
   ! updates kernels on GPU
   if(GPU_MODE) then
     call compute_kernels_elastic_cuda(Mesh_pointer,deltat)
-                          
+
     ! for noise simulations --- source strength kernel
     if (NOISE_TOMOGRAPHY == 3)  &
       call compute_kernels_strength_noise(NGLLSQUARE*num_free_surface_faces,ibool, &
@@ -82,11 +82,11 @@
                         NSPEC_AB,NGLOB_AB, &
                         num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
                         GPU_MODE,Mesh_pointer)
-  
+
     ! kernels are done
     return
   endif
-  
+
   ! updates kernels on CPU
   do ispec = 1, NSPEC_AB
 
@@ -143,7 +143,7 @@
     endif !ispec_is_elastic
 
   enddo
-  
+
   ! moho kernel
   if( SAVE_MOHO_MESH ) then
       call compute_boundary_kernel()
@@ -158,7 +158,7 @@
                         NSPEC_AB,NGLOB_AB, &
                         num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
                         GPU_MODE,Mesh_pointer)
-  
+
   end subroutine compute_kernels_el
 
 !
@@ -249,21 +249,20 @@
   real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: b_accel_elm,accel_elm
   integer :: i,j,k,ispec,iglob
 
-  !daniel: todo - workaround to do this on GPU?
-  if( GPU_MODE) then
-    if( ACOUSTIC_SIMULATION) then
-      call transfer_potential_dot_dot_from_device(NGLOB_AB,potential_dot_dot_acoustic, Mesh_pointer)    
-      call transfer_b_potential_dot_dot_from_device(NGLOB_AB,b_potential_dot_dot_acoustic, Mesh_pointer)    
-    endif    
-    if( ELASTIC_SIMULATION ) then
-      call transfer_accel_from_device(NGLOB_AB*NDIM,accel,Mesh_pointer)
-      call transfer_b_accel_from_device(NGLOB_AB*NDIM,b_accel,Mesh_pointer)      
-    endif    
+  ! updates kernels on GPU
+  if(GPU_MODE) then
+
+    ! computes contribution to density and bulk modulus kernel
+    call compute_kernels_hess_cuda(Mesh_pointer,deltat, &
+                                  ELASTIC_SIMULATION,ACOUSTIC_SIMULATION)
+
+    ! done on GPU
+    return
   endif
 
   ! loops over all elements
   do ispec = 1, NSPEC_AB
-  
+
     ! acoustic domains
     if( ispec_is_acoustic(ispec) ) then
 
@@ -280,7 +279,7 @@
                       hprime_xx,hprime_yy,hprime_zz, &
                       xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                       ibool,rhostore)
-    
+
       do k = 1, NGLLZ
         do j = 1, NGLLY
           do i = 1, NGLLX
@@ -293,7 +292,7 @@
 
           enddo
         enddo
-      enddo    
+      enddo
     endif
 
     ! elastic domains
@@ -310,9 +309,9 @@
 
           enddo
         enddo
-      enddo    
+      enddo
     endif
-    
+
   enddo
 
   end subroutine compute_kernels_hessian

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -70,20 +70,20 @@
 
   ! GPU_MODE variables
   integer(kind=8) :: Mesh_pointer
-  logical :: GPU_MODE  
+  logical :: GPU_MODE
 
 ! local parameters
   real(kind=CUSTOM_REAL) :: rhol,cpl,jacobianw,absorbl
   integer :: ispec,iglob,i,j,k,iface,igll
   !integer:: reclen1,reclen2
-  
+
   ! checks if anything to do
   if( num_abs_boundary_faces == 0 ) return
 
 ! adjoint simulations:
   if (SIMULATION_TYPE == 3 .and. num_abs_boundary_faces > 0)  then
     ! reads in absorbing boundary array when first phase is running
-    if( phase_is_inner .eqv. .false. ) then    
+    if( phase_is_inner .eqv. .false. ) then
       ! note: the index NSTEP-it+1 is valid if b_displ is read in after the Newark scheme
       ! uses fortran routine
       !read(IOABS_AC,rec=NSTEP-it+1) reclen1,b_absorb_potential,reclen2
@@ -91,7 +91,7 @@
       !  call exit_mpi(0,'Error reading absorbing contribution b_absorb_potential')
       ! uses c routine for faster reading
       call read_abs(1,b_absorb_potential,b_reclen_potential,NSTEP-it+1)
-    endif    
+    endif
   endif !adjoint
 
 ! absorbs absorbing-boundary surface using Sommerfeld condition (vanishing field in the outer-space)
@@ -144,13 +144,14 @@
         endif ! ispec_is_acoustic
       endif ! ispec_is_inner
     enddo ! num_abs_boundary_faces
-  else 
+  else
     ! GPU_MODE == .true.
-    call compute_stacey_acoustic_cuda(Mesh_pointer, phase_is_inner, &
-                                SIMULATION_TYPE,SAVE_FORWARD,b_absorb_potential)
+    if( num_abs_boundary_faces > 0 ) &
+      call compute_stacey_acoustic_cuda(Mesh_pointer, phase_is_inner, &
+                                       SIMULATION_TYPE,SAVE_FORWARD,b_absorb_potential)
 
   endif
-  
+
   ! adjoint simulations: stores absorbed wavefield part
   if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) then
     ! writes out absorbing boundary value only when second phase is running

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -77,8 +77,8 @@
 
   ! GPU_MODE variables
   integer(kind=8) :: Mesh_pointer
-  logical :: GPU_MODE  
-  
+  logical :: GPU_MODE
+
 ! local parameters
   real(kind=CUSTOM_REAL) vx,vy,vz,nx,ny,nz,tx,ty,tz,vn,jacobianw
   integer :: ispec,iglob,i,j,k,iface,igll
@@ -90,7 +90,7 @@
 ! adjoint simulations:
   if (SIMULATION_TYPE == 3 .and. num_abs_boundary_faces > 0)  then
     ! reads in absorbing boundary array when first phase is running
-    if( phase_is_inner .eqv. .false. ) then    
+    if( phase_is_inner .eqv. .false. ) then
       ! note: the index NSTEP-it+1 is valid if b_displ is read in after the Newark scheme
       ! uses fortran routine
       !read(IOABS,rec=NSTEP-it+1) reclen1,b_absorb_field,reclen2
@@ -98,10 +98,10 @@
       !  call exit_mpi(0,'Error reading absorbing contribution b_absorb_field')
       ! uses c routine for faster reading
       call read_abs(0,b_absorb_field,b_reclen_field,NSTEP-it+1)
-    endif    
+    endif
   endif !adjoint
 
-  
+
   if(.NOT. GPU_MODE) then
 
      ! absorbs absorbing-boundary surface using Stacey condition (Clayton & Enquist)
@@ -162,16 +162,17 @@
         endif ! ispec_is_inner
      enddo
 
-  else 
+  else
     ! GPU_MODE == .true.
-    call compute_stacey_elastic_cuda(Mesh_pointer,phase_is_inner, &
-                        SIMULATION_TYPE,SAVE_FORWARD,b_absorb_field)
+    if( num_abs_boundary_faces > 0 ) &
+      call compute_stacey_elastic_cuda(Mesh_pointer,phase_is_inner, &
+                                      SIMULATION_TYPE,SAVE_FORWARD,b_absorb_field)
   endif
 
   ! adjoint simulations: stores absorbed wavefield part
   if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) then
     ! writes out absorbing boundary value only when second phase is running
-    if( phase_is_inner .eqv. .true. ) then    
+    if( phase_is_inner .eqv. .true. ) then
       ! uses fortran routine
       !write(IOABS,rec=it) b_reclen_field,b_absorb_field,b_reclen_field
       ! uses c routine

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -129,6 +129,8 @@
   !character(len=256) :: vtkfilename
   integer :: zoom_factor = 4
   logical :: zoom
+  integer, dimension(1) :: tmp_pixel_loc
+  integer, dimension(1,0:NPROC-1) :: tmp_pixel_per_proc
 
   ! checks image type
   if(IMAGE_TYPE > 4 .or. IMAGE_TYPE < 1) then
@@ -378,12 +380,15 @@
   if( nb_pixel_loc > 0 ) then
     if( .not. allocated(num_pixel_loc) ) call exit_MPI(myrank,'error num_pixel_loc allocation')
   endif
-  
+
   ! filling array iglob_image_color, containing info on which process owns which pixels.
   allocate(nb_pixel_per_proc(0:NPROC-1),stat=ier)
   if( ier /= 0 ) stop 'error allocating array nb_pixel_per_proc'
-  call gather_all_i(nb_pixel_loc,1,nb_pixel_per_proc,1,NPROC)
 
+  tmp_pixel_loc(1) = nb_pixel_loc
+  call gather_all_i(tmp_pixel_loc,1,tmp_pixel_per_proc,1,NPROC)
+  nb_pixel_per_proc(:) = tmp_pixel_per_proc(1,:)
+
   ! allocates receiving array
   if ( myrank == 0 ) then
     allocate( num_pixel_recv(maxval(nb_pixel_per_proc(:)),0:NPROC-1),stat=ier)
@@ -428,7 +433,7 @@
     if(ier /= 0 ) call exit_mpi(myrank,'error allocating image send data')
     data_pixel_send(:) = 0._CUSTOM_REAL
   endif
-  
+
   ! handles vp background data
   call write_PNM_GIF_vp_background()
 
@@ -474,7 +479,7 @@
     ! master collects
     if (myrank == 0) then
       do iproc = 1, NPROC-1
-        if( nb_pixel_per_proc(iproc) > 0 ) then      
+        if( nb_pixel_per_proc(iproc) > 0 ) then
           call recvv_cr(data_pixel_recv(1),nb_pixel_per_proc(iproc),iproc,43)
           ! fills vp display array
           do k = 1, nb_pixel_per_proc(iproc)
@@ -851,15 +856,19 @@
   if( ELASTIC_SIMULATION ) then
     if( ispec_is_elastic(ispec) ) then
       if( SIMULATION_TYPE == 3 ) then
-        veloc_val(:) = b_veloc(:,iglob)
+        ! to display re-constructed wavefield
+        !veloc_val(:) = b_veloc(:,iglob)
+        ! to display adjoint wavefield
+        veloc_val(:) = veloc(:,iglob)
       else
-        veloc_val(:) = veloc(:,iglob)      
+        veloc_val(:) = veloc(:,iglob)
       endif
 
       ! returns with this result
       return
     endif
   endif
+
   if( ACOUSTIC_SIMULATION ) then
     if( ispec_is_acoustic(ispec) ) then
       if( SIMULATION_TYPE == 3 ) then
@@ -868,7 +877,7 @@
                           b_potential_dot_acoustic, veloc_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)      
+                          ibool,rhostore)
       else
         ! velocity vector
         call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
@@ -877,7 +886,7 @@
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                           ibool,rhostore)
       endif
-      
+
       ! returns corresponding iglob velocity entry
       do k=1,NGLLZ
         do j=1,NGLLY

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -31,13 +31,13 @@
   use specfem_par
   use specfem_par_elastic
   use specfem_par_acoustic
-  
+
   implicit none
 
   integer :: irec_local
 
   ! save last frame
-  
+
   if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
      open(unit=27,file=prname(1:len_trim(prname))//'save_forward_arrays.bin',&
           status='unknown',form='unformatted')

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -215,10 +215,10 @@
       stop 'GPU mode does not support SAVE_MOHO_MESH yet'
     if( ATTENUATION ) then
       if( N_SLS /= 3 ) &
-        stop 'GPU mode does not support N_SLS /= 3 yet'   
+        stop 'GPU mode does not support N_SLS /= 3 yet'
     endif
-    if( ANISOTROPY ) &  
-      stop 'GPU mode does not support ANISOTROPY yet'    
+    if( ANISOTROPY ) &
+      stop 'GPU mode does not support ANISOTROPY yet'
   endif
 
   ! absorbing surfaces

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -126,7 +126,7 @@
   enddo   ! end of main time loop
 
   ! Transfer fields from GPU card to host for further analysis
-  if(GPU_MODE) call it_transfer_from_GPU() 
+  if(GPU_MODE) call it_transfer_from_GPU()
 
   end subroutine iterate_time
 
@@ -148,32 +148,32 @@
   integer :: ihours,iminutes,iseconds,int_tCPU, &
              ihours_remain,iminutes_remain,iseconds_remain,int_t_remain, &
              ihours_total,iminutes_total,iseconds_total,int_t_total
-  
+
 !  if(GPU_MODE) then
-!    ! way 1: copy whole fields   
+!    ! way 1: copy whole fields
 !    ! elastic wavefield
-!    if( ELASTIC_SIMULATION ) then  
-!      call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
-!      ! backward/reconstructed wavefield     
+!    if( ELASTIC_SIMULATION ) then
+!      call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+!      ! backward/reconstructed wavefield
 !      if(SIMULATION_TYPE==3) &
 !        call transfer_b_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
-!    endif    
+!    endif
 !  endif
 
 ! compute maximum of norm of displacement in each slice
   if( ELASTIC_SIMULATION ) then
     if( GPU_MODE) then
-      ! way 2: just get maximum of field from GPU                    
-      call get_norm_elastic_from_device_cuda(Usolidnorm,Mesh_pointer,1)
+      ! way 2: just get maximum of field from GPU
+      call get_norm_elastic_from_device(Usolidnorm,Mesh_pointer,1)
     else
       Usolidnorm = maxval(sqrt(displ(1,:)**2 + displ(2,:)**2 + displ(3,:)**2))
     endif
   else
     if( ACOUSTIC_SIMULATION ) then
       if(GPU_MODE) then
-        ! way 2: just get maximum of field from GPU                    
-        call get_norm_acoustic_from_device_cuda(Usolidnorm,Mesh_pointer,1)
-      else    
+        ! way 2: just get maximum of field from GPU
+        call get_norm_acoustic_from_device(Usolidnorm,Mesh_pointer,1)
+      else
         Usolidnorm = maxval(abs(potential_dot_dot_acoustic(:)))
       endif
     endif
@@ -187,7 +187,7 @@
     if( ELASTIC_SIMULATION ) then
       ! way 2
       if(GPU_MODE) then
-        call get_norm_elastic_from_device_cuda(b_Usolidnorm,Mesh_pointer,3)      
+        call get_norm_elastic_from_device(b_Usolidnorm,Mesh_pointer,3)
       else
         b_Usolidnorm = maxval(sqrt(b_displ(1,:)**2 + b_displ(2,:)**2 + b_displ(3,:)**2))
       endif
@@ -195,8 +195,8 @@
       if( ACOUSTIC_SIMULATION ) then
         ! way 2
         if(GPU_MODE) then
-          call get_norm_acoustic_from_device_cuda(b_Usolidnorm,Mesh_pointer,3)        
-        else      
+          call get_norm_acoustic_from_device(b_Usolidnorm,Mesh_pointer,3)
+        else
           b_Usolidnorm = maxval(abs(b_potential_dot_dot_acoustic(:)))
         endif
       endif
@@ -335,7 +335,7 @@
 ! updates acoustic potentials
   if( ACOUSTIC_SIMULATION ) then
 
-    if(.NOT. GPU_MODE) then  
+    if(.NOT. GPU_MODE) then
       potential_acoustic(:) = potential_acoustic(:) &
                             + deltat * potential_dot_acoustic(:) &
                             + deltatsqover2 * potential_dot_dot_acoustic(:)
@@ -344,15 +344,15 @@
       potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
     else
       ! on GPU
-      call it_update_displacement_scheme_acoustic_cuda(Mesh_pointer, NGLOB_AB, &
+      call it_update_displacement_ac_cuda(Mesh_pointer, NGLOB_AB, &
                                         deltat, deltatsqover2, deltatover2, &
                                         SIMULATION_TYPE, b_deltat, b_deltatsqover2, b_deltatover2)
-    endif    
+    endif
 
     ! time marching potentials
-    if(PML)  then     
-      if( GPU_MODE ) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
+    if(PML)  then
+      if( GPU_MODE ) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
 
       call PML_acoustic_time_march(NSPEC_AB,NGLOB_AB,ibool,&
                         potential_acoustic,potential_dot_acoustic,&
@@ -367,22 +367,22 @@
                         my_neighbours_ext_mesh,NPROC,&
                         ispec_is_acoustic)
 
-      if( GPU_MODE ) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)     
+      if( GPU_MODE ) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
     endif
-    
+
   endif ! ACOUSTIC_SIMULATION
 
 ! updates elastic displacement and velocity
   if( ELASTIC_SIMULATION ) then
-  
+
      if(.NOT. GPU_MODE) then
         displ(:,:) = displ(:,:) + deltat*veloc(:,:) + deltatsqover2*accel(:,:)
         veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
         accel(:,:) = 0._CUSTOM_REAL
      else ! GPU_MODE == 1
         ! Includes SIM_TYPE 1 & 3 (for noise tomography)
-        call it_update_displacement_scheme_cuda(Mesh_pointer, size(displ), deltat, deltatsqover2,&
+        call it_update_displacement_cuda(Mesh_pointer, size(displ), deltat, deltatsqover2,&
              deltatover2, SIMULATION_TYPE, b_deltat, b_deltatsqover2, b_deltatover2)
      endif
   endif
@@ -398,7 +398,7 @@
                                   + b_deltatover2 * b_potential_dot_dot_acoustic(:)
       b_potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
     endif
-    
+
     ! elastic backward fields
     if( ELASTIC_SIMULATION ) then
       b_displ(:,:) = b_displ(:,:) + b_deltat*b_veloc(:,:) + b_deltatsqover2*b_accel(:,:)
@@ -446,11 +446,11 @@
     read(27) b_potential_acoustic
     read(27) b_potential_dot_acoustic
     read(27) b_potential_dot_dot_acoustic
-    
+
     ! transfers fields onto GPU
     if(GPU_MODE) &
-      call transfer_b_fields_acoustic_to_device(NGLOB_AB,b_potential_acoustic, &
-                          b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)      
+      call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
+                          b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
   endif
 
   ! elastic wavefields
@@ -459,7 +459,7 @@
     read(27) b_veloc
     read(27) b_accel
 
-    ! puts elastic wavefield to GPU 
+    ! puts elastic wavefield to GPU
     if(GPU_MODE) &
       call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
 
@@ -475,19 +475,19 @@
       read(27) b_epsilondev_xy
       read(27) b_epsilondev_xz
       read(27) b_epsilondev_yz
-       
-      ! puts elastic attenuation arrays to GPU 
+
+      ! puts elastic attenuation arrays to GPU
       if(GPU_MODE) &
           call transfer_b_fields_att_to_device(Mesh_pointer, &
                                             b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &
                                             b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
-                                            size(b_epsilondev_xx))                 
+                                            size(b_epsilondev_xx))
     endif
 
   endif
 
   close(27)
-  
+
   end subroutine it_read_foward_arrays
 
 !=====================================================================
@@ -520,6 +520,13 @@
         read(27) b_displ
         read(27) b_veloc
         read(27) b_accel
+
+        ! puts elastic fields onto GPU
+        if(GPU_MODE) then
+          ! wavefields
+          call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
+        endif
+
         read(27) b_R_xx
         read(27) b_R_yy
         read(27) b_R_xy
@@ -530,30 +537,28 @@
         read(27) b_epsilondev_xy
         read(27) b_epsilondev_xz
         read(27) b_epsilondev_yz
-               
+
         ! puts elastic fields onto GPU
         if(GPU_MODE) then
-          ! wavefields
-          call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
           ! attenuation arrays
           call transfer_b_fields_att_to_device(Mesh_pointer, &
                                             b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &
                                             b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
-                                            size(b_epsilondev_xx))          
-        endif        
+                                            size(b_epsilondev_xx))
+        endif
       endif
-      
+
       if( ACOUSTIC_SIMULATION ) then
-        ! reads arrays from disk files      
+        ! reads arrays from disk files
         read(27) b_potential_acoustic
         read(27) b_potential_dot_acoustic
         read(27) b_potential_dot_dot_acoustic
 
         ! puts acoustic fields onto GPU
         if(GPU_MODE) &
-          call transfer_b_fields_acoustic_to_device(NGLOB_AB,b_potential_acoustic, &
+          call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
                               b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
-        
+
       endif
       close(27)
     else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. mod(it,NSTEP_Q_SAVE) == 0) then
@@ -564,20 +569,22 @@
       if( ELASTIC_SIMULATION ) then
         ! gets elastic fields from GPU onto CPU
         if(GPU_MODE) then
-          call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+          call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+        endif
 
+        ! writes to disk file
+        write(27) displ
+        write(27) veloc
+        write(27) accel
+
+        if(GPU_MODE) then
           ! attenuation arrays
           call transfer_fields_att_from_device(Mesh_pointer, &
                                             R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
                                             epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
-                                            size(epsilondev_xx))          
+                                            size(epsilondev_xx))
         endif
-        
-        ! writes to disk file      
-        write(27) displ
-        write(27) veloc
-        write(27) accel
-        
+
         write(27) R_xx
         write(27) R_yy
         write(27) R_xy
@@ -592,10 +599,10 @@
       if( ACOUSTIC_SIMULATION ) then
        ! gets acoustic fields from GPU onto CPU
         if(GPU_MODE) &
-          call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+          call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
                               potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
 
-        ! writes to disk file      
+        ! writes to disk file
         write(27) potential_acoustic
         write(27) potential_dot_acoustic
         write(27) potential_dot_dot_acoustic
@@ -606,7 +613,7 @@
 
   end subroutine it_store_attenuation_arrays
 
-  
+
 !=====================================================================
 
   subroutine it_transfer_from_GPU()
@@ -624,53 +631,61 @@
 
     ! acoustic potentials
     if( ACOUSTIC_SIMULATION ) &
-      call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
-                            potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)    
-                            
+      call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+                            potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
     ! elastic wavefield
     if( ELASTIC_SIMULATION ) then
-      call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+      call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
 
       if (ATTENUATION) &
         call transfer_fields_att_from_device(Mesh_pointer, &
                                             R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
                                             epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
                                             size(epsilondev_xx))
-        
+
     endif
   else if (SIMULATION_TYPE == 3) then
 
     ! to store kernels
-    
+
     if( ACOUSTIC_SIMULATION ) then
       ! only in case needed...
-      !call transfer_b_fields_acoustic_from_device(NGLOB_AB,b_potential_acoustic, &
-      !                      b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)    
-      
-      ! acoustic kernels                      
-      call transfer_sensitivity_kernels_acoustic_to_host(Mesh_pointer,rho_ac_kl,kappa_ac_kl,NSPEC_AB)
+      !call transfer_b_fields_ac_from_device(NGLOB_AB,b_potential_acoustic, &
+      !                      b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+
+      ! acoustic kernels
+      call transfer_kernels_ac_to_host(Mesh_pointer,rho_ac_kl,kappa_ac_kl,NSPEC_AB)
     endif
 
     if( ELASTIC_SIMULATION ) then
       ! only in case needed...
       !call transfer_b_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
-      
+
       ! elastic kernels
-      call transfer_sensitivity_kernels_to_host(Mesh_pointer,rho_kl,mu_kl,kappa_kl,NSPEC_AB)
+      call transfer_kernels_el_to_host(Mesh_pointer,rho_kl,mu_kl,kappa_kl,NSPEC_AB)
     endif
 
     ! specific noise strength kernel
     if( NOISE_TOMOGRAPHY == 3 ) then
-      call transfer_sensitivity_kernels_to_host(Mesh_pointer,Sigma_kl,NSPEC_AB)  
+      call transfer_kernels_noise_to_host(Mesh_pointer,Sigma_kl,NSPEC_AB)
     endif
 
+    ! approximative hessian for preconditioning kernels
+    if ( APPROXIMATE_HESS_KL ) then
+      if( ELASTIC_SIMULATION ) call transfer_kernels_hess_el_tohost(Mesh_pointer,hess_kl,NSPEC_AB)
+      if( ACOUSTIC_SIMULATION ) call transfer_kernels_hess_ac_tohost(Mesh_pointer,hess_ac_kl,NSPEC_AB)
+    endif
+
   endif
 
-  
+
   ! frees allocated memory on GPU
   call prepare_cleanup_device(Mesh_pointer, &
-                              SIMULATION_TYPE,ACOUSTIC_SIMULATION,ELASTIC_SIMULATION, &
+                              SIMULATION_TYPE,SAVE_FORWARD, &
+                              ACOUSTIC_SIMULATION,ELASTIC_SIMULATION, &
                               ABSORBING_CONDITIONS,NOISE_TOMOGRAPHY,COMPUTE_AND_STORE_STRAIN, &
-                              ATTENUATION)
-  
+                              ATTENUATION,OCEANS, &
+                              APPROXIMATE_HESS_KL)
+
   end subroutine it_transfer_from_GPU

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_receivers.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_receivers.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,1091 +1,1091 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!----
-!---- locate_receivers finds the correct position of the receivers
-!----
-  subroutine locate_receivers(ibool,myrank,NSPEC_AB,NGLOB_AB, &
-                 xstore,ystore,zstore,xigll,yigll,zigll,rec_filename, &
-                 nrec,islice_selected_rec,ispec_selected_rec, &
-                 xi_receiver,eta_receiver,gamma_receiver,station_name,network_name,nu, &
-                 NPROC,utm_x_source,utm_y_source, &
-                 UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
-                 iglob_is_surface_external_mesh,ispec_is_surface_external_mesh, &
-                 num_free_surface_faces,free_surface_ispec,free_surface_ijk)
-
-  implicit none
-
-  include "constants.h"
-
-  logical SUPPRESS_UTM_PROJECTION
-
-  integer NPROC,UTM_PROJECTION_ZONE
-
-  integer nrec,myrank
-
-  integer NSPEC_AB,NGLOB_AB
-
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-
-! arrays containing coordinates of the points
-  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: xstore,ystore,zstore
-
-! for surface locating and normal computing with external mesh
-  integer :: pt0_ix,pt0_iy,pt0_iz,pt1_ix,pt1_iy,pt1_iz,pt2_ix,pt2_iy,pt2_iz
-  integer :: num_free_surface_faces
-  real(kind=CUSTOM_REAL), dimension(3) :: u_vector,v_vector,w_vector
-  logical, dimension(NGLOB_AB) :: iglob_is_surface_external_mesh
-  logical, dimension(NSPEC_AB) :: ispec_is_surface_external_mesh
-  integer, dimension(num_free_surface_faces) :: free_surface_ispec
-  integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
-
-  integer, allocatable, dimension(:) :: ix_initial_guess,iy_initial_guess,iz_initial_guess
-
-  integer iprocloop
-  integer ios
-
-  double precision,dimension(1) :: altitude_rec,distmin_ele
-  double precision,dimension(4) :: elevation_node,dist_node
-  double precision,dimension(NPROC) :: distmin_ele_all,elevation_all
-  double precision, allocatable, dimension(:) :: x_target,y_target,z_target
-  double precision, allocatable, dimension(:) :: horiz_dist
-  double precision, allocatable, dimension(:) :: x_found,y_found,z_found
-
-  integer irec
-  integer i,j,k,ispec,iglob,iface,inode,imin,imax,jmin,jmax,kmin,kmax,igll,jgll,kgll
-  integer iselected,jselected,iface_selected,iadjust,jadjust
-  integer iproc(1)
-
-  double precision utm_x_source,utm_y_source
-  double precision dist
-  double precision xi,eta,gamma,dx,dy,dz,dxi,deta,dgamma
-
-! Gauss-Lobatto-Legendre points of integration
-  double precision xigll(NGLLX)
-  double precision yigll(NGLLY)
-  double precision zigll(NGLLZ)
-
-! input receiver file name
-  character(len=*) rec_filename
-
-! topology of the control points of the surface element
-  integer iax,iay,iaz
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! coordinates of the control points of the surface element
-  double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
-
-  integer iter_loop,ispec_iterate
-
-  integer ia
-  double precision x,y,z
-  double precision xix,xiy,xiz
-  double precision etax,etay,etaz
-  double precision gammax,gammay,gammaz
-
-! timer MPI
-  double precision, external :: wtime
-  double precision time_start,tCPU
-
-! use dynamic allocation
-  double precision, dimension(:), allocatable :: final_distance
-  double precision distmin,final_distance_max
-
-! receiver information
-! timing information for the stations
-! station information for writing the seismograms
-
-  integer :: iglob_selected
-  integer, dimension(nrec) :: islice_selected_rec,ispec_selected_rec
-  double precision, dimension(nrec) :: xi_receiver,eta_receiver,gamma_receiver
-  double precision, dimension(3,3,nrec) :: nu
-  character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
-  character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
-  double precision, allocatable, dimension(:) :: stlat,stlon,stele,stbur,stutm_x,stutm_y,elevation
-
-  double precision, allocatable, dimension(:) :: x_found_all,y_found_all,z_found_all
-  double precision, dimension(:), allocatable :: final_distance_all
-  integer, allocatable, dimension(:) :: ispec_selected_rec_all
-  double precision, allocatable, dimension(:) :: xi_receiver_all,eta_receiver_all,gamma_receiver_all
-  double precision, allocatable, dimension(:,:,:) :: nu_all
-
-  integer :: ier
-  character(len=256) OUTPUT_FILES
-
-  real(kind=CUSTOM_REAL) :: xmin,xmax,ymin,ymax,zmin,zmax
-  real(kind=CUSTOM_REAL) :: xmin_ELE,xmax_ELE,ymin_ELE,ymax_ELE,zmin_ELE,zmax_ELE
-  integer :: imin_temp,imax_temp,jmin_temp,jmax_temp,kmin_temp,kmax_temp
-  integer,dimension(NGLLX*NGLLY*NGLLZ) :: iglob_temp
-
-! **************
-
-  ! dimension of model in current proc
-  xmin=minval(xstore(:));          xmax=maxval(xstore(:))
-  ymin=minval(ystore(:));          ymax=maxval(ystore(:))
-  zmin=minval(zstore(:));          zmax=maxval(zstore(:))
-  if(FASTER_RECEIVERS_POINTS_ONLY) then
-    imin_temp = 1; imax_temp = NGLLX
-    jmin_temp = 1; jmax_temp = NGLLY
-    kmin_temp = 1; kmax_temp = NGLLZ
-  else
-    imin_temp = 2; imax_temp = NGLLX - 1
-    jmin_temp = 2; jmax_temp = NGLLY - 1
-    kmin_temp = 2; kmax_temp = NGLLZ - 1
-  endif
-
-  ! get MPI starting time
-  time_start = wtime()
-
-  if(myrank == 0) then
-    write(IMAIN,*)
-    write(IMAIN,*) '********************'
-    write(IMAIN,*) ' locating receivers'
-    write(IMAIN,*) '********************'
-    write(IMAIN,*)
-  endif
-
-  ! define topology of the control element
-  call usual_hex_nodes(iaddx,iaddy,iaddz)
-
-  if(myrank == 0) then
-    write(IMAIN,*)
-    write(IMAIN,*) '*****************************************************************'
-    write(IMAIN,'(1x,a,a,a)') 'reading receiver information from ', trim(rec_filename), ' file'
-    write(IMAIN,*) '*****************************************************************'
-  endif
-
-  ! get number of stations from receiver file
-  open(unit=1,file=trim(rec_filename),status='old',action='read',iostat=ios)
-  if (ios /= 0) call exit_mpi(myrank,'error opening file '//trim(rec_filename))
-
-  ! allocate memory for arrays using number of stations
-  allocate(stlat(nrec), &
-          stlon(nrec), &
-          stele(nrec), &
-          stbur(nrec), &
-          stutm_x(nrec), &
-          stutm_y(nrec), &
-          horiz_dist(nrec), &
-          elevation(nrec), &
-          ix_initial_guess(nrec), &
-          iy_initial_guess(nrec), &
-          iz_initial_guess(nrec), &
-          x_target(nrec), &
-          y_target(nrec), &
-          z_target(nrec), &
-          x_found(nrec), &
-          y_found(nrec), &
-          z_found(nrec), &
-          final_distance(nrec), &
-          ispec_selected_rec_all(nrec), &
-          xi_receiver_all(nrec), &
-          eta_receiver_all(nrec), &
-          gamma_receiver_all(nrec), &
-          x_found_all(nrec), &
-          y_found_all(nrec), &
-          z_found_all(nrec), &
-          final_distance_all(nrec), &
-          nu_all(3,3,nrec),stat=ier)
-  if( ier /= 0 ) stop 'error allocating arrays for locating receivers'
-
-  ! loop on all the stations
-  do irec=1,nrec
-
-    read(1,*,iostat=ios) station_name(irec),network_name(irec),stlat(irec),stlon(irec),stele(irec),stbur(irec)
-    if (ios /= 0) call exit_mpi(myrank, 'Error reading station file '//trim(rec_filename))
-
-    ! convert station location to UTM
-    call utm_geo(stlon(irec),stlat(irec),stutm_x(irec),stutm_y(irec),&
-                UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
-
-    ! compute horizontal distance between source and receiver in km
-    horiz_dist(irec) = dsqrt((stutm_y(irec)-utm_y_source)**2 + (stutm_x(irec)-utm_x_source)**2) / 1000.
-
-    ! print some information about stations
-    if(myrank == 0) &
-        write(IMAIN,*) 'Station #',irec,': ',station_name(irec)(1:len_trim(station_name(irec))), &
-                       '.',network_name(irec)(1:len_trim(network_name(irec))), &
-                       '    horizontal distance:  ',sngl(horiz_dist(irec)),' km'
-
-    ! get approximate topography elevation at source long/lat coordinates
-    !   set distance to huge initial value
-    distmin = HUGEVAL
-    if(num_free_surface_faces > 0) then
-      iglob_selected = 1
-      ! loop only on points inside the element
-      ! exclude edges to ensure this point is not shared with other elements
-      imin = 2
-      imax = NGLLX - 1
-      jmin = 2
-      jmax = NGLLY - 1
-      iselected = 0
-      jselected = 0
-      iface_selected = 0
-      do iface=1,num_free_surface_faces
-        do j=jmin,jmax
-          do i=imin,imax
-
-            ispec = free_surface_ispec(iface)
-            igll = free_surface_ijk(1,(j-1)*NGLLY+i,iface)
-            jgll = free_surface_ijk(2,(j-1)*NGLLY+i,iface)
-            kgll = free_surface_ijk(3,(j-1)*NGLLY+i,iface)
-            iglob = ibool(igll,jgll,kgll,ispec)
-
-            ! keep this point if it is closer to the receiver
-            dist = dsqrt((stutm_x(irec)-dble(xstore(iglob)))**2 + &
-                       (stutm_y(irec)-dble(ystore(iglob)))**2)
-            if(dist < distmin) then
-              distmin = dist
-              iglob_selected = iglob
-              iface_selected = iface
-              iselected = i
-              jselected = j
-              altitude_rec(1) = zstore(iglob_selected)
-            endif
-          enddo
-        enddo
-      ! end of loop on all the elements on the free surface
-      end do
-      !  weighted mean at current point of topography elevation of the four closest nodes
-      !  set distance to huge initial value
-      distmin = HUGEVAL
-      do j=jselected,jselected+1
-        do i=iselected,iselected+1
-          inode = 1
-          do jadjust=0,1
-            do iadjust= 0,1
-              ispec = free_surface_ispec(iface_selected)
-              igll = free_surface_ijk(1,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
-              jgll = free_surface_ijk(2,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
-              kgll = free_surface_ijk(3,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
-              iglob = ibool(igll,jgll,kgll,ispec)
-
-              elevation_node(inode) = zstore(iglob)
-              dist_node(inode) = dsqrt((stutm_x(irec)-dble(xstore(iglob)))**2 + &
-                          (stutm_y(irec)-dble(ystore(iglob)))**2)
-              inode = inode + 1
-            end do
-          end do
-          dist = sum(dist_node)
-          if(dist < distmin) then
-            distmin = dist
-            altitude_rec(1) = (dist_node(1)/dist)*elevation_node(1) + &
-                       (dist_node(2)/dist)*elevation_node(2) + &
-                       (dist_node(3)/dist)*elevation_node(3) + &
-                       (dist_node(4)/dist)*elevation_node(4)
-          endif
-        end do
-      end do
-    end if
-    !  MPI communications to determine the best slice
-    distmin_ele(1)= distmin
-    call gather_all_dp(distmin_ele,1,distmin_ele_all,1,NPROC)
-    call gather_all_dp(altitude_rec,1,elevation_all,1,NPROC)
-    if(myrank == 0) then
-      iproc = minloc(distmin_ele_all)
-      altitude_rec(1) = elevation_all(iproc(1))
-    end if
-    call bcast_all_dp(altitude_rec,1)
-    elevation(irec) = altitude_rec(1)
-
-    ! reset distance to huge initial value
-    distmin=HUGEVAL
-
-!     get the Cartesian components of n in the model: nu
-
-    ! orientation consistent with the UTM projection
-    ! X coordinate - East
-    nu(1,1,irec) = 1.d0
-    nu(1,2,irec) = 0.d0
-    nu(1,3,irec) = 0.d0
-    ! Y coordinate - North
-    nu(2,1,irec) = 0.d0
-    nu(2,2,irec) = 1.d0
-    nu(2,3,irec) = 0.d0
-    ! Z coordinate - Vertical
-    nu(3,1,irec) = 0.d0
-    nu(3,2,irec) = 0.d0
-    nu(3,3,irec) = 1.d0
-
-    x_target(irec) = stutm_x(irec)
-    y_target(irec) = stutm_y(irec)
-
-    ! receiver's Z coordinate
-    if( USE_SOURCES_RECVS_Z ) then
-      ! alternative: burial depth is given as z value directly
-      z_target(irec) = stbur(irec)
-    else
-      ! burial depth in STATIONS file given in m
-      z_target(irec) = elevation(irec) - stbur(irec)
-    endif
-    !if (myrank == 0) write(IOVTK,*) x_target(irec), y_target(irec), z_target(irec)
-
-    if (.not. SU_FORMAT) then
-       ! determines closest GLL point
-       ispec_selected_rec(irec) = 0
-       do ispec=1,NSPEC_AB
-
-         ! define the interval in which we look for points
-         if(FASTER_RECEIVERS_POINTS_ONLY) then
-           imin = 1
-           imax = NGLLX
-
-           jmin = 1
-           jmax = NGLLY
-
-           kmin = 1
-           kmax = NGLLZ
-
-         else
-           ! loop only on points inside the element
-           ! exclude edges to ensure this point is not shared with other elements
-           imin = 2
-           imax = NGLLX - 1
-
-           jmin = 2
-           jmax = NGLLY - 1
-
-           kmin = 2
-           kmax = NGLLZ - 1
-         endif
-
-         do k = kmin,kmax
-           do j = jmin,jmax
-             do i = imin,imax
-
-               iglob = ibool(i,j,k,ispec)
-
-               if (.not. RECVS_CAN_BE_BURIED_EXT_MESH) then
-                 if ((.not. iglob_is_surface_external_mesh(iglob)) .or. (.not. ispec_is_surface_external_mesh(ispec))) then
-                   cycle
-                 endif
-               endif
-
-               dist = dsqrt((x_target(irec)-dble(xstore(iglob)))**2 &
-                           +(y_target(irec)-dble(ystore(iglob)))**2 &
-                           +(z_target(irec)-dble(zstore(iglob)))**2)
-
-               ! keep this point if it is closer to the receiver
-               if(dist < distmin) then
-                 distmin = dist
-                 ispec_selected_rec(irec) = ispec
-                 ix_initial_guess(irec) = i
-                 iy_initial_guess(irec) = j
-                 iz_initial_guess(irec) = k
-
-                 xi_receiver(irec) = dble(ix_initial_guess(irec))
-                 eta_receiver(irec) = dble(iy_initial_guess(irec))
-                 gamma_receiver(irec) = dble(iz_initial_guess(irec))
-                 x_found(irec) = xstore(iglob)
-                 y_found(irec) = ystore(iglob)
-                 z_found(irec) = zstore(iglob)
-               endif
-
-             enddo
-           enddo
-         enddo
-
-         ! compute final distance between asked and found (converted to km)
-         final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
-           (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
-
-       ! end of loop on all the spectral elements in current slice
-       enddo
-    else
-       ispec_selected_rec(irec) = 0
-       ix_initial_guess(irec) = 0
-       iy_initial_guess(irec) = 0
-       iz_initial_guess(irec) = 0
-       final_distance(irec) = HUGEVAL
-       if (  (x_target(irec)>=xmin .and. x_target(irec)<=xmax) .and. &
-             (y_target(irec)>=ymin .and. y_target(irec)<=ymax) .and. &
-             (z_target(irec)>=zmin .and. z_target(irec)<=zmax) ) then
-          do ispec=1,NSPEC_AB
-             iglob_temp=reshape(ibool(:,:,:,ispec),(/NGLLX*NGLLY*NGLLZ/))
-             xmin_ELE=minval(xstore(iglob_temp))
-             xmax_ELE=maxval(xstore(iglob_temp))
-             ymin_ELE=minval(ystore(iglob_temp))
-             ymax_ELE=maxval(ystore(iglob_temp))
-             zmin_ELE=minval(zstore(iglob_temp))
-             zmax_ELE=maxval(zstore(iglob_temp))
-             if (  (x_target(irec)>=xmin_ELE .and. x_target(irec)<=xmax_ELE) .and. &
-                   (y_target(irec)>=ymin_ELE .and. y_target(irec)<=ymax_ELE) .and. &
-                   (z_target(irec)>=zmin_ELE .and. z_target(irec)<=zmax_ELE) ) then
-                ! we find the element (ispec) which "may" contain the receiver (irec)
-                ! so we only need to compute distances (which is expensive because of "dsqrt") within those elements
-                ispec_selected_rec(irec) = ispec
-                do k = kmin_temp,kmax_temp
-                  do j = jmin_temp,jmax_temp
-                    do i = imin_temp,imax_temp
-                      iglob = ibool(i,j,k,ispec)
-                      ! for comparison purpose, we don't have to do "dsqrt", which is expensive
-                      dist =      ((x_target(irec)-dble(xstore(iglob)))**2 &
-                                  +(y_target(irec)-dble(ystore(iglob)))**2 &
-                                  +(z_target(irec)-dble(zstore(iglob)))**2)
-                      if(dist < distmin) then
-                        distmin = dist
-                        ix_initial_guess(irec) = i
-                        iy_initial_guess(irec) = j
-                        iz_initial_guess(irec) = k
-                        xi_receiver(irec) = dble(ix_initial_guess(irec))
-                        eta_receiver(irec) = dble(iy_initial_guess(irec))
-                        gamma_receiver(irec) = dble(iz_initial_guess(irec))
-                        x_found(irec) = xstore(iglob)
-                        y_found(irec) = ystore(iglob)
-                        z_found(irec) = zstore(iglob)
-                      endif
-                    enddo
-                  enddo
-                enddo
-                final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
-                  (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
-             endif ! if receiver "may" be within this element
-          enddo ! do ispec=1,NSPEC_AB
-       endif ! if receiver "may" be within this proc
-    endif !if (.not. SU_FORMAT)
-
-    if (ispec_selected_rec(irec) == 0) then
-      ! receiver is NOT within this proc, assign trivial values
-      ispec_selected_rec(irec) = 1
-      ix_initial_guess(irec) = 1
-      iy_initial_guess(irec) = 1
-      iz_initial_guess(irec) = 1
-      final_distance(irec) = HUGEVAL
-    endif
-
-    ! get normal to the face of the hexaedra if receiver is on the surface
-    if ((.not. RECVS_CAN_BE_BURIED_EXT_MESH) .and. &
-       .not. (ispec_selected_rec(irec) == 0)) then
-      pt0_ix = -1
-      pt0_iy = -1
-      pt0_iz = -1
-      pt1_ix = -1
-      pt1_iy = -1
-      pt1_iz = -1
-      pt2_ix = -1
-      pt2_iy = -1
-      pt2_iz = -1
-      ! we get two vectors of the face (three points) to compute the normal
-      if (ix_initial_guess(irec) == 1 .and. &
-         iglob_is_surface_external_mesh(ibool(1,2,2,ispec_selected_rec(irec)))) then
-        pt0_ix = 1
-        pt0_iy = NGLLY
-        pt0_iz = 1
-        pt1_ix = 1
-        pt1_iy = 1
-        pt1_iz = 1
-        pt2_ix = 1
-        pt2_iy = NGLLY
-        pt2_iz = NGLLZ
-      endif
-      if (ix_initial_guess(irec) == NGLLX .and. &
-         iglob_is_surface_external_mesh(ibool(NGLLX,2,2,ispec_selected_rec(irec)))) then
-        pt0_ix = NGLLX
-        pt0_iy = 1
-        pt0_iz = 1
-        pt1_ix = NGLLX
-        pt1_iy = NGLLY
-        pt1_iz = 1
-        pt2_ix = NGLLX
-        pt2_iy = 1
-        pt2_iz = NGLLZ
-      endif
-      if (iy_initial_guess(irec) == 1 .and. &
-         iglob_is_surface_external_mesh(ibool(2,1,2,ispec_selected_rec(irec)))) then
-        pt0_ix = 1
-        pt0_iy = 1
-        pt0_iz = 1
-        pt1_ix = NGLLX
-        pt1_iy = 1
-        pt1_iz = 1
-        pt2_ix = 1
-        pt2_iy = 1
-        pt2_iz = NGLLZ
-      endif
-      if (iy_initial_guess(irec) == NGLLY .and. &
-         iglob_is_surface_external_mesh(ibool(2,NGLLY,2,ispec_selected_rec(irec)))) then
-        pt0_ix = NGLLX
-        pt0_iy = NGLLY
-        pt0_iz = 1
-        pt1_ix = 1
-        pt1_iy = NGLLY
-        pt1_iz = 1
-        pt2_ix = NGLLX
-        pt2_iy = NGLLY
-        pt2_iz = NGLLZ
-      endif
-      if (iz_initial_guess(irec) == 1 .and. &
-         iglob_is_surface_external_mesh(ibool(2,2,1,ispec_selected_rec(irec)))) then
-        pt0_ix = NGLLX
-        pt0_iy = 1
-        pt0_iz = 1
-        pt1_ix = 1
-        pt1_iy = 1
-        pt1_iz = 1
-        pt2_ix = NGLLX
-        pt2_iy = NGLLY
-        pt2_iz = 1
-      endif
-      if (iz_initial_guess(irec) == NGLLZ .and. &
-         iglob_is_surface_external_mesh(ibool(2,2,NGLLZ,ispec_selected_rec(irec)))) then
-        pt0_ix = 1
-        pt0_iy = 1
-        pt0_iz = NGLLZ
-        pt1_ix = NGLLX
-        pt1_iy = 1
-        pt1_iz = NGLLZ
-        pt2_ix = 1
-        pt2_iy = NGLLY
-        pt2_iz = NGLLZ
-      endif
-
-      if (pt0_ix<0 .or.pt0_iy<0 .or. pt0_iz<0 .or. &
-         pt1_ix<0 .or. pt1_iy<0 .or. pt1_iz<0 .or. &
-         pt2_ix<0 .or. pt2_iy<0 .or. pt2_iz<0) then
-        stop 'error in computing normal for receivers.'
-      endif
-
-      u_vector(1) = xstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
-           - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
-      u_vector(2) = ystore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
-           - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
-      u_vector(3) = zstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
-           - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
-      v_vector(1) = xstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
-           - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
-      v_vector(2) = ystore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
-           - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
-      v_vector(3) = zstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
-           - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
-
-      ! cross product
-      w_vector(1) = u_vector(2)*v_vector(3) - u_vector(3)*v_vector(2)
-      w_vector(2) = u_vector(3)*v_vector(1) - u_vector(1)*v_vector(3)
-      w_vector(3) = u_vector(1)*v_vector(2) - u_vector(2)*v_vector(1)
-
-      ! normalize vector w
-      w_vector(:) = w_vector(:)/sqrt(w_vector(1)**2+w_vector(2)**2+w_vector(3)**2)
-
-      ! build the two other vectors for a direct base: we normalize u, and v=w^u
-      u_vector(:) = u_vector(:)/sqrt(u_vector(1)**2+u_vector(2)**2+u_vector(3)**2)
-      v_vector(1) = w_vector(2)*u_vector(3) - w_vector(3)*u_vector(2)
-      v_vector(2) = w_vector(3)*u_vector(1) - w_vector(1)*u_vector(3)
-      v_vector(3) = w_vector(1)*u_vector(2) - w_vector(2)*u_vector(1)
-
-      ! build rotation matrice nu for seismograms
-      if (EXT_MESH_RECV_NORMAL) then
-        !     East (u)
-        nu(1,1,irec) = u_vector(1)
-        nu(1,2,irec) = v_vector(1)
-        nu(1,3,irec) = w_vector(1)
-
-        !     North (v)
-        nu(2,1,irec) = u_vector(2)
-        nu(2,2,irec) = v_vector(2)
-        nu(2,3,irec) = w_vector(2)
-
-        !     Vertical (w)
-        nu(3,1,irec) = u_vector(3)
-        nu(3,2,irec) = v_vector(3)
-        nu(3,3,irec) = w_vector(3)
-      else
-        !     East
-        nu(1,1,irec) = 1.d0
-        nu(1,2,irec) = 0.d0
-        nu(1,3,irec) = 0.d0
-
-        !     North
-        nu(2,1,irec) = 0.d0
-        nu(2,2,irec) = 1.d0
-        nu(2,3,irec) = 0.d0
-
-        !     Vertical
-        nu(3,1,irec) = 0.d0
-        nu(3,2,irec) = 0.d0
-        nu(3,3,irec) = 1.d0
-      endif
-
-    endif ! of if (.not. RECVS_CAN_BE_BURIED_EXT_MESH)
-
-  ! end of loop on all the stations
-  enddo
-
-  ! close receiver file
-  close(1)
-
-! ****************************************
-! find the best (xi,eta,gamma) for each receiver
-! ****************************************
-
-  if(.not. FASTER_RECEIVERS_POINTS_ONLY) then
-
-    ! loop on all the receivers to iterate in that slice
-    do irec = 1,nrec
-
-      ispec_iterate = ispec_selected_rec(irec)
-
-      ! use initial guess in xi and eta
-      xi = xigll(ix_initial_guess(irec))
-      eta = yigll(iy_initial_guess(irec))
-      gamma = zigll(iz_initial_guess(irec))
-
-      ! define coordinates of the control points of the element
-      do ia=1,NGNOD
-        iax = 0
-        iay = 0
-        iaz = 0
-        if(iaddx(ia) == 0) then
-          iax = 1
-        else if(iaddx(ia) == 1) then
-          iax = (NGLLX+1)/2
-        else if(iaddx(ia) == 2) then
-          iax = NGLLX
-        else
-          call exit_MPI(myrank,'incorrect value of iaddx')
-        endif
-
-        if(iaddy(ia) == 0) then
-          iay = 1
-        else if(iaddy(ia) == 1) then
-          iay = (NGLLY+1)/2
-        else if(iaddy(ia) == 2) then
-          iay = NGLLY
-        else
-          call exit_MPI(myrank,'incorrect value of iaddy')
-        endif
-
-        if(iaddz(ia) == 0) then
-          iaz = 1
-        else if(iaddz(ia) == 1) then
-          iaz = (NGLLZ+1)/2
-        else if(iaddz(ia) == 2) then
-          iaz = NGLLZ
-        else
-          call exit_MPI(myrank,'incorrect value of iaddz')
-        endif
-
-        iglob = ibool(iax,iay,iaz,ispec_iterate)
-        xelm(ia) = dble(xstore(iglob))
-        yelm(ia) = dble(ystore(iglob))
-        zelm(ia) = dble(zstore(iglob))
-
-      enddo
-
-      ! iterate to solve the non linear system
-      do iter_loop = 1,NUM_ITER
-
-        ! impose receiver exactly at the surface
-        !    gamma = 1.d0
-
-        ! recompute jacobian for the new point
-        call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
-                xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
-
-        ! compute distance to target location
-        dx = - (x - x_target(irec))
-        dy = - (y - y_target(irec))
-        dz = - (z - z_target(irec))
-
-        ! compute increments
-        ! gamma does not change since we know the receiver is exactly on the surface
-        dxi  = xix*dx + xiy*dy + xiz*dz
-        deta = etax*dx + etay*dy + etaz*dz
-        dgamma = gammax*dx + gammay*dy + gammaz*dz
-
-        ! update values
-        xi = xi + dxi
-        eta = eta + deta
-        gamma = gamma + dgamma
-
-        ! impose that we stay in that element
-        ! (useful if user gives a receiver outside the mesh for instance)
-        ! we can go slightly outside the [1,1] segment since with finite elements
-        ! the polynomial solution is defined everywhere
-        ! this can be useful for convergence of itertive scheme with distorted elements
-        if (xi > 1.10d0) xi = 1.10d0
-        if (xi < -1.10d0) xi = -1.10d0
-        if (eta > 1.10d0) eta = 1.10d0
-        if (eta < -1.10d0) eta = -1.10d0
-        if (gamma > 1.10d0) gamma = 1.10d0
-        if (gamma < -1.10d0) gamma = -1.10d0
-
-      ! end of non linear iterations
-      enddo
-
-      ! impose receiver exactly at the surface after final iteration
-      !  gamma = 1.d0
-
-      ! compute final coordinates of point found
-      call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
-
-      ! store xi,eta and x,y,z of point found
-      xi_receiver(irec) = xi
-      eta_receiver(irec) = eta
-      gamma_receiver(irec) = gamma
-      x_found(irec) = x
-      y_found(irec) = y
-      z_found(irec) = z
-
-      ! compute final distance between asked and found (converted to km)
-      final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
-        (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
-
-    enddo
-
-  endif ! of if (.not. FASTER_RECEIVERS_POINTS_ONLY)
-
-  ! synchronize all the processes to make sure all the estimates are available
-  call sync_all()
-  ! for MPI version, gather information from all the nodes
-  if (myrank/=0) then ! gather information from other processors (one at a time)
-     call send_i(ispec_selected_rec, nrec,0,0)
-     call send_dp(xi_receiver,       nrec,0,1)
-     call send_dp(eta_receiver,      nrec,0,2)
-     call send_dp(gamma_receiver,    nrec,0,3)
-     call send_dp(final_distance,    nrec,0,4)
-     call send_dp(x_found,           nrec,0,5)
-     call send_dp(y_found,           nrec,0,6)
-     call send_dp(z_found,           nrec,0,7)
-     call send_dp(nu,            3*3*nrec,0,8)
-  else
-     islice_selected_rec(:) = 0
-     do iprocloop=1,NPROC-1
-        call recv_i(ispec_selected_rec_all, nrec,iprocloop,0)
-        call recv_dp(xi_receiver_all,       nrec,iprocloop,1)
-        call recv_dp(eta_receiver_all,      nrec,iprocloop,2)
-        call recv_dp(gamma_receiver_all,    nrec,iprocloop,3)
-        call recv_dp(final_distance_all,    nrec,iprocloop,4)
-        call recv_dp(x_found_all,           nrec,iprocloop,5)
-        call recv_dp(y_found_all,           nrec,iprocloop,6)
-        call recv_dp(z_found_all,           nrec,iprocloop,7)
-        call recv_dp(nu_all,            3*3*nrec,iprocloop,8)
-        do irec=1,nrec
-           if (final_distance_all(irec) < final_distance(irec)) then
-              final_distance(irec) = final_distance_all(irec)
-              islice_selected_rec(irec) = iprocloop
-              ispec_selected_rec(irec) = ispec_selected_rec_all(irec)
-              xi_receiver(irec) = xi_receiver_all(irec)
-              eta_receiver(irec) = eta_receiver_all(irec)
-              gamma_receiver(irec) = gamma_receiver_all(irec)
-              x_found(irec) = x_found_all(irec)
-              y_found(irec) = y_found_all(irec)
-              z_found(irec) = z_found_all(irec)
-              nu(:,:,irec) = nu_all(:,:,irec)
-           endif
-        enddo
-     enddo
-  endif
-  call sync_all()
-
-  ! this is executed by main process only
-  if(myrank == 0) then
-
-    do irec=1,nrec
-
-      write(IMAIN,*)
-      write(IMAIN,*) 'station # ',irec,'    ',station_name(irec),network_name(irec)
-
-      if(final_distance(irec) == HUGEVAL) call exit_MPI(myrank,'error locating receiver')
-
-      write(IMAIN,*) '     original latitude: ',sngl(stlat(irec))
-      write(IMAIN,*) '     original longitude: ',sngl(stlon(irec))
-      if( SUPPRESS_UTM_PROJECTION ) then
-        write(IMAIN,*) '     original x: ',sngl(stutm_x(irec))
-        write(IMAIN,*) '     original y: ',sngl(stutm_y(irec))
-      else
-        write(IMAIN,*) '     original UTM x: ',sngl(stutm_x(irec))
-        write(IMAIN,*) '     original UTM y: ',sngl(stutm_y(irec))
-      endif
-      if( USE_SOURCES_RECVS_Z ) then
-        write(IMAIN,*) '     original z: ',sngl(stbur(irec))
-      else
-        write(IMAIN,*) '     original depth: ',sngl(stbur(irec)),' m'
-      endif
-      write(IMAIN,*) '     horizontal distance: ',sngl(horiz_dist(irec))
-      write(IMAIN,*) '     target x, y, z: ',sngl(x_target(irec)),sngl(y_target(irec)),sngl(z_target(irec))
-
-      write(IMAIN,*) '     closest estimate found: ',sngl(final_distance(irec)),' m away'
-      write(IMAIN,*) '     in slice ',islice_selected_rec(irec),' in element ',ispec_selected_rec(irec)
-      if(FASTER_RECEIVERS_POINTS_ONLY) then
-        write(IMAIN,*) '     in point i,j,k = ',nint(xi_receiver(irec)),nint(eta_receiver(irec)),nint(gamma_receiver(irec))
-        write(IMAIN,*) '     nu1 = ',nu(1,:,irec)
-        write(IMAIN,*) '     nu2 = ',nu(2,:,irec)
-        write(IMAIN,*) '     nu3 = ',nu(3,:,irec)
-      else
-        write(IMAIN,*) '     at coordinates: '
-        write(IMAIN,*) '       xi    = ',xi_receiver(irec)
-        write(IMAIN,*) '       eta   = ',eta_receiver(irec)
-        write(IMAIN,*) '       gamma = ',gamma_receiver(irec)
-      endif
-      if( SUPPRESS_UTM_PROJECTION ) then
-        write(IMAIN,*) '         x: ',x_found(irec)
-        write(IMAIN,*) '         y: ',y_found(irec)
-      else
-        write(IMAIN,*) '     UTM x: ',x_found(irec)
-        write(IMAIN,*) '     UTM y: ',y_found(irec)
-      endif
-      if( USE_SOURCES_RECVS_Z ) then
-        write(IMAIN,*) '         z: ',z_found(irec)
-      else
-        write(IMAIN,*) '     depth: ',dabs(z_found(irec) - elevation(irec)),' m'
-        write(IMAIN,*) '         z: ',z_found(irec)
-      endif
-      write(IMAIN,*)
-
-
-      ! add warning if estimate is poor
-      ! (usually means receiver outside the mesh given by the user)
-      if(final_distance(irec) > 3000.d0) then
-        write(IMAIN,*) '*******************************************************'
-        write(IMAIN,*) '***** WARNING: receiver location estimate is poor *****'
-        write(IMAIN,*) '*******************************************************'
-      endif
-
-      write(IMAIN,*)
-
-    enddo
-
-    ! compute maximal distance for all the receivers
-    final_distance_max = maxval(final_distance(:))
-
-    ! display maximum error for all the receivers
-    write(IMAIN,*) 'maximum error in location of all the receivers: ',sngl(final_distance_max),' m'
-
-    ! add warning if estimate is poor
-    ! (usually means receiver outside the mesh given by the user)
-    if(final_distance_max > 1000.d0) then
-      write(IMAIN,*)
-      write(IMAIN,*) '************************************************************'
-      write(IMAIN,*) '************************************************************'
-      write(IMAIN,*) '***** WARNING: at least one receiver is poorly located *****'
-      write(IMAIN,*) '************************************************************'
-      write(IMAIN,*) '************************************************************'
-    endif
-
-    ! get the base pathname for output files
-    call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
-
-    !! write the list of stations and associated epicentral distance
-    !open(unit=27,file=trim(OUTPUT_FILES)//'/output_list_stations.txt',status='unknown')
-    !do irec=1,nrec
-    !  write(27,*) station_name(irec),'.',network_name(irec),' : ',horiz_dist(irec),' km horizontal distance'
-    !enddo
-    !close(27)
-
-    ! write the locations of stations, so that we can load them and write them to SU headers later
-    open(unit=IOUT_SU,file=trim(OUTPUT_FILES)//'/output_list_stations.txt',status='unknown')
-    do irec=1,nrec
-      write(IOUT_SU,*) x_found(irec),y_found(irec),z_found(irec)
-    enddo
-    close(IOUT_SU)
-
-    ! elapsed time since beginning of mesh generation
-    tCPU = wtime() - time_start
-    write(IMAIN,*)
-    write(IMAIN,*) 'Elapsed time for receiver detection in seconds = ',tCPU
-    write(IMAIN,*)
-    write(IMAIN,*) 'End of receiver detection - done'
-    write(IMAIN,*)
-
-  endif    ! end of section executed by main process only
-
-  ! main process broadcasts the results to all the slices
-  call bcast_all_i(islice_selected_rec,nrec)
-  call bcast_all_i(ispec_selected_rec,nrec)
-  call bcast_all_dp(xi_receiver,nrec)
-  call bcast_all_dp(eta_receiver,nrec)
-  call bcast_all_dp(gamma_receiver,nrec)
-  ! synchronize all the processes to make sure everybody has finished
-  call sync_all()
-
-  ! deallocate arrays
-  deallocate(stlat)
-  deallocate(stlon)
-  deallocate(stele)
-  deallocate(stbur)
-  deallocate(stutm_x)
-  deallocate(stutm_y)
-  deallocate(horiz_dist)
-  deallocate(ix_initial_guess)
-  deallocate(iy_initial_guess)
-  deallocate(iz_initial_guess)
-  deallocate(x_target)
-  deallocate(y_target)
-  deallocate(z_target)
-  deallocate(x_found)
-  deallocate(y_found)
-  deallocate(z_found)
-  deallocate(final_distance)
-  deallocate(ispec_selected_rec_all)
-  deallocate(xi_receiver_all)
-  deallocate(eta_receiver_all)
-  deallocate(gamma_receiver_all)
-  deallocate(x_found_all)
-  deallocate(y_found_all)
-  deallocate(z_found_all)
-  deallocate(final_distance_all)
-
-  end subroutine locate_receivers
-
-!=====================================================================
-
-
-  subroutine station_filter(SUPPRESS_UTM_PROJECTION,UTM_PROJECTION_ZONE,myrank,filename,filtered_filename,nfilter, &
-      LATITUDE_MIN, LATITUDE_MAX, LONGITUDE_MIN, LONGITUDE_MAX)
-
-  implicit none
-
-  include 'constants.h'
-
-! input
-  logical :: SUPPRESS_UTM_PROJECTION
-  integer :: UTM_PROJECTION_ZONE
-  integer :: myrank
-  character(len=*) :: filename,filtered_filename
-  double precision :: LATITUDE_MIN,LATITUDE_MAX,LONGITUDE_MIN,LONGITUDE_MAX
-
-! output
-  integer :: nfilter
-
-  integer :: nrec, nrec_filtered, ios
-
-  double precision :: stlat,stlon,stele,stbur,stutm_x,stutm_y
-  double precision :: minlat,minlon,maxlat,maxlon
-  character(len=MAX_LENGTH_STATION_NAME) :: station_name
-  character(len=MAX_LENGTH_NETWORK_NAME) :: network_name
-  character(len=256) :: dummystring
-
-  nrec = 0
-  nrec_filtered = 0
-
-  ! counts number of lines in stations file
-  open(unit=IIN, file=trim(filename), status = 'old', iostat = ios)
-  if (ios /= 0) call exit_mpi(myrank, 'No file '//trim(filename)//', exit')
-  do while(ios == 0)
-    read(IIN,"(a256)",iostat = ios) dummystring
-    if(ios /= 0) exit
-
-    if( len_trim(dummystring) > 0 ) nrec = nrec + 1
-  enddo
-  close(IIN)
-
-  ! reads in station locations
-  open(unit=IIN, file=trim(filename), status = 'old', iostat = ios)
-  !do irec = 1,nrec
-  !    read(IIN,*) station_name,network_name,stlat,stlon,stele,stbur
-  do while(ios == 0)
-    read(IIN,"(a256)",iostat = ios) dummystring
-    if( ios /= 0 ) exit
-
-    ! counts number of stations in min/max region
-    if( len_trim(dummystring) > 0 ) then
-        dummystring = trim(dummystring)
-        read(dummystring, *) station_name, network_name, stlat, stlon, stele, stbur
-
-        ! convert station location to UTM
-        call utm_geo(stlon,stlat,stutm_x,stutm_y,&
-             UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
-
-        ! counts stations within lon/lat region
-        if( stutm_y >= LATITUDE_MIN .and. stutm_y <= LATITUDE_MAX .and. &
-           stutm_x >= LONGITUDE_MIN .and. stutm_x <= LONGITUDE_MAX) &
-          nrec_filtered = nrec_filtered + 1
-     endif
-  enddo
-  close(IIN)
-
-  ! writes out filtered stations file
-  if (myrank == 0) then
-    open(unit=IIN,file=trim(filename),status='old',action='read',iostat=ios)
-    open(unit=IOUT,file=trim(filtered_filename),status='unknown')
-    do while(ios == 0)
-      read(IIN,"(a256)",iostat = ios) dummystring
-      if( ios /= 0 ) exit
-
-      !read(IIN,*) station_name,network_name,stlat,stlon,stele,stbur
-      if( len_trim(dummystring) > 0 ) then
-        dummystring = trim(dummystring)
-        read(dummystring, *) station_name, network_name, stlat, stlon, stele, stbur
-
-        ! convert station location to UTM
-        call utm_geo(stlon,stlat,stutm_x,stutm_y,&
-             UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
-
-        if( stutm_y >= LATITUDE_MIN .and. stutm_y <= LATITUDE_MAX .and. &
-           stutm_x >= LONGITUDE_MIN .and. stutm_x <= LONGITUDE_MAX) then
-
-          ! w/out formating
-          ! write(IOUT,*) trim(station_name),' ',trim(network_name),' ',sngl(stlat), &
-          !              ' ',sngl(stlon), ' ',sngl(stele), ' ',sngl(stbur)
-
-          ! w/ specific format
-          write(IOUT,'(a10,1x,a10,4e18.6)') &
-                            trim(station_name),trim(network_name), &
-                            sngl(stlat),sngl(stlon),sngl(stele),sngl(stbur)
-
-       endif
-      end if
-    enddo
-    close(IIN)
-    close(IOUT)
-
-    write(IMAIN,*)
-    write(IMAIN,*) 'there are ',nrec,' stations in file ', trim(filename)
-    write(IMAIN,*) 'saving ',nrec_filtered,' stations inside the model in file ', trim(filtered_filename)
-    write(IMAIN,*) 'excluding ',nrec - nrec_filtered,' stations located outside the model'
-    write(IMAIN,*)
-
-    if( nrec_filtered < 1 ) then
-      write(IMAIN,*) 'error filtered stations:'
-      write(IMAIN,*) '  simulation needs at least 1 station but got ',nrec_filtered
-      write(IMAIN,*)
-      write(IMAIN,*) '  check that stations in file '//trim(filename)//' are within'
-
-      if( SUPPRESS_UTM_PROJECTION ) then
-        write(IMAIN,*) '    latitude min/max : ',LATITUDE_MIN,LATITUDE_MAX
-        write(IMAIN,*) '    longitude min/max: ',LONGITUDE_MIN,LONGITUDE_MAX
-      else
-        ! convert edge locations from UTM back to lat/lon
-        call utm_geo(minlon,minlat,LONGITUDE_MIN,LATITUDE_MIN,&
-             UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
-        call utm_geo(maxlon,maxlat,LONGITUDE_MAX,LATITUDE_MAX,&
-             UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
-        write(IMAIN,*) '    longitude min/max: ',minlon,maxlon
-        write(IMAIN,*) '    latitude min/max : ',minlat,maxlat
-        write(IMAIN,*) '    UTM x min/max: ',LONGITUDE_MIN,LONGITUDE_MAX
-        write(IMAIN,*) '    UTM y min/max : ',LATITUDE_MIN,LATITUDE_MAX
-      endif
-
-      write(IMAIN,*)
-    endif
-
-  endif
-
-  nfilter = nrec_filtered
-
-  end subroutine station_filter
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!----
+!---- locate_receivers finds the correct position of the receivers
+!----
+  subroutine locate_receivers(ibool,myrank,NSPEC_AB,NGLOB_AB, &
+                 xstore,ystore,zstore,xigll,yigll,zigll,rec_filename, &
+                 nrec,islice_selected_rec,ispec_selected_rec, &
+                 xi_receiver,eta_receiver,gamma_receiver,station_name,network_name,nu, &
+                 NPROC,utm_x_source,utm_y_source, &
+                 UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+                 iglob_is_surface_external_mesh,ispec_is_surface_external_mesh, &
+                 num_free_surface_faces,free_surface_ispec,free_surface_ijk)
+
+  implicit none
+
+  include "constants.h"
+
+  logical SUPPRESS_UTM_PROJECTION
+
+  integer NPROC,UTM_PROJECTION_ZONE
+
+  integer nrec,myrank
+
+  integer NSPEC_AB,NGLOB_AB
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+
+! arrays containing coordinates of the points
+  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: xstore,ystore,zstore
+
+! for surface locating and normal computing with external mesh
+  integer :: pt0_ix,pt0_iy,pt0_iz,pt1_ix,pt1_iy,pt1_iz,pt2_ix,pt2_iy,pt2_iz
+  integer :: num_free_surface_faces
+  real(kind=CUSTOM_REAL), dimension(3) :: u_vector,v_vector,w_vector
+  logical, dimension(NGLOB_AB) :: iglob_is_surface_external_mesh
+  logical, dimension(NSPEC_AB) :: ispec_is_surface_external_mesh
+  integer, dimension(num_free_surface_faces) :: free_surface_ispec
+  integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
+
+  integer, allocatable, dimension(:) :: ix_initial_guess,iy_initial_guess,iz_initial_guess
+
+  integer iprocloop
+  integer ios
+
+  double precision,dimension(1) :: altitude_rec,distmin_ele
+  double precision,dimension(4) :: elevation_node,dist_node
+  double precision,dimension(NPROC) :: distmin_ele_all,elevation_all
+  double precision, allocatable, dimension(:) :: x_target,y_target,z_target
+  double precision, allocatable, dimension(:) :: horiz_dist
+  double precision, allocatable, dimension(:) :: x_found,y_found,z_found
+
+  integer irec
+  integer i,j,k,ispec,iglob,iface,inode,imin,imax,jmin,jmax,kmin,kmax,igll,jgll,kgll
+  integer iselected,jselected,iface_selected,iadjust,jadjust
+  integer iproc(1)
+
+  double precision utm_x_source,utm_y_source
+  double precision dist
+  double precision xi,eta,gamma,dx,dy,dz,dxi,deta,dgamma
+
+! Gauss-Lobatto-Legendre points of integration
+  double precision xigll(NGLLX)
+  double precision yigll(NGLLY)
+  double precision zigll(NGLLZ)
+
+! input receiver file name
+  character(len=*) rec_filename
+
+! topology of the control points of the surface element
+  integer iax,iay,iaz
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! coordinates of the control points of the surface element
+  double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
+
+  integer iter_loop,ispec_iterate
+
+  integer ia
+  double precision x,y,z
+  double precision xix,xiy,xiz
+  double precision etax,etay,etaz
+  double precision gammax,gammay,gammaz
+
+! timer MPI
+  double precision, external :: wtime
+  double precision time_start,tCPU
+
+! use dynamic allocation
+  double precision, dimension(:), allocatable :: final_distance
+  double precision distmin,final_distance_max
+
+! receiver information
+! timing information for the stations
+! station information for writing the seismograms
+
+  integer :: iglob_selected
+  integer, dimension(nrec) :: islice_selected_rec,ispec_selected_rec
+  double precision, dimension(nrec) :: xi_receiver,eta_receiver,gamma_receiver
+  double precision, dimension(3,3,nrec) :: nu
+  character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
+  character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
+  double precision, allocatable, dimension(:) :: stlat,stlon,stele,stbur,stutm_x,stutm_y,elevation
+
+  double precision, allocatable, dimension(:) :: x_found_all,y_found_all,z_found_all
+  double precision, dimension(:), allocatable :: final_distance_all
+  integer, allocatable, dimension(:) :: ispec_selected_rec_all
+  double precision, allocatable, dimension(:) :: xi_receiver_all,eta_receiver_all,gamma_receiver_all
+  double precision, allocatable, dimension(:,:,:) :: nu_all
+
+  integer :: ier
+  character(len=256) OUTPUT_FILES
+
+  real(kind=CUSTOM_REAL) :: xmin,xmax,ymin,ymax,zmin,zmax
+  real(kind=CUSTOM_REAL) :: xmin_ELE,xmax_ELE,ymin_ELE,ymax_ELE,zmin_ELE,zmax_ELE
+  integer :: imin_temp,imax_temp,jmin_temp,jmax_temp,kmin_temp,kmax_temp
+  integer,dimension(NGLLX*NGLLY*NGLLZ) :: iglob_temp
+
+! **************
+
+  ! dimension of model in current proc
+  xmin=minval(xstore(:));          xmax=maxval(xstore(:))
+  ymin=minval(ystore(:));          ymax=maxval(ystore(:))
+  zmin=minval(zstore(:));          zmax=maxval(zstore(:))
+  if(FASTER_RECEIVERS_POINTS_ONLY) then
+    imin_temp = 1; imax_temp = NGLLX
+    jmin_temp = 1; jmax_temp = NGLLY
+    kmin_temp = 1; kmax_temp = NGLLZ
+  else
+    imin_temp = 2; imax_temp = NGLLX - 1
+    jmin_temp = 2; jmax_temp = NGLLY - 1
+    kmin_temp = 2; kmax_temp = NGLLZ - 1
+  endif
+
+  ! get MPI starting time
+  time_start = wtime()
+
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) '********************'
+    write(IMAIN,*) ' locating receivers'
+    write(IMAIN,*) '********************'
+    write(IMAIN,*)
+  endif
+
+  ! define topology of the control element
+  call usual_hex_nodes(iaddx,iaddy,iaddz)
+
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) '*****************************************************************'
+    write(IMAIN,'(1x,a,a,a)') 'reading receiver information from ', trim(rec_filename), ' file'
+    write(IMAIN,*) '*****************************************************************'
+  endif
+
+  ! get number of stations from receiver file
+  open(unit=1,file=trim(rec_filename),status='old',action='read',iostat=ios)
+  if (ios /= 0) call exit_mpi(myrank,'error opening file '//trim(rec_filename))
+
+  ! allocate memory for arrays using number of stations
+  allocate(stlat(nrec), &
+          stlon(nrec), &
+          stele(nrec), &
+          stbur(nrec), &
+          stutm_x(nrec), &
+          stutm_y(nrec), &
+          horiz_dist(nrec), &
+          elevation(nrec), &
+          ix_initial_guess(nrec), &
+          iy_initial_guess(nrec), &
+          iz_initial_guess(nrec), &
+          x_target(nrec), &
+          y_target(nrec), &
+          z_target(nrec), &
+          x_found(nrec), &
+          y_found(nrec), &
+          z_found(nrec), &
+          final_distance(nrec), &
+          ispec_selected_rec_all(nrec), &
+          xi_receiver_all(nrec), &
+          eta_receiver_all(nrec), &
+          gamma_receiver_all(nrec), &
+          x_found_all(nrec), &
+          y_found_all(nrec), &
+          z_found_all(nrec), &
+          final_distance_all(nrec), &
+          nu_all(3,3,nrec),stat=ier)
+  if( ier /= 0 ) stop 'error allocating arrays for locating receivers'
+
+  ! loop on all the stations
+  do irec=1,nrec
+
+    read(1,*,iostat=ios) station_name(irec),network_name(irec),stlat(irec),stlon(irec),stele(irec),stbur(irec)
+    if (ios /= 0) call exit_mpi(myrank, 'Error reading station file '//trim(rec_filename))
+
+    ! convert station location to UTM
+    call utm_geo(stlon(irec),stlat(irec),stutm_x(irec),stutm_y(irec),&
+                UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
+
+    ! compute horizontal distance between source and receiver in km
+    horiz_dist(irec) = dsqrt((stutm_y(irec)-utm_y_source)**2 + (stutm_x(irec)-utm_x_source)**2) / 1000.
+
+    ! print some information about stations
+    if(myrank == 0) &
+        write(IMAIN,*) 'Station #',irec,': ',station_name(irec)(1:len_trim(station_name(irec))), &
+                       '.',network_name(irec)(1:len_trim(network_name(irec))), &
+                       '    horizontal distance:  ',sngl(horiz_dist(irec)),' km'
+
+    ! get approximate topography elevation at source long/lat coordinates
+    !   set distance to huge initial value
+    distmin = HUGEVAL
+    if(num_free_surface_faces > 0) then
+      iglob_selected = 1
+      ! loop only on points inside the element
+      ! exclude edges to ensure this point is not shared with other elements
+      imin = 2
+      imax = NGLLX - 1
+      jmin = 2
+      jmax = NGLLY - 1
+      iselected = 0
+      jselected = 0
+      iface_selected = 0
+      do iface=1,num_free_surface_faces
+        do j=jmin,jmax
+          do i=imin,imax
+
+            ispec = free_surface_ispec(iface)
+            igll = free_surface_ijk(1,(j-1)*NGLLY+i,iface)
+            jgll = free_surface_ijk(2,(j-1)*NGLLY+i,iface)
+            kgll = free_surface_ijk(3,(j-1)*NGLLY+i,iface)
+            iglob = ibool(igll,jgll,kgll,ispec)
+
+            ! keep this point if it is closer to the receiver
+            dist = dsqrt((stutm_x(irec)-dble(xstore(iglob)))**2 + &
+                       (stutm_y(irec)-dble(ystore(iglob)))**2)
+            if(dist < distmin) then
+              distmin = dist
+              iglob_selected = iglob
+              iface_selected = iface
+              iselected = i
+              jselected = j
+              altitude_rec(1) = zstore(iglob_selected)
+            endif
+          enddo
+        enddo
+      ! end of loop on all the elements on the free surface
+      end do
+      !  weighted mean at current point of topography elevation of the four closest nodes
+      !  set distance to huge initial value
+      distmin = HUGEVAL
+      do j=jselected,jselected+1
+        do i=iselected,iselected+1
+          inode = 1
+          do jadjust=0,1
+            do iadjust= 0,1
+              ispec = free_surface_ispec(iface_selected)
+              igll = free_surface_ijk(1,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+              jgll = free_surface_ijk(2,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+              kgll = free_surface_ijk(3,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+              iglob = ibool(igll,jgll,kgll,ispec)
+
+              elevation_node(inode) = zstore(iglob)
+              dist_node(inode) = dsqrt((stutm_x(irec)-dble(xstore(iglob)))**2 + &
+                          (stutm_y(irec)-dble(ystore(iglob)))**2)
+              inode = inode + 1
+            end do
+          end do
+          dist = sum(dist_node)
+          if(dist < distmin) then
+            distmin = dist
+            altitude_rec(1) = (dist_node(1)/dist)*elevation_node(1) + &
+                       (dist_node(2)/dist)*elevation_node(2) + &
+                       (dist_node(3)/dist)*elevation_node(3) + &
+                       (dist_node(4)/dist)*elevation_node(4)
+          endif
+        end do
+      end do
+    end if
+    !  MPI communications to determine the best slice
+    distmin_ele(1)= distmin
+    call gather_all_dp(distmin_ele,1,distmin_ele_all,1,NPROC)
+    call gather_all_dp(altitude_rec,1,elevation_all,1,NPROC)
+    if(myrank == 0) then
+      iproc = minloc(distmin_ele_all)
+      altitude_rec(1) = elevation_all(iproc(1))
+    end if
+    call bcast_all_dp(altitude_rec,1)
+    elevation(irec) = altitude_rec(1)
+
+    ! reset distance to huge initial value
+    distmin=HUGEVAL
+
+!     get the Cartesian components of n in the model: nu
+
+    ! orientation consistent with the UTM projection
+    ! X coordinate - East
+    nu(1,1,irec) = 1.d0
+    nu(1,2,irec) = 0.d0
+    nu(1,3,irec) = 0.d0
+    ! Y coordinate - North
+    nu(2,1,irec) = 0.d0
+    nu(2,2,irec) = 1.d0
+    nu(2,3,irec) = 0.d0
+    ! Z coordinate - Vertical
+    nu(3,1,irec) = 0.d0
+    nu(3,2,irec) = 0.d0
+    nu(3,3,irec) = 1.d0
+
+    x_target(irec) = stutm_x(irec)
+    y_target(irec) = stutm_y(irec)
+
+    ! receiver's Z coordinate
+    if( USE_SOURCES_RECVS_Z ) then
+      ! alternative: burial depth is given as z value directly
+      z_target(irec) = stbur(irec)
+    else
+      ! burial depth in STATIONS file given in m
+      z_target(irec) = elevation(irec) - stbur(irec)
+    endif
+    !if (myrank == 0) write(IOVTK,*) x_target(irec), y_target(irec), z_target(irec)
+
+    if (.not. SU_FORMAT) then
+       ! determines closest GLL point
+       ispec_selected_rec(irec) = 0
+       do ispec=1,NSPEC_AB
+
+         ! define the interval in which we look for points
+         if(FASTER_RECEIVERS_POINTS_ONLY) then
+           imin = 1
+           imax = NGLLX
+
+           jmin = 1
+           jmax = NGLLY
+
+           kmin = 1
+           kmax = NGLLZ
+
+         else
+           ! loop only on points inside the element
+           ! exclude edges to ensure this point is not shared with other elements
+           imin = 2
+           imax = NGLLX - 1
+
+           jmin = 2
+           jmax = NGLLY - 1
+
+           kmin = 2
+           kmax = NGLLZ - 1
+         endif
+
+         do k = kmin,kmax
+           do j = jmin,jmax
+             do i = imin,imax
+
+               iglob = ibool(i,j,k,ispec)
+
+               if (.not. RECVS_CAN_BE_BURIED_EXT_MESH) then
+                 if ((.not. iglob_is_surface_external_mesh(iglob)) .or. (.not. ispec_is_surface_external_mesh(ispec))) then
+                   cycle
+                 endif
+               endif
+
+               dist = dsqrt((x_target(irec)-dble(xstore(iglob)))**2 &
+                           +(y_target(irec)-dble(ystore(iglob)))**2 &
+                           +(z_target(irec)-dble(zstore(iglob)))**2)
+
+               ! keep this point if it is closer to the receiver
+               if(dist < distmin) then
+                 distmin = dist
+                 ispec_selected_rec(irec) = ispec
+                 ix_initial_guess(irec) = i
+                 iy_initial_guess(irec) = j
+                 iz_initial_guess(irec) = k
+
+                 xi_receiver(irec) = dble(ix_initial_guess(irec))
+                 eta_receiver(irec) = dble(iy_initial_guess(irec))
+                 gamma_receiver(irec) = dble(iz_initial_guess(irec))
+                 x_found(irec) = xstore(iglob)
+                 y_found(irec) = ystore(iglob)
+                 z_found(irec) = zstore(iglob)
+               endif
+
+             enddo
+           enddo
+         enddo
+
+         ! compute final distance between asked and found (converted to km)
+         final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
+           (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
+
+       ! end of loop on all the spectral elements in current slice
+       enddo
+    else
+       ispec_selected_rec(irec) = 0
+       ix_initial_guess(irec) = 0
+       iy_initial_guess(irec) = 0
+       iz_initial_guess(irec) = 0
+       final_distance(irec) = HUGEVAL
+       if (  (x_target(irec)>=xmin .and. x_target(irec)<=xmax) .and. &
+             (y_target(irec)>=ymin .and. y_target(irec)<=ymax) .and. &
+             (z_target(irec)>=zmin .and. z_target(irec)<=zmax) ) then
+          do ispec=1,NSPEC_AB
+             iglob_temp=reshape(ibool(:,:,:,ispec),(/NGLLX*NGLLY*NGLLZ/))
+             xmin_ELE=minval(xstore(iglob_temp))
+             xmax_ELE=maxval(xstore(iglob_temp))
+             ymin_ELE=minval(ystore(iglob_temp))
+             ymax_ELE=maxval(ystore(iglob_temp))
+             zmin_ELE=minval(zstore(iglob_temp))
+             zmax_ELE=maxval(zstore(iglob_temp))
+             if (  (x_target(irec)>=xmin_ELE .and. x_target(irec)<=xmax_ELE) .and. &
+                   (y_target(irec)>=ymin_ELE .and. y_target(irec)<=ymax_ELE) .and. &
+                   (z_target(irec)>=zmin_ELE .and. z_target(irec)<=zmax_ELE) ) then
+                ! we find the element (ispec) which "may" contain the receiver (irec)
+                ! so we only need to compute distances (which is expensive because of "dsqrt") within those elements
+                ispec_selected_rec(irec) = ispec
+                do k = kmin_temp,kmax_temp
+                  do j = jmin_temp,jmax_temp
+                    do i = imin_temp,imax_temp
+                      iglob = ibool(i,j,k,ispec)
+                      ! for comparison purpose, we don't have to do "dsqrt", which is expensive
+                      dist =      ((x_target(irec)-dble(xstore(iglob)))**2 &
+                                  +(y_target(irec)-dble(ystore(iglob)))**2 &
+                                  +(z_target(irec)-dble(zstore(iglob)))**2)
+                      if(dist < distmin) then
+                        distmin = dist
+                        ix_initial_guess(irec) = i
+                        iy_initial_guess(irec) = j
+                        iz_initial_guess(irec) = k
+                        xi_receiver(irec) = dble(ix_initial_guess(irec))
+                        eta_receiver(irec) = dble(iy_initial_guess(irec))
+                        gamma_receiver(irec) = dble(iz_initial_guess(irec))
+                        x_found(irec) = xstore(iglob)
+                        y_found(irec) = ystore(iglob)
+                        z_found(irec) = zstore(iglob)
+                      endif
+                    enddo
+                  enddo
+                enddo
+                final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
+                  (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
+             endif ! if receiver "may" be within this element
+          enddo ! do ispec=1,NSPEC_AB
+       endif ! if receiver "may" be within this proc
+    endif !if (.not. SU_FORMAT)
+
+    if (ispec_selected_rec(irec) == 0) then
+      ! receiver is NOT within this proc, assign trivial values
+      ispec_selected_rec(irec) = 1
+      ix_initial_guess(irec) = 1
+      iy_initial_guess(irec) = 1
+      iz_initial_guess(irec) = 1
+      final_distance(irec) = HUGEVAL
+    endif
+
+    ! get normal to the face of the hexaedra if receiver is on the surface
+    if ((.not. RECVS_CAN_BE_BURIED_EXT_MESH) .and. &
+       .not. (ispec_selected_rec(irec) == 0)) then
+      pt0_ix = -1
+      pt0_iy = -1
+      pt0_iz = -1
+      pt1_ix = -1
+      pt1_iy = -1
+      pt1_iz = -1
+      pt2_ix = -1
+      pt2_iy = -1
+      pt2_iz = -1
+      ! we get two vectors of the face (three points) to compute the normal
+      if (ix_initial_guess(irec) == 1 .and. &
+         iglob_is_surface_external_mesh(ibool(1,2,2,ispec_selected_rec(irec)))) then
+        pt0_ix = 1
+        pt0_iy = NGLLY
+        pt0_iz = 1
+        pt1_ix = 1
+        pt1_iy = 1
+        pt1_iz = 1
+        pt2_ix = 1
+        pt2_iy = NGLLY
+        pt2_iz = NGLLZ
+      endif
+      if (ix_initial_guess(irec) == NGLLX .and. &
+         iglob_is_surface_external_mesh(ibool(NGLLX,2,2,ispec_selected_rec(irec)))) then
+        pt0_ix = NGLLX
+        pt0_iy = 1
+        pt0_iz = 1
+        pt1_ix = NGLLX
+        pt1_iy = NGLLY
+        pt1_iz = 1
+        pt2_ix = NGLLX
+        pt2_iy = 1
+        pt2_iz = NGLLZ
+      endif
+      if (iy_initial_guess(irec) == 1 .and. &
+         iglob_is_surface_external_mesh(ibool(2,1,2,ispec_selected_rec(irec)))) then
+        pt0_ix = 1
+        pt0_iy = 1
+        pt0_iz = 1
+        pt1_ix = NGLLX
+        pt1_iy = 1
+        pt1_iz = 1
+        pt2_ix = 1
+        pt2_iy = 1
+        pt2_iz = NGLLZ
+      endif
+      if (iy_initial_guess(irec) == NGLLY .and. &
+         iglob_is_surface_external_mesh(ibool(2,NGLLY,2,ispec_selected_rec(irec)))) then
+        pt0_ix = NGLLX
+        pt0_iy = NGLLY
+        pt0_iz = 1
+        pt1_ix = 1
+        pt1_iy = NGLLY
+        pt1_iz = 1
+        pt2_ix = NGLLX
+        pt2_iy = NGLLY
+        pt2_iz = NGLLZ
+      endif
+      if (iz_initial_guess(irec) == 1 .and. &
+         iglob_is_surface_external_mesh(ibool(2,2,1,ispec_selected_rec(irec)))) then
+        pt0_ix = NGLLX
+        pt0_iy = 1
+        pt0_iz = 1
+        pt1_ix = 1
+        pt1_iy = 1
+        pt1_iz = 1
+        pt2_ix = NGLLX
+        pt2_iy = NGLLY
+        pt2_iz = 1
+      endif
+      if (iz_initial_guess(irec) == NGLLZ .and. &
+         iglob_is_surface_external_mesh(ibool(2,2,NGLLZ,ispec_selected_rec(irec)))) then
+        pt0_ix = 1
+        pt0_iy = 1
+        pt0_iz = NGLLZ
+        pt1_ix = NGLLX
+        pt1_iy = 1
+        pt1_iz = NGLLZ
+        pt2_ix = 1
+        pt2_iy = NGLLY
+        pt2_iz = NGLLZ
+      endif
+
+      if (pt0_ix<0 .or.pt0_iy<0 .or. pt0_iz<0 .or. &
+         pt1_ix<0 .or. pt1_iy<0 .or. pt1_iz<0 .or. &
+         pt2_ix<0 .or. pt2_iy<0 .or. pt2_iz<0) then
+        stop 'error in computing normal for receivers.'
+      endif
+
+      u_vector(1) = xstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
+           - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+      u_vector(2) = ystore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
+           - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+      u_vector(3) = zstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
+           - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+      v_vector(1) = xstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
+           - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+      v_vector(2) = ystore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
+           - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+      v_vector(3) = zstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
+           - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+
+      ! cross product
+      w_vector(1) = u_vector(2)*v_vector(3) - u_vector(3)*v_vector(2)
+      w_vector(2) = u_vector(3)*v_vector(1) - u_vector(1)*v_vector(3)
+      w_vector(3) = u_vector(1)*v_vector(2) - u_vector(2)*v_vector(1)
+
+      ! normalize vector w
+      w_vector(:) = w_vector(:)/sqrt(w_vector(1)**2+w_vector(2)**2+w_vector(3)**2)
+
+      ! build the two other vectors for a direct base: we normalize u, and v=w^u
+      u_vector(:) = u_vector(:)/sqrt(u_vector(1)**2+u_vector(2)**2+u_vector(3)**2)
+      v_vector(1) = w_vector(2)*u_vector(3) - w_vector(3)*u_vector(2)
+      v_vector(2) = w_vector(3)*u_vector(1) - w_vector(1)*u_vector(3)
+      v_vector(3) = w_vector(1)*u_vector(2) - w_vector(2)*u_vector(1)
+
+      ! build rotation matrice nu for seismograms
+      if (EXT_MESH_RECV_NORMAL) then
+        !     East (u)
+        nu(1,1,irec) = u_vector(1)
+        nu(1,2,irec) = v_vector(1)
+        nu(1,3,irec) = w_vector(1)
+
+        !     North (v)
+        nu(2,1,irec) = u_vector(2)
+        nu(2,2,irec) = v_vector(2)
+        nu(2,3,irec) = w_vector(2)
+
+        !     Vertical (w)
+        nu(3,1,irec) = u_vector(3)
+        nu(3,2,irec) = v_vector(3)
+        nu(3,3,irec) = w_vector(3)
+      else
+        !     East
+        nu(1,1,irec) = 1.d0
+        nu(1,2,irec) = 0.d0
+        nu(1,3,irec) = 0.d0
+
+        !     North
+        nu(2,1,irec) = 0.d0
+        nu(2,2,irec) = 1.d0
+        nu(2,3,irec) = 0.d0
+
+        !     Vertical
+        nu(3,1,irec) = 0.d0
+        nu(3,2,irec) = 0.d0
+        nu(3,3,irec) = 1.d0
+      endif
+
+    endif ! of if (.not. RECVS_CAN_BE_BURIED_EXT_MESH)
+
+  ! end of loop on all the stations
+  enddo
+
+  ! close receiver file
+  close(1)
+
+! ****************************************
+! find the best (xi,eta,gamma) for each receiver
+! ****************************************
+
+  if(.not. FASTER_RECEIVERS_POINTS_ONLY) then
+
+    ! loop on all the receivers to iterate in that slice
+    do irec = 1,nrec
+
+      ispec_iterate = ispec_selected_rec(irec)
+
+      ! use initial guess in xi and eta
+      xi = xigll(ix_initial_guess(irec))
+      eta = yigll(iy_initial_guess(irec))
+      gamma = zigll(iz_initial_guess(irec))
+
+      ! define coordinates of the control points of the element
+      do ia=1,NGNOD
+        iax = 0
+        iay = 0
+        iaz = 0
+        if(iaddx(ia) == 0) then
+          iax = 1
+        else if(iaddx(ia) == 1) then
+          iax = (NGLLX+1)/2
+        else if(iaddx(ia) == 2) then
+          iax = NGLLX
+        else
+          call exit_MPI(myrank,'incorrect value of iaddx')
+        endif
+
+        if(iaddy(ia) == 0) then
+          iay = 1
+        else if(iaddy(ia) == 1) then
+          iay = (NGLLY+1)/2
+        else if(iaddy(ia) == 2) then
+          iay = NGLLY
+        else
+          call exit_MPI(myrank,'incorrect value of iaddy')
+        endif
+
+        if(iaddz(ia) == 0) then
+          iaz = 1
+        else if(iaddz(ia) == 1) then
+          iaz = (NGLLZ+1)/2
+        else if(iaddz(ia) == 2) then
+          iaz = NGLLZ
+        else
+          call exit_MPI(myrank,'incorrect value of iaddz')
+        endif
+
+        iglob = ibool(iax,iay,iaz,ispec_iterate)
+        xelm(ia) = dble(xstore(iglob))
+        yelm(ia) = dble(ystore(iglob))
+        zelm(ia) = dble(zstore(iglob))
+
+      enddo
+
+      ! iterate to solve the non linear system
+      do iter_loop = 1,NUM_ITER
+
+        ! impose receiver exactly at the surface
+        !    gamma = 1.d0
+
+        ! recompute jacobian for the new point
+        call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+                xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+
+        ! compute distance to target location
+        dx = - (x - x_target(irec))
+        dy = - (y - y_target(irec))
+        dz = - (z - z_target(irec))
+
+        ! compute increments
+        ! gamma does not change since we know the receiver is exactly on the surface
+        dxi  = xix*dx + xiy*dy + xiz*dz
+        deta = etax*dx + etay*dy + etaz*dz
+        dgamma = gammax*dx + gammay*dy + gammaz*dz
+
+        ! update values
+        xi = xi + dxi
+        eta = eta + deta
+        gamma = gamma + dgamma
+
+        ! impose that we stay in that element
+        ! (useful if user gives a receiver outside the mesh for instance)
+        ! we can go slightly outside the [1,1] segment since with finite elements
+        ! the polynomial solution is defined everywhere
+        ! this can be useful for convergence of itertive scheme with distorted elements
+        if (xi > 1.10d0) xi = 1.10d0
+        if (xi < -1.10d0) xi = -1.10d0
+        if (eta > 1.10d0) eta = 1.10d0
+        if (eta < -1.10d0) eta = -1.10d0
+        if (gamma > 1.10d0) gamma = 1.10d0
+        if (gamma < -1.10d0) gamma = -1.10d0
+
+      ! end of non linear iterations
+      enddo
+
+      ! impose receiver exactly at the surface after final iteration
+      !  gamma = 1.d0
+
+      ! compute final coordinates of point found
+      call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+
+      ! store xi,eta and x,y,z of point found
+      xi_receiver(irec) = xi
+      eta_receiver(irec) = eta
+      gamma_receiver(irec) = gamma
+      x_found(irec) = x
+      y_found(irec) = y
+      z_found(irec) = z
+
+      ! compute final distance between asked and found (converted to km)
+      final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
+        (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
+
+    enddo
+
+  endif ! of if (.not. FASTER_RECEIVERS_POINTS_ONLY)
+
+  ! synchronize all the processes to make sure all the estimates are available
+  call sync_all()
+  ! for MPI version, gather information from all the nodes
+  if (myrank/=0) then ! gather information from other processors (one at a time)
+     call send_i(ispec_selected_rec, nrec,0,0)
+     call send_dp(xi_receiver,       nrec,0,1)
+     call send_dp(eta_receiver,      nrec,0,2)
+     call send_dp(gamma_receiver,    nrec,0,3)
+     call send_dp(final_distance,    nrec,0,4)
+     call send_dp(x_found,           nrec,0,5)
+     call send_dp(y_found,           nrec,0,6)
+     call send_dp(z_found,           nrec,0,7)
+     call send_dp(nu,            3*3*nrec,0,8)
+  else
+     islice_selected_rec(:) = 0
+     do iprocloop=1,NPROC-1
+        call recv_i(ispec_selected_rec_all, nrec,iprocloop,0)
+        call recv_dp(xi_receiver_all,       nrec,iprocloop,1)
+        call recv_dp(eta_receiver_all,      nrec,iprocloop,2)
+        call recv_dp(gamma_receiver_all,    nrec,iprocloop,3)
+        call recv_dp(final_distance_all,    nrec,iprocloop,4)
+        call recv_dp(x_found_all,           nrec,iprocloop,5)
+        call recv_dp(y_found_all,           nrec,iprocloop,6)
+        call recv_dp(z_found_all,           nrec,iprocloop,7)
+        call recv_dp(nu_all,            3*3*nrec,iprocloop,8)
+        do irec=1,nrec
+           if (final_distance_all(irec) < final_distance(irec)) then
+              final_distance(irec) = final_distance_all(irec)
+              islice_selected_rec(irec) = iprocloop
+              ispec_selected_rec(irec) = ispec_selected_rec_all(irec)
+              xi_receiver(irec) = xi_receiver_all(irec)
+              eta_receiver(irec) = eta_receiver_all(irec)
+              gamma_receiver(irec) = gamma_receiver_all(irec)
+              x_found(irec) = x_found_all(irec)
+              y_found(irec) = y_found_all(irec)
+              z_found(irec) = z_found_all(irec)
+              nu(:,:,irec) = nu_all(:,:,irec)
+           endif
+        enddo
+     enddo
+  endif
+  call sync_all()
+
+  ! this is executed by main process only
+  if(myrank == 0) then
+
+    do irec=1,nrec
+
+      write(IMAIN,*)
+      write(IMAIN,*) 'station # ',irec,'    ',station_name(irec),network_name(irec)
+
+      if(final_distance(irec) == HUGEVAL) call exit_MPI(myrank,'error locating receiver')
+
+      write(IMAIN,*) '     original latitude: ',sngl(stlat(irec))
+      write(IMAIN,*) '     original longitude: ',sngl(stlon(irec))
+      if( SUPPRESS_UTM_PROJECTION ) then
+        write(IMAIN,*) '     original x: ',sngl(stutm_x(irec))
+        write(IMAIN,*) '     original y: ',sngl(stutm_y(irec))
+      else
+        write(IMAIN,*) '     original UTM x: ',sngl(stutm_x(irec))
+        write(IMAIN,*) '     original UTM y: ',sngl(stutm_y(irec))
+      endif
+      if( USE_SOURCES_RECVS_Z ) then
+        write(IMAIN,*) '     original z: ',sngl(stbur(irec))
+      else
+        write(IMAIN,*) '     original depth: ',sngl(stbur(irec)),' m'
+      endif
+      write(IMAIN,*) '     horizontal distance: ',sngl(horiz_dist(irec))
+      write(IMAIN,*) '     target x, y, z: ',sngl(x_target(irec)),sngl(y_target(irec)),sngl(z_target(irec))
+
+      write(IMAIN,*) '     closest estimate found: ',sngl(final_distance(irec)),' m away'
+      write(IMAIN,*) '     in slice ',islice_selected_rec(irec),' in element ',ispec_selected_rec(irec)
+      if(FASTER_RECEIVERS_POINTS_ONLY) then
+        write(IMAIN,*) '     in point i,j,k = ',nint(xi_receiver(irec)),nint(eta_receiver(irec)),nint(gamma_receiver(irec))
+        write(IMAIN,*) '     nu1 = ',nu(1,:,irec)
+        write(IMAIN,*) '     nu2 = ',nu(2,:,irec)
+        write(IMAIN,*) '     nu3 = ',nu(3,:,irec)
+      else
+        write(IMAIN,*) '     at coordinates: '
+        write(IMAIN,*) '       xi    = ',xi_receiver(irec)
+        write(IMAIN,*) '       eta   = ',eta_receiver(irec)
+        write(IMAIN,*) '       gamma = ',gamma_receiver(irec)
+      endif
+      if( SUPPRESS_UTM_PROJECTION ) then
+        write(IMAIN,*) '         x: ',x_found(irec)
+        write(IMAIN,*) '         y: ',y_found(irec)
+      else
+        write(IMAIN,*) '     UTM x: ',x_found(irec)
+        write(IMAIN,*) '     UTM y: ',y_found(irec)
+      endif
+      if( USE_SOURCES_RECVS_Z ) then
+        write(IMAIN,*) '         z: ',z_found(irec)
+      else
+        write(IMAIN,*) '     depth: ',dabs(z_found(irec) - elevation(irec)),' m'
+        write(IMAIN,*) '         z: ',z_found(irec)
+      endif
+      write(IMAIN,*)
+
+
+      ! add warning if estimate is poor
+      ! (usually means receiver outside the mesh given by the user)
+      if(final_distance(irec) > 3000.d0) then
+        write(IMAIN,*) '*******************************************************'
+        write(IMAIN,*) '***** WARNING: receiver location estimate is poor *****'
+        write(IMAIN,*) '*******************************************************'
+      endif
+
+      write(IMAIN,*)
+
+    enddo
+
+    ! compute maximal distance for all the receivers
+    final_distance_max = maxval(final_distance(:))
+
+    ! display maximum error for all the receivers
+    write(IMAIN,*) 'maximum error in location of all the receivers: ',sngl(final_distance_max),' m'
+
+    ! add warning if estimate is poor
+    ! (usually means receiver outside the mesh given by the user)
+    if(final_distance_max > 1000.d0) then
+      write(IMAIN,*)
+      write(IMAIN,*) '************************************************************'
+      write(IMAIN,*) '************************************************************'
+      write(IMAIN,*) '***** WARNING: at least one receiver is poorly located *****'
+      write(IMAIN,*) '************************************************************'
+      write(IMAIN,*) '************************************************************'
+    endif
+
+    ! get the base pathname for output files
+    call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
+
+    !! write the list of stations and associated epicentral distance
+    !open(unit=27,file=trim(OUTPUT_FILES)//'/output_list_stations.txt',status='unknown')
+    !do irec=1,nrec
+    !  write(27,*) station_name(irec),'.',network_name(irec),' : ',horiz_dist(irec),' km horizontal distance'
+    !enddo
+    !close(27)
+
+    ! write the locations of stations, so that we can load them and write them to SU headers later
+    open(unit=IOUT_SU,file=trim(OUTPUT_FILES)//'/output_list_stations.txt',status='unknown')
+    do irec=1,nrec
+      write(IOUT_SU,*) x_found(irec),y_found(irec),z_found(irec)
+    enddo
+    close(IOUT_SU)
+
+    ! elapsed time since beginning of mesh generation
+    tCPU = wtime() - time_start
+    write(IMAIN,*)
+    write(IMAIN,*) 'Elapsed time for receiver detection in seconds = ',tCPU
+    write(IMAIN,*)
+    write(IMAIN,*) 'End of receiver detection - done'
+    write(IMAIN,*)
+
+  endif    ! end of section executed by main process only
+
+  ! main process broadcasts the results to all the slices
+  call bcast_all_i(islice_selected_rec,nrec)
+  call bcast_all_i(ispec_selected_rec,nrec)
+  call bcast_all_dp(xi_receiver,nrec)
+  call bcast_all_dp(eta_receiver,nrec)
+  call bcast_all_dp(gamma_receiver,nrec)
+  ! synchronize all the processes to make sure everybody has finished
+  call sync_all()
+
+  ! deallocate arrays
+  deallocate(stlat)
+  deallocate(stlon)
+  deallocate(stele)
+  deallocate(stbur)
+  deallocate(stutm_x)
+  deallocate(stutm_y)
+  deallocate(horiz_dist)
+  deallocate(ix_initial_guess)
+  deallocate(iy_initial_guess)
+  deallocate(iz_initial_guess)
+  deallocate(x_target)
+  deallocate(y_target)
+  deallocate(z_target)
+  deallocate(x_found)
+  deallocate(y_found)
+  deallocate(z_found)
+  deallocate(final_distance)
+  deallocate(ispec_selected_rec_all)
+  deallocate(xi_receiver_all)
+  deallocate(eta_receiver_all)
+  deallocate(gamma_receiver_all)
+  deallocate(x_found_all)
+  deallocate(y_found_all)
+  deallocate(z_found_all)
+  deallocate(final_distance_all)
+
+  end subroutine locate_receivers
+
+!=====================================================================
+
+
+  subroutine station_filter(SUPPRESS_UTM_PROJECTION,UTM_PROJECTION_ZONE,myrank,filename,filtered_filename,nfilter, &
+      LATITUDE_MIN, LATITUDE_MAX, LONGITUDE_MIN, LONGITUDE_MAX)
+
+  implicit none
+
+  include 'constants.h'
+
+! input
+  logical :: SUPPRESS_UTM_PROJECTION
+  integer :: UTM_PROJECTION_ZONE
+  integer :: myrank
+  character(len=*) :: filename,filtered_filename
+  double precision :: LATITUDE_MIN,LATITUDE_MAX,LONGITUDE_MIN,LONGITUDE_MAX
+
+! output
+  integer :: nfilter
+
+  integer :: nrec, nrec_filtered, ios
+
+  double precision :: stlat,stlon,stele,stbur,stutm_x,stutm_y
+  double precision :: minlat,minlon,maxlat,maxlon
+  character(len=MAX_LENGTH_STATION_NAME) :: station_name
+  character(len=MAX_LENGTH_NETWORK_NAME) :: network_name
+  character(len=256) :: dummystring
+
+  nrec = 0
+  nrec_filtered = 0
+
+  ! counts number of lines in stations file
+  open(unit=IIN, file=trim(filename), status = 'old', iostat = ios)
+  if (ios /= 0) call exit_mpi(myrank, 'No file '//trim(filename)//', exit')
+  do while(ios == 0)
+    read(IIN,"(a256)",iostat = ios) dummystring
+    if(ios /= 0) exit
+
+    if( len_trim(dummystring) > 0 ) nrec = nrec + 1
+  enddo
+  close(IIN)
+
+  ! reads in station locations
+  open(unit=IIN, file=trim(filename), status = 'old', iostat = ios)
+  !do irec = 1,nrec
+  !    read(IIN,*) station_name,network_name,stlat,stlon,stele,stbur
+  do while(ios == 0)
+    read(IIN,"(a256)",iostat = ios) dummystring
+    if( ios /= 0 ) exit
+
+    ! counts number of stations in min/max region
+    if( len_trim(dummystring) > 0 ) then
+        dummystring = trim(dummystring)
+        read(dummystring, *) station_name, network_name, stlat, stlon, stele, stbur
+
+        ! convert station location to UTM
+        call utm_geo(stlon,stlat,stutm_x,stutm_y,&
+             UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
+
+        ! counts stations within lon/lat region
+        if( stutm_y >= LATITUDE_MIN .and. stutm_y <= LATITUDE_MAX .and. &
+           stutm_x >= LONGITUDE_MIN .and. stutm_x <= LONGITUDE_MAX) &
+          nrec_filtered = nrec_filtered + 1
+     endif
+  enddo
+  close(IIN)
+
+  ! writes out filtered stations file
+  if (myrank == 0) then
+    open(unit=IIN,file=trim(filename),status='old',action='read',iostat=ios)
+    open(unit=IOUT,file=trim(filtered_filename),status='unknown')
+    do while(ios == 0)
+      read(IIN,"(a256)",iostat = ios) dummystring
+      if( ios /= 0 ) exit
+
+      !read(IIN,*) station_name,network_name,stlat,stlon,stele,stbur
+      if( len_trim(dummystring) > 0 ) then
+        dummystring = trim(dummystring)
+        read(dummystring, *) station_name, network_name, stlat, stlon, stele, stbur
+
+        ! convert station location to UTM
+        call utm_geo(stlon,stlat,stutm_x,stutm_y,&
+             UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
+
+        if( stutm_y >= LATITUDE_MIN .and. stutm_y <= LATITUDE_MAX .and. &
+           stutm_x >= LONGITUDE_MIN .and. stutm_x <= LONGITUDE_MAX) then
+
+          ! w/out formating
+          ! write(IOUT,*) trim(station_name),' ',trim(network_name),' ',sngl(stlat), &
+          !              ' ',sngl(stlon), ' ',sngl(stele), ' ',sngl(stbur)
+
+          ! w/ specific format
+          write(IOUT,'(a10,1x,a10,4e18.6)') &
+                            trim(station_name),trim(network_name), &
+                            sngl(stlat),sngl(stlon),sngl(stele),sngl(stbur)
+
+       endif
+      end if
+    enddo
+    close(IIN)
+    close(IOUT)
+
+    write(IMAIN,*)
+    write(IMAIN,*) 'there are ',nrec,' stations in file ', trim(filename)
+    write(IMAIN,*) 'saving ',nrec_filtered,' stations inside the model in file ', trim(filtered_filename)
+    write(IMAIN,*) 'excluding ',nrec - nrec_filtered,' stations located outside the model'
+    write(IMAIN,*)
+
+    if( nrec_filtered < 1 ) then
+      write(IMAIN,*) 'error filtered stations:'
+      write(IMAIN,*) '  simulation needs at least 1 station but got ',nrec_filtered
+      write(IMAIN,*)
+      write(IMAIN,*) '  check that stations in file '//trim(filename)//' are within'
+
+      if( SUPPRESS_UTM_PROJECTION ) then
+        write(IMAIN,*) '    latitude min/max : ',LATITUDE_MIN,LATITUDE_MAX
+        write(IMAIN,*) '    longitude min/max: ',LONGITUDE_MIN,LONGITUDE_MAX
+      else
+        ! convert edge locations from UTM back to lat/lon
+        call utm_geo(minlon,minlat,LONGITUDE_MIN,LATITUDE_MIN,&
+             UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
+        call utm_geo(maxlon,maxlat,LONGITUDE_MAX,LATITUDE_MAX,&
+             UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
+        write(IMAIN,*) '    longitude min/max: ',minlon,maxlon
+        write(IMAIN,*) '    latitude min/max : ',minlat,maxlat
+        write(IMAIN,*) '    UTM x min/max: ',LONGITUDE_MIN,LONGITUDE_MAX
+        write(IMAIN,*) '    UTM y min/max : ',LATITUDE_MIN,LATITUDE_MAX
+      endif
+
+      write(IMAIN,*)
+    endif
+
+  endif
+
+  nfilter = nrec_filtered
+
+  end subroutine station_filter
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_source.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_source.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_source.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,948 +1,948 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!----
-!----  locate_source finds the correct position of the source
-!----
-
-  subroutine locate_source(ibool,NSOURCES,myrank,NSPEC_AB,NGLOB_AB,xstore,ystore,zstore, &
-                 xigll,yigll,zigll,NPROC, &
-                 tshift_cmt,min_tshift_cmt_original,yr,jda,ho,mi,utm_x_source,utm_y_source, &
-                 DT,hdur,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
-                 islice_selected_source,ispec_selected_source, &
-                 xi_source,eta_source,gamma_source, &
-                 UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
-                 PRINT_SOURCE_TIME_FUNCTION, &
-                 nu_source,iglob_is_surface_external_mesh,ispec_is_surface_external_mesh, &
-                 ispec_is_acoustic,ispec_is_elastic, &
-                 num_free_surface_faces,free_surface_ispec,free_surface_ijk)
-
-  implicit none
-
-  include "constants.h"
-
-  integer NPROC,UTM_PROJECTION_ZONE
-  integer NSPEC_AB,NGLOB_AB,NSOURCES
-
-  logical PRINT_SOURCE_TIME_FUNCTION,SUPPRESS_UTM_PROJECTION
-
-  double precision DT
-
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-
-  integer myrank
-
-  ! arrays containing coordinates of the points
-  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: xstore,ystore,zstore
-
-  logical, dimension(NSPEC_AB) :: ispec_is_acoustic,ispec_is_elastic
-
-  integer yr,jda,ho,mi
-
-  double precision tshift_cmt(NSOURCES)
-  double precision sec,min_tshift_cmt_original
-
-  integer iprocloop
-
-  integer i,j,k,ispec,iglob,iglob_selected,inode,iface,isource
-  integer imin,imax,jmin,jmax,kmin,kmax,igll,jgll,kgll
-  integer iselected,jselected,iface_selected,iadjust,jadjust
-  integer iproc(1)
-
-  double precision, dimension(NSOURCES) :: utm_x_source,utm_y_source
-  double precision dist
-  double precision xi,eta,gamma,dx,dy,dz,dxi,deta
-
-  ! Gauss-Lobatto-Legendre points of integration
-  double precision xigll(NGLLX)
-  double precision yigll(NGLLY)
-  double precision zigll(NGLLZ)
-
-  ! topology of the control points of the surface element
-  integer iax,iay,iaz
-  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-  ! coordinates of the control points of the surface element
-  double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
-
-  integer iter_loop
-
-  integer ia
-  double precision x,y,z
-  double precision xix,xiy,xiz
-  double precision etax,etay,etaz
-  double precision gammax,gammay,gammaz
-  double precision dgamma
-
-  double precision final_distance_source(NSOURCES)
-
-  double precision x_target_source,y_target_source,z_target_source
-
-  double precision,dimension(1) :: altitude_source,distmin_ele
-  double precision,dimension(NPROC) :: distmin_ele_all,elevation_all
-  double precision,dimension(4) :: elevation_node,dist_node
-
-  integer islice_selected_source(NSOURCES)
-
-  ! timer MPI
-  double precision, external :: wtime
-  double precision time_start,tCPU
-
-  integer ispec_selected_source(NSOURCES)
-
-  integer ngather, ns, ne, ig, is, ng
-
-  integer, dimension(NGATHER_SOURCES,0:NPROC-1) :: ispec_selected_source_all
-  double precision, dimension(NGATHER_SOURCES,0:NPROC-1) :: xi_source_all,eta_source_all,gamma_source_all, &
-     final_distance_source_all,x_found_source_all,y_found_source_all,z_found_source_all
-  double precision, dimension(3,3,NGATHER_SOURCES,0:NPROC-1) :: nu_source_all
-
-  double precision, dimension(:), allocatable :: tmp_local
-  double precision, dimension(:,:),allocatable :: tmp_all_local
-
-  double precision hdur(NSOURCES)
-  double precision :: f0,t0_ricker
-
-  double precision, dimension(NSOURCES) :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
-  double precision, dimension(NSOURCES) :: xi_source,eta_source,gamma_source
-  double precision, dimension(3,3,NSOURCES) :: nu_source
-
-  double precision, dimension(NSOURCES) :: lat,long,depth
-  double precision moment_tensor(6,NSOURCES)
-
-  character(len=256) OUTPUT_FILES
-
-  double precision, dimension(NSOURCES) :: x_found_source,y_found_source,z_found_source
-  double precision, dimension(NSOURCES) :: elevation
-  double precision distmin
-
-  integer, dimension(:), allocatable :: tmp_i_local
-  integer, dimension(:,:),allocatable :: tmp_i_all_local
-
-  ! for surface locating and normal computing with external mesh
-  integer :: pt0_ix,pt0_iy,pt0_iz,pt1_ix,pt1_iy,pt1_iz,pt2_ix,pt2_iy,pt2_iz
-  integer :: num_free_surface_faces
-  real(kind=CUSTOM_REAL), dimension(3) :: u_vector,v_vector,w_vector
-  logical, dimension(NGLOB_AB) :: iglob_is_surface_external_mesh
-  logical, dimension(NSPEC_AB) :: ispec_is_surface_external_mesh
-  integer, dimension(num_free_surface_faces) :: free_surface_ispec
-  integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
-
-  integer ix_initial_guess_source,iy_initial_guess_source,iz_initial_guess_source
-  integer ier
-  integer, dimension(NSOURCES) :: idomain
-  integer, dimension(NGATHER_SOURCES,0:NPROC-1) :: idomain_all
-
-  ! get the base pathname for output files
-  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
-
-  ! read all the sources (note: each process reads the source file)
-  call get_cmt(yr,jda,ho,mi,sec,tshift_cmt,hdur,lat,long,depth,moment_tensor, &
-              DT,NSOURCES,min_tshift_cmt_original)
-
-  ! define topology of the control element
-  call usual_hex_nodes(iaddx,iaddy,iaddz)
-
-  ! get MPI starting time
-  time_start = wtime()
-
-  ! user output
-  if( myrank == 0 ) then
-    if(SUPPRESS_UTM_PROJECTION ) then
-      write(IMAIN,*) 'no UTM projection:'
-    else
-      write(IMAIN,*) 'UTM projection:'
-      write(IMAIN,*) '  UTM zone: ',UTM_PROJECTION_ZONE
-    endif
-    if( USE_SOURCES_RECVS_Z ) then
-      write(IMAIN,*) '  (depth) becomes directly (z) coordinate'
-    endif
-  endif
-
-  ! loop on all the sources
-  do isource = 1,NSOURCES
-
-    !
-    ! r -> z, theta -> -y, phi -> x
-    !
-    !  Mrr =  Mzz
-    !  Mtt =  Myy
-    !  Mpp =  Mxx
-    !  Mrt = -Myz
-    !  Mrp =  Mxz
-    !  Mtp = -Mxy
-
-    ! get the moment tensor
-    Mzz(isource) = + moment_tensor(1,isource)
-    Mxx(isource) = + moment_tensor(3,isource)
-    Myy(isource) = + moment_tensor(2,isource)
-    Mxz(isource) = + moment_tensor(5,isource)
-    Myz(isource) = - moment_tensor(4,isource)
-    Mxy(isource) = - moment_tensor(6,isource)
-
-    ! gets UTM x,y
-    call utm_geo(long(isource),lat(isource),utm_x_source(isource),utm_y_source(isource), &
-                   UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
-
-    ! get approximate topography elevation at source long/lat coordinates
-    ! set distance to huge initial value
-    distmin = HUGEVAL
-    if(num_free_surface_faces > 0) then
-      iglob_selected = 1
-      ! loop only on points inside the element
-      ! exclude edges to ensure this point is not shared with other elements
-      imin = 2
-      imax = NGLLX - 1
-
-      jmin = 2
-      jmax = NGLLY - 1
-
-      iselected = 0
-      jselected = 0
-      iface_selected = 0
-      do iface=1,num_free_surface_faces
-        do j=jmin,jmax
-          do i=imin,imax
-
-            ispec = free_surface_ispec(iface)
-            igll = free_surface_ijk(1,(j-1)*NGLLY+i,iface)
-            jgll = free_surface_ijk(2,(j-1)*NGLLY+i,iface)
-            kgll = free_surface_ijk(3,(j-1)*NGLLY+i,iface)
-            iglob = ibool(igll,jgll,kgll,ispec)
-
-            ! keep this point if it is closer to the receiver
-            dist = dsqrt((utm_x_source(isource)-dble(xstore(iglob)))**2 + &
-                     (utm_y_source(isource)-dble(ystore(iglob)))**2)
-            if(dist < distmin) then
-              distmin = dist
-              iglob_selected = iglob
-              iface_selected = iface
-              iselected = i
-              jselected = j
-              altitude_source(1) = zstore(iglob_selected)
-            endif
-          enddo
-        enddo
-        ! end of loop on all the elements on the free surface
-      end do
-      !  weighted mean at current point of topography elevation of the four closest nodes
-      !  set distance to huge initial value
-      distmin = HUGEVAL
-      do j=jselected,jselected+1
-        do i=iselected,iselected+1
-          inode = 1
-          do jadjust=0,1
-            do iadjust= 0,1
-              ispec = free_surface_ispec(iface_selected)
-              igll = free_surface_ijk(1,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
-              jgll = free_surface_ijk(2,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
-              kgll = free_surface_ijk(3,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
-              iglob = ibool(igll,jgll,kgll,ispec)
-
-              elevation_node(inode) = zstore(iglob)
-              dist_node(inode) = dsqrt((utm_x_source(isource)-dble(xstore(iglob)))**2 + &
-                        (utm_y_source(isource)-dble(ystore(iglob)))**2)
-              inode = inode + 1
-            end do
-          end do
-          dist = sum(dist_node)
-          if(dist < distmin) then
-            distmin = dist
-            altitude_source(1) = (dist_node(1)/dist)*elevation_node(1) + &
-                     (dist_node(2)/dist)*elevation_node(2) + &
-                     (dist_node(3)/dist)*elevation_node(3) + &
-                     (dist_node(4)/dist)*elevation_node(4)
-          endif
-        end do
-      end do
-    end if
-    !  MPI communications to determine the best slice
-    distmin_ele(1)= distmin
-    call gather_all_dp(distmin_ele,1,distmin_ele_all,1,NPROC)
-    call gather_all_dp(altitude_source,1,elevation_all,1,NPROC)
-    if(myrank == 0) then
-      iproc = minloc(distmin_ele_all)
-      altitude_source(1) = elevation_all(iproc(1))
-    end if
-    call bcast_all_dp(altitude_source,1)
-    elevation(isource) = altitude_source(1)
-
-    ! orientation consistent with the UTM projection
-    !     East
-    nu_source(1,1,isource) = 1.d0
-    nu_source(1,2,isource) = 0.d0
-    nu_source(1,3,isource) = 0.d0
-    !     North
-    nu_source(2,1,isource) = 0.d0
-    nu_source(2,2,isource) = 1.d0
-    nu_source(2,3,isource) = 0.d0
-    !     Vertical
-    nu_source(3,1,isource) = 0.d0
-    nu_source(3,2,isource) = 0.d0
-    nu_source(3,3,isource) = 1.d0
-
-    x_target_source = utm_x_source(isource)
-    y_target_source = utm_y_source(isource)
-
-    ! source Z coordinate
-    if( USE_SOURCES_RECVS_Z ) then
-      ! alternative: depth is given as z value directly
-      z_target_source = depth(isource)
-    else
-      ! depth in CMTSOLUTION given in km
-      z_target_source =  - depth(isource)*1000.0d0 + elevation(isource)
-    endif
-
-    ! set distance to huge initial value
-    distmin = HUGEVAL
-
-    ispec_selected_source(isource) = 0
-    ix_initial_guess_source = 0
-    iy_initial_guess_source = 0
-    iz_initial_guess_source = 0
-    do ispec=1,NSPEC_AB
-
-      ! define the interval in which we look for points
-      if(USE_FORCE_POINT_SOURCE) then
-        imin = 1
-        imax = NGLLX
-
-        jmin = 1
-        jmax = NGLLY
-
-        kmin = 1
-        kmax = NGLLZ
-
-      else
-        ! loop only on points inside the element
-        ! exclude edges to ensure this point is not shared with other elements
-        imin = 2
-        imax = NGLLX - 1
-
-        jmin = 2
-        jmax = NGLLY - 1
-
-        kmin = 2
-        kmax = NGLLZ - 1
-      endif
-
-      do k = kmin,kmax
-        do j = jmin,jmax
-          do i = imin,imax
-
-            iglob = ibool(i,j,k,ispec)
-
-            if (.not. SOURCES_CAN_BE_BURIED_EXT_MESH) then
-              if ((.not. iglob_is_surface_external_mesh(iglob)) .or. (.not. ispec_is_surface_external_mesh(ispec))) then
-                cycle
-              endif
-            endif
-
-            ! keep this point if it is closer to the source
-            dist = dsqrt((x_target_source-dble(xstore(iglob)))**2 &
-                  +(y_target_source-dble(ystore(iglob)))**2 &
-                  +(z_target_source-dble(zstore(iglob)))**2)
-            if(dist < distmin) then
-              distmin = dist
-              ispec_selected_source(isource) = ispec
-              ix_initial_guess_source = i
-              iy_initial_guess_source = j
-              iz_initial_guess_source = k
-
-              ! store xi,eta,gamma and x,y,z of point found
-              ! note: they have range [1.0d0,NGLLX/Y/Z], used for point sources
-              !          see e.g. in compute_add_source_elastic.f90
-              xi_source(isource) = dble(ix_initial_guess_source)
-              eta_source(isource) = dble(iy_initial_guess_source)
-              gamma_source(isource) = dble(iz_initial_guess_source)
-
-              x_found_source(isource) = xstore(iglob)
-              y_found_source(isource) = ystore(iglob)
-              z_found_source(isource) = zstore(iglob)
-
-              ! compute final distance between asked and found (converted to km)
-              final_distance_source(isource) = dsqrt((x_target_source-x_found_source(isource))**2 + &
-                (y_target_source-y_found_source(isource))**2 + (z_target_source-z_found_source(isource))**2)
-
-            endif
-
-          enddo
-        enddo
-      enddo
-
-    ! end of loop on all the elements in current slice
-    enddo
-
-    if (ispec_selected_source(isource) == 0) then
-      final_distance_source(isource) = HUGEVAL
-    endif
-
-    ! sets whether acoustic (1) or elastic (2)
-    if( ispec_is_acoustic( ispec_selected_source(isource) ) ) then
-      idomain(isource) = IDOMAIN_ACOUSTIC
-    else if( ispec_is_elastic( ispec_selected_source(isource) ) ) then
-      idomain(isource) = IDOMAIN_ELASTIC
-    else
-      idomain(isource) = 0
-    endif
-
-    ! get normal to the face of the hexahedra if receiver is on the surface
-    if ((.not. SOURCES_CAN_BE_BURIED_EXT_MESH) .and. &
-       .not. (ispec_selected_source(isource) == 0)) then
-
-      ! note: at this point, xi_source,.. are in range [1.0d0,NGLLX/Y/Z] for point sources only,
-      !            for non-point sources the range is limited to [2.0d0,NGLLX/Y/Z - 1]
-      if( .not. USE_FORCE_POINT_SOURCE ) call exit_MPI(myrank,'error locate source: no point source at surface')
-
-      ! initialize indices
-      pt0_ix = -1
-      pt0_iy = -1
-      pt0_iz = -1
-      pt1_ix = -1
-      pt1_iy = -1
-      pt1_iz = -1
-      pt2_ix = -1
-      pt2_iy = -1
-      pt2_iz = -1
-
-      ! we get two vectors of the face (three points) to compute the normal
-      if (nint(xi_source(isource)) == 1 .and. &
-         iglob_is_surface_external_mesh(ibool(1,2,2,ispec_selected_source(isource)))) then
-        pt0_ix = 1
-        pt0_iy = NGLLY
-        pt0_iz = 1
-        pt1_ix = 1
-        pt1_iy = 1
-        pt1_iz = 1
-        pt2_ix = 1
-        pt2_iy = NGLLY
-        pt2_iz = NGLLZ
-      endif
-      if (nint(xi_source(isource)) == NGLLX .and. &
-         iglob_is_surface_external_mesh(ibool(NGLLX,2,2,ispec_selected_source(isource)))) then
-        pt0_ix = NGLLX
-        pt0_iy = 1
-        pt0_iz = 1
-        pt1_ix = NGLLX
-        pt1_iy = NGLLY
-        pt1_iz = 1
-        pt2_ix = NGLLX
-        pt2_iy = 1
-        pt2_iz = NGLLZ
-      endif
-      if (nint(eta_source(isource)) == 1 .and. &
-         iglob_is_surface_external_mesh(ibool(2,1,2,ispec_selected_source(isource)))) then
-        pt0_ix = 1
-        pt0_iy = 1
-        pt0_iz = 1
-        pt1_ix = NGLLX
-        pt1_iy = 1
-        pt1_iz = 1
-        pt2_ix = 1
-        pt2_iy = 1
-        pt2_iz = NGLLZ
-      endif
-      if (nint(eta_source(isource)) == NGLLY .and. &
-         iglob_is_surface_external_mesh(ibool(2,NGLLY,2,ispec_selected_source(isource)))) then
-        pt0_ix = NGLLX
-        pt0_iy = NGLLY
-        pt0_iz = 1
-        pt1_ix = 1
-        pt1_iy = NGLLY
-        pt1_iz = 1
-        pt2_ix = NGLLX
-        pt2_iy = NGLLY
-        pt2_iz = NGLLZ
-      endif
-      if (nint(gamma_source(isource)) == 1 .and. &
-         iglob_is_surface_external_mesh(ibool(2,2,1,ispec_selected_source(isource)))) then
-        pt0_ix = NGLLX
-        pt0_iy = 1
-        pt0_iz = 1
-        pt1_ix = 1
-        pt1_iy = 1
-        pt1_iz = 1
-        pt2_ix = NGLLX
-        pt2_iy = NGLLY
-        pt2_iz = 1
-      endif
-      if (nint(gamma_source(isource)) == NGLLZ .and. &
-         iglob_is_surface_external_mesh(ibool(2,2,NGLLZ,ispec_selected_source(isource)))) then
-        pt0_ix = 1
-        pt0_iy = 1
-        pt0_iz = NGLLZ
-        pt1_ix = NGLLX
-        pt1_iy = 1
-        pt1_iz = NGLLZ
-        pt2_ix = 1
-        pt2_iy = NGLLY
-        pt2_iz = NGLLZ
-      endif
-
-      if (pt0_ix<0 .or.pt0_iy<0 .or. pt0_iz<0 .or. &
-         pt1_ix<0 .or. pt1_iy<0 .or. pt1_iz<0 .or. &
-         pt2_ix<0 .or. pt2_iy<0 .or. pt2_iz<0) then
-        call exit_mpi(myrank,'error in computing normal for sources.')
-      endif
-
-      u_vector(1) = xstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
-         - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
-      u_vector(2) = ystore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
-         - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
-      u_vector(3) = zstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
-         - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
-      v_vector(1) = xstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
-         - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
-      v_vector(2) = ystore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
-         - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
-      v_vector(3) = zstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
-         - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
-
-      ! cross product
-      w_vector(1) = u_vector(2)*v_vector(3) - u_vector(3)*v_vector(2)
-      w_vector(2) = u_vector(3)*v_vector(1) - u_vector(1)*v_vector(3)
-      w_vector(3) = u_vector(1)*v_vector(2) - u_vector(2)*v_vector(1)
-
-      ! normalize vector w
-      w_vector(:) = w_vector(:)/sqrt(w_vector(1)**2+w_vector(2)**2+w_vector(3)**2)
-
-      ! build the two other vectors for a direct base: we normalize u, and v=w^u
-      u_vector(:) = u_vector(:)/sqrt(u_vector(1)**2+u_vector(2)**2+u_vector(3)**2)
-      v_vector(1) = w_vector(2)*u_vector(3) - w_vector(3)*u_vector(2)
-      v_vector(2) = w_vector(3)*u_vector(1) - w_vector(1)*u_vector(3)
-      v_vector(3) = w_vector(1)*u_vector(2) - w_vector(2)*u_vector(1)
-
-      ! build rotation matrice nu for seismograms
-      !     East (u)
-      nu_source(1,1,isource) = u_vector(1)
-      nu_source(1,2,isource) = v_vector(1)
-      nu_source(1,3,isource) = w_vector(1)
-      !     North (v)
-      nu_source(2,1,isource) = u_vector(2)
-      nu_source(2,2,isource) = v_vector(2)
-      nu_source(2,3,isource) = w_vector(2)
-      !     Vertical (w)
-      nu_source(3,1,isource) = u_vector(3)
-      nu_source(3,2,isource) = v_vector(3)
-      nu_source(3,3,isource) = w_vector(3)
-
-    endif ! of if (.not. SOURCES_CAN_BE_BURIED_EXT_MESH)
-
-! *******************************************
-! find the best (xi,eta,gamma) for the source
-! *******************************************
-
-    ! for point sources, the location will be exactly at a GLL point
-    ! otherwise this tries to find best location
-    if(.not. USE_FORCE_POINT_SOURCE) then
-
-      ! uses actual location interpolators, in range [-1,1]
-      xi = xigll(ix_initial_guess_source)
-      eta = yigll(iy_initial_guess_source)
-      gamma = zigll(iz_initial_guess_source)
-
-      ! define coordinates of the control points of the element
-      do ia=1,NGNOD
-        iax = 0
-        iay = 0
-        iaz = 0
-        if(iaddx(ia) == 0) then
-          iax = 1
-        else if(iaddx(ia) == 1) then
-          iax = (NGLLX+1)/2
-        else if(iaddx(ia) == 2) then
-          iax = NGLLX
-        else
-          call exit_MPI(myrank,'incorrect value of iaddx')
-        endif
-
-        if(iaddy(ia) == 0) then
-          iay = 1
-        else if(iaddy(ia) == 1) then
-          iay = (NGLLY+1)/2
-        else if(iaddy(ia) == 2) then
-          iay = NGLLY
-        else
-          call exit_MPI(myrank,'incorrect value of iaddy')
-        endif
-
-        if(iaddz(ia) == 0) then
-          iaz = 1
-        else if(iaddz(ia) == 1) then
-          iaz = (NGLLZ+1)/2
-        else if(iaddz(ia) == 2) then
-          iaz = NGLLZ
-        else
-          call exit_MPI(myrank,'incorrect value of iaddz')
-        endif
-
-        iglob = ibool(iax,iay,iaz,ispec_selected_source(isource))
-        xelm(ia) = dble(xstore(iglob))
-        yelm(ia) = dble(ystore(iglob))
-        zelm(ia) = dble(zstore(iglob))
-
-      enddo
-
-      ! iterate to solve the non linear system
-      do iter_loop = 1,NUM_ITER
-
-        ! recompute jacobian for the new point
-        call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
-           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
-
-        ! compute distance to target location
-        dx = - (x - x_target_source)
-        dy = - (y - y_target_source)
-        dz = - (z - z_target_source)
-
-        ! compute increments
-        dxi  = xix*dx + xiy*dy + xiz*dz
-        deta = etax*dx + etay*dy + etaz*dz
-        dgamma = gammax*dx + gammay*dy + gammaz*dz
-
-        ! update values
-        xi = xi + dxi
-        eta = eta + deta
-        gamma = gamma + dgamma
-
-        ! impose that we stay in that element
-        ! (useful if user gives a source outside the mesh for instance)
-        if (xi > 1.d0) xi = 1.d0
-        if (xi < -1.d0) xi = -1.d0
-        if (eta > 1.d0) eta = 1.d0
-        if (eta < -1.d0) eta = -1.d0
-        if (gamma > 1.d0) gamma = 1.d0
-        if (gamma < -1.d0) gamma = -1.d0
-
-      enddo
-
-      ! compute final coordinates of point found
-      call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
-         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
-
-      ! store xi,eta,gamma and x,y,z of point found
-      ! note: xi/eta/gamma will be in range [-1,1]
-      xi_source(isource) = xi
-      eta_source(isource) = eta
-      gamma_source(isource) = gamma
-      x_found_source(isource) = x
-      y_found_source(isource) = y
-      z_found_source(isource) = z
-
-      ! compute final distance between asked and found (converted to km)
-      final_distance_source(isource) = dsqrt((x_target_source-x_found_source(isource))**2 + &
-        (y_target_source-y_found_source(isource))**2 + (z_target_source-z_found_source(isource))**2)
-
-    endif ! of if (.not. USE_FORCE_POINT_SOURCE)
-
-  ! end of loop on all the sources
-  enddo
-
-  ! now gather information from all the nodes
-  ngather = NSOURCES/NGATHER_SOURCES
-  if (mod(NSOURCES,NGATHER_SOURCES)/= 0) ngather = ngather+1
-  do ig = 1, ngather
-    ns = (ig-1) * NGATHER_SOURCES + 1
-    ne = min(ig*NGATHER_SOURCES, NSOURCES)
-    ng = ne - ns + 1
-
-    ispec_selected_source_all(:,:) = -1
-
-    ! avoids warnings about temporary creations of arrays for function call by compiler
-    allocate(tmp_i_local(ng),tmp_i_all_local(ng,0:NPROC-1),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array tmp_i_local'
-    tmp_i_local(:) = ispec_selected_source(ns:ne)
-    call gather_all_i(tmp_i_local,ng,tmp_i_all_local,ng,NPROC)
-    ispec_selected_source_all(1:ng,:) = tmp_i_all_local(:,:)
-
-    ! acoustic/elastic domain
-    tmp_i_local(:) = idomain(ns:ne)
-    call gather_all_i(tmp_i_local,ng,tmp_i_all_local,ng,NPROC)
-    idomain_all(1:ng,:) = tmp_i_all_local(:,:)
-
-    deallocate(tmp_i_local,tmp_i_all_local)
-
-    ! avoids warnings about temporary creations of arrays for function call by compiler
-    allocate(tmp_local(ng),tmp_all_local(ng,0:NPROC-1),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array tmp_local'
-    tmp_local(:) = xi_source(ns:ne)
-    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
-    xi_source_all(1:ng,:) = tmp_all_local(:,:)
-
-    tmp_local(:) = eta_source(ns:ne)
-    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
-    eta_source_all(1:ng,:) = tmp_all_local(:,:)
-
-    tmp_local(:) = gamma_source(ns:ne)
-    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
-    gamma_source_all(1:ng,:) = tmp_all_local(:,:)
-
-    tmp_local(:) = final_distance_source(ns:ne)
-    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
-    final_distance_source_all(1:ng,:) = tmp_all_local(:,:)
-
-    tmp_local(:) = x_found_source(ns:ne)
-    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
-    x_found_source_all(1:ng,:) = tmp_all_local(:,:)
-
-    tmp_local(:) = y_found_source(ns:ne)
-    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
-    y_found_source_all(1:ng,:) = tmp_all_local(:,:)
-
-    tmp_local(:) = z_found_source(ns:ne)
-    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
-    z_found_source_all(1:ng,:) = tmp_all_local(:,:)
-
-    do i=1,3
-      do j=1,3
-        tmp_local(:) = nu_source(i,j,ns:ne)
-        call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
-        nu_source_all(i,j,1:ng,:) = tmp_all_local(:,:)
-      enddo
-    enddo
-    deallocate(tmp_local,tmp_all_local)
-
-    ! this is executed by main process only
-    if(myrank == 0) then
-
-      ! check that the gather operation went well
-      if(any(ispec_selected_source_all(1:ng,:) == -1)) call exit_MPI(myrank,'gather operation failed for source')
-
-      ! loop on all the sources
-      do is = 1,ng
-        isource = ns + is - 1
-
-        ! loop on all the results to determine the best slice
-        distmin = HUGEVAL
-        do iprocloop = 0,NPROC-1
-          if(final_distance_source_all(is,iprocloop) < distmin) then
-            distmin = final_distance_source_all(is,iprocloop)
-            islice_selected_source(isource) = iprocloop
-            ispec_selected_source(isource) = ispec_selected_source_all(is,iprocloop)
-            xi_source(isource) = xi_source_all(is,iprocloop)
-            eta_source(isource) = eta_source_all(is,iprocloop)
-            gamma_source(isource) = gamma_source_all(is,iprocloop)
-            x_found_source(isource) = x_found_source_all(is,iprocloop)
-            y_found_source(isource) = y_found_source_all(is,iprocloop)
-            z_found_source(isource) = z_found_source_all(is,iprocloop)
-            nu_source(:,:,isource) = nu_source_all(:,:,isource,iprocloop)
-            idomain(isource) = idomain_all(is,iprocloop)
-          endif
-        enddo
-        final_distance_source(isource) = distmin
-
-      enddo
-    endif !myrank
-  enddo ! ngather
-
-  if (myrank == 0) then
-
-    do isource = 1,NSOURCES
-
-      if(SHOW_DETAILS_LOCATE_SOURCE .or. NSOURCES == 1) then
-
-        write(IMAIN,*)
-        write(IMAIN,*) '*************************************'
-        write(IMAIN,*) ' locating source ',isource
-        write(IMAIN,*) '*************************************'
-        write(IMAIN,*)
-        write(IMAIN,*) 'source located in slice ',islice_selected_source(isource)
-        write(IMAIN,*) '               in element ',ispec_selected_source(isource)
-
-        if( idomain(isource) == IDOMAIN_ACOUSTIC ) then
-          write(IMAIN,*) '               in acoustic domain'
-        else if( idomain(isource) == IDOMAIN_ELASTIC ) then
-          write(IMAIN,*) '               in elastic domain'
-        else
-          write(IMAIN,*) '               in unknown domain'
-        endif
-
-        write(IMAIN,*)
-        if(USE_FORCE_POINT_SOURCE) then
-          write(IMAIN,*) '  i index of source in that element: ',nint(xi_source(isource))
-          write(IMAIN,*) '  j index of source in that element: ',nint(eta_source(isource))
-          write(IMAIN,*) '  k index of source in that element: ',nint(gamma_source(isource))
-          write(IMAIN,*)
-          write(IMAIN,*) '  component direction: ',COMPONENT_FORCE_SOURCE
-          write(IMAIN,*)
-          write(IMAIN,*) '  nu1 = ',nu_source(1,:,isource)
-          write(IMAIN,*) '  nu2 = ',nu_source(2,:,isource)
-          write(IMAIN,*) '  nu3 = ',nu_source(3,:,isource)
-          write(IMAIN,*)
-          write(IMAIN,*) '  at (x,y,z) coordinates = ',x_found_source(isource),y_found_source(isource),z_found_source(isource)
-
-          ! prints frequency content for point forces
-          f0 = hdur(isource)
-          t0_ricker = 1.2d0/f0
-          write(IMAIN,*) '  using a source of dominant frequency ',f0
-          write(IMAIN,*) '  lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
-          write(IMAIN,*) '  lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
-          write(IMAIN,*) '  t0_ricker = ',t0_ricker,'tshift_cmt = ',tshift_cmt(isource)
-          write(IMAIN,*)
-          write(IMAIN,*) '  half duration -> frequency: ',hdur(isource),' seconds**(-1)'
-        else
-          write(IMAIN,*) '  xi coordinate of source in that element: ',xi_source(isource)
-          write(IMAIN,*) '  eta coordinate of source in that element: ',eta_source(isource)
-          write(IMAIN,*) '  gamma coordinate of source in that element: ',gamma_source(isource)
-          write(IMAIN,*)
-          ! add message if source is a Heaviside
-          if(hdur(isource) <= 5.*DT) then
-            write(IMAIN,*)
-            write(IMAIN,*) 'Source time function is a Heaviside, convolve later'
-            write(IMAIN,*)
-          endif
-          write(IMAIN,*) '  half duration: ',hdur(isource),' seconds'
-        endif
-        write(IMAIN,*) '  time shift: ',tshift_cmt(isource),' seconds'
-        write(IMAIN,*)
-        write(IMAIN,*) 'original (requested) position of the source:'
-        write(IMAIN,*)
-        write(IMAIN,*) '          latitude: ',lat(isource)
-        write(IMAIN,*) '         longitude: ',long(isource)
-        write(IMAIN,*)
-        if( SUPPRESS_UTM_PROJECTION ) then
-          write(IMAIN,*) '             x: ',utm_x_source(isource)
-          write(IMAIN,*) '             y: ',utm_y_source(isource)
-        else
-          write(IMAIN,*) '         UTM x: ',utm_x_source(isource)
-          write(IMAIN,*) '         UTM y: ',utm_y_source(isource)
-        endif
-        if( USE_SOURCES_RECVS_Z ) then
-          write(IMAIN,*) '         z: ',depth(isource),' km'
-        else
-          write(IMAIN,*) '         depth: ',depth(isource),' km'
-          write(IMAIN,*) 'topo elevation: ',elevation(isource)
-        endif
-
-        write(IMAIN,*)
-        write(IMAIN,*) 'position of the source that will be used:'
-        write(IMAIN,*)
-        if( SUPPRESS_UTM_PROJECTION ) then
-          write(IMAIN,*) '             x: ',x_found_source(isource)
-          write(IMAIN,*) '             y: ',y_found_source(isource)
-        else
-          write(IMAIN,*) '         UTM x: ',x_found_source(isource)
-          write(IMAIN,*) '         UTM y: ',y_found_source(isource)
-        endif
-        if( USE_SOURCES_RECVS_Z ) then
-          write(IMAIN,*) '             z: ',z_found_source(isource)
-        else
-          write(IMAIN,*) '         depth: ',dabs(z_found_source(isource) - elevation(isource))/1000.,' km'
-          write(IMAIN,*) '             z: ',z_found_source(isource)
-        endif
-        write(IMAIN,*)
-
-        ! display error in location estimate
-        write(IMAIN,*) 'error in location of the source: ',sngl(final_distance_source(isource)),' m'
-
-        ! add warning if estimate is poor
-        ! (usually means source outside the mesh given by the user)
-        if(final_distance_source(isource) > 3000.d0) then
-          write(IMAIN,*)
-          write(IMAIN,*) '*****************************************************'
-          write(IMAIN,*) '*****************************************************'
-          write(IMAIN,*) '***** WARNING: source location estimate is poor *****'
-          write(IMAIN,*) '*****************************************************'
-          write(IMAIN,*) '*****************************************************'
-        endif
-
-      endif  ! end of detailed output to locate source
-
-      ! checks CMTSOLUTION format for acoustic case
-      if( idomain(isource) == IDOMAIN_ACOUSTIC ) then
-        if( Mxx(isource) /= Myy(isource) .or. Myy(isource) /= Mzz(isource) .or. &
-           Mxy(isource) > TINYVAL .or. Mxz(isource) > TINYVAL .or. Myz(isource) > TINYVAL ) then
-            write(IMAIN,*)
-            write(IMAIN,*) ' error CMTSOLUTION format for acoustic source:'
-            write(IMAIN,*) '   acoustic source needs explosive moment tensor with'
-            write(IMAIN,*) '      Mrr = Mtt = Mpp '
-            write(IMAIN,*) '   and '
-            write(IMAIN,*) '      Mrt = Mrp = Mtp = zero'
-            write(IMAIN,*)
-            call exit_mpi(myrank,'error acoustic source')
-        endif
-      endif
-
-      ! checks source domain
-      if( idomain(isource) /= IDOMAIN_ACOUSTIC .and. idomain(isource) /= IDOMAIN_ELASTIC ) then
-        ! only acoustic/elastic domain implement yet
-        call exit_MPI(myrank,'source located in unknown domain')
-      endif
-
-    ! end of loop on all the sources
-    enddo
-
-    if( .not. SHOW_DETAILS_LOCATE_SOURCE .and. NSOURCES > 1 ) then
-        write(IMAIN,*)
-        write(IMAIN,*) '*************************************'
-        write(IMAIN,*) ' using sources ',NSOURCES
-        write(IMAIN,*) '*************************************'
-        write(IMAIN,*)
-    endif
-
-    if(PRINT_SOURCE_TIME_FUNCTION) then
-      write(IMAIN,*)
-      write(IMAIN,*) 'printing the source-time function'
-    endif
-
-    ! display maximum error in location estimate
-    write(IMAIN,*)
-    write(IMAIN,*) 'maximum error in location of the sources: ',sngl(maxval(final_distance_source)),' m'
-    write(IMAIN,*)
-
-    ! sets new utm coordinates for best locations
-    utm_x_source(:) = x_found_source(:)
-    utm_y_source(:) = y_found_source(:)
-
-  endif     ! end of section executed by main process only
-
-  ! main process broadcasts the results to all the slices
-  call bcast_all_i(islice_selected_source,NSOURCES)
-  call bcast_all_i(ispec_selected_source,NSOURCES)
-  call bcast_all_dp(xi_source,NSOURCES)
-  call bcast_all_dp(eta_source,NSOURCES)
-  call bcast_all_dp(gamma_source,NSOURCES)
-  call bcast_all_dp(utm_x_source,NSOURCES)
-  call bcast_all_dp(utm_y_source,NSOURCES)
-
-  ! elapsed time since beginning of source detection
-  if(myrank == 0) then
-    tCPU = wtime() - time_start
-    write(IMAIN,*)
-    write(IMAIN,*) 'Elapsed time for detection of sources in seconds = ',tCPU
-    write(IMAIN,*)
-    write(IMAIN,*) 'End of source detection - done'
-    write(IMAIN,*)
-    ! output source information to a file so that we can load it and write to SU headers later
-    open(unit=IOUT_SU,file=trim(OUTPUT_FILES)//'/output_list_sources.txt',status='unknown')
-    do isource=1,NSOURCES
-      write(IOUT_SU,*) x_found_source(isource),y_found_source(isource),z_found_source(isource)
-    enddo
-    close(IOUT_SU)
-  endif
-
-  end subroutine locate_source
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!----
+!----  locate_source finds the correct position of the source
+!----
+
+  subroutine locate_source(ibool,NSOURCES,myrank,NSPEC_AB,NGLOB_AB,xstore,ystore,zstore, &
+                 xigll,yigll,zigll,NPROC, &
+                 tshift_cmt,min_tshift_cmt_original,yr,jda,ho,mi,utm_x_source,utm_y_source, &
+                 DT,hdur,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
+                 islice_selected_source,ispec_selected_source, &
+                 xi_source,eta_source,gamma_source, &
+                 UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+                 PRINT_SOURCE_TIME_FUNCTION, &
+                 nu_source,iglob_is_surface_external_mesh,ispec_is_surface_external_mesh, &
+                 ispec_is_acoustic,ispec_is_elastic, &
+                 num_free_surface_faces,free_surface_ispec,free_surface_ijk)
+
+  implicit none
+
+  include "constants.h"
+
+  integer NPROC,UTM_PROJECTION_ZONE
+  integer NSPEC_AB,NGLOB_AB,NSOURCES
+
+  logical PRINT_SOURCE_TIME_FUNCTION,SUPPRESS_UTM_PROJECTION
+
+  double precision DT
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+
+  integer myrank
+
+  ! arrays containing coordinates of the points
+  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: xstore,ystore,zstore
+
+  logical, dimension(NSPEC_AB) :: ispec_is_acoustic,ispec_is_elastic
+
+  integer yr,jda,ho,mi
+
+  double precision tshift_cmt(NSOURCES)
+  double precision sec,min_tshift_cmt_original
+
+  integer iprocloop
+
+  integer i,j,k,ispec,iglob,iglob_selected,inode,iface,isource
+  integer imin,imax,jmin,jmax,kmin,kmax,igll,jgll,kgll
+  integer iselected,jselected,iface_selected,iadjust,jadjust
+  integer iproc(1)
+
+  double precision, dimension(NSOURCES) :: utm_x_source,utm_y_source
+  double precision dist
+  double precision xi,eta,gamma,dx,dy,dz,dxi,deta
+
+  ! Gauss-Lobatto-Legendre points of integration
+  double precision xigll(NGLLX)
+  double precision yigll(NGLLY)
+  double precision zigll(NGLLZ)
+
+  ! topology of the control points of the surface element
+  integer iax,iay,iaz
+  integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+  ! coordinates of the control points of the surface element
+  double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
+
+  integer iter_loop
+
+  integer ia
+  double precision x,y,z
+  double precision xix,xiy,xiz
+  double precision etax,etay,etaz
+  double precision gammax,gammay,gammaz
+  double precision dgamma
+
+  double precision final_distance_source(NSOURCES)
+
+  double precision x_target_source,y_target_source,z_target_source
+
+  double precision,dimension(1) :: altitude_source,distmin_ele
+  double precision,dimension(NPROC) :: distmin_ele_all,elevation_all
+  double precision,dimension(4) :: elevation_node,dist_node
+
+  integer islice_selected_source(NSOURCES)
+
+  ! timer MPI
+  double precision, external :: wtime
+  double precision time_start,tCPU
+
+  integer ispec_selected_source(NSOURCES)
+
+  integer ngather, ns, ne, ig, is, ng
+
+  integer, dimension(NGATHER_SOURCES,0:NPROC-1) :: ispec_selected_source_all
+  double precision, dimension(NGATHER_SOURCES,0:NPROC-1) :: xi_source_all,eta_source_all,gamma_source_all, &
+     final_distance_source_all,x_found_source_all,y_found_source_all,z_found_source_all
+  double precision, dimension(3,3,NGATHER_SOURCES,0:NPROC-1) :: nu_source_all
+
+  double precision, dimension(:), allocatable :: tmp_local
+  double precision, dimension(:,:),allocatable :: tmp_all_local
+
+  double precision hdur(NSOURCES)
+  double precision :: f0,t0_ricker
+
+  double precision, dimension(NSOURCES) :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
+  double precision, dimension(NSOURCES) :: xi_source,eta_source,gamma_source
+  double precision, dimension(3,3,NSOURCES) :: nu_source
+
+  double precision, dimension(NSOURCES) :: lat,long,depth
+  double precision moment_tensor(6,NSOURCES)
+
+  character(len=256) OUTPUT_FILES
+
+  double precision, dimension(NSOURCES) :: x_found_source,y_found_source,z_found_source
+  double precision, dimension(NSOURCES) :: elevation
+  double precision distmin
+
+  integer, dimension(:), allocatable :: tmp_i_local
+  integer, dimension(:,:),allocatable :: tmp_i_all_local
+
+  ! for surface locating and normal computing with external mesh
+  integer :: pt0_ix,pt0_iy,pt0_iz,pt1_ix,pt1_iy,pt1_iz,pt2_ix,pt2_iy,pt2_iz
+  integer :: num_free_surface_faces
+  real(kind=CUSTOM_REAL), dimension(3) :: u_vector,v_vector,w_vector
+  logical, dimension(NGLOB_AB) :: iglob_is_surface_external_mesh
+  logical, dimension(NSPEC_AB) :: ispec_is_surface_external_mesh
+  integer, dimension(num_free_surface_faces) :: free_surface_ispec
+  integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
+
+  integer ix_initial_guess_source,iy_initial_guess_source,iz_initial_guess_source
+  integer ier
+  integer, dimension(NSOURCES) :: idomain
+  integer, dimension(NGATHER_SOURCES,0:NPROC-1) :: idomain_all
+
+  ! get the base pathname for output files
+  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
+
+  ! read all the sources (note: each process reads the source file)
+  call get_cmt(yr,jda,ho,mi,sec,tshift_cmt,hdur,lat,long,depth,moment_tensor, &
+              DT,NSOURCES,min_tshift_cmt_original)
+
+  ! define topology of the control element
+  call usual_hex_nodes(iaddx,iaddy,iaddz)
+
+  ! get MPI starting time
+  time_start = wtime()
+
+  ! user output
+  if( myrank == 0 ) then
+    if(SUPPRESS_UTM_PROJECTION ) then
+      write(IMAIN,*) 'no UTM projection:'
+    else
+      write(IMAIN,*) 'UTM projection:'
+      write(IMAIN,*) '  UTM zone: ',UTM_PROJECTION_ZONE
+    endif
+    if( USE_SOURCES_RECVS_Z ) then
+      write(IMAIN,*) '  (depth) becomes directly (z) coordinate'
+    endif
+  endif
+
+  ! loop on all the sources
+  do isource = 1,NSOURCES
+
+    !
+    ! r -> z, theta -> -y, phi -> x
+    !
+    !  Mrr =  Mzz
+    !  Mtt =  Myy
+    !  Mpp =  Mxx
+    !  Mrt = -Myz
+    !  Mrp =  Mxz
+    !  Mtp = -Mxy
+
+    ! get the moment tensor
+    Mzz(isource) = + moment_tensor(1,isource)
+    Mxx(isource) = + moment_tensor(3,isource)
+    Myy(isource) = + moment_tensor(2,isource)
+    Mxz(isource) = + moment_tensor(5,isource)
+    Myz(isource) = - moment_tensor(4,isource)
+    Mxy(isource) = - moment_tensor(6,isource)
+
+    ! gets UTM x,y
+    call utm_geo(long(isource),lat(isource),utm_x_source(isource),utm_y_source(isource), &
+                   UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
+
+    ! get approximate topography elevation at source long/lat coordinates
+    ! set distance to huge initial value
+    distmin = HUGEVAL
+    if(num_free_surface_faces > 0) then
+      iglob_selected = 1
+      ! loop only on points inside the element
+      ! exclude edges to ensure this point is not shared with other elements
+      imin = 2
+      imax = NGLLX - 1
+
+      jmin = 2
+      jmax = NGLLY - 1
+
+      iselected = 0
+      jselected = 0
+      iface_selected = 0
+      do iface=1,num_free_surface_faces
+        do j=jmin,jmax
+          do i=imin,imax
+
+            ispec = free_surface_ispec(iface)
+            igll = free_surface_ijk(1,(j-1)*NGLLY+i,iface)
+            jgll = free_surface_ijk(2,(j-1)*NGLLY+i,iface)
+            kgll = free_surface_ijk(3,(j-1)*NGLLY+i,iface)
+            iglob = ibool(igll,jgll,kgll,ispec)
+
+            ! keep this point if it is closer to the receiver
+            dist = dsqrt((utm_x_source(isource)-dble(xstore(iglob)))**2 + &
+                     (utm_y_source(isource)-dble(ystore(iglob)))**2)
+            if(dist < distmin) then
+              distmin = dist
+              iglob_selected = iglob
+              iface_selected = iface
+              iselected = i
+              jselected = j
+              altitude_source(1) = zstore(iglob_selected)
+            endif
+          enddo
+        enddo
+        ! end of loop on all the elements on the free surface
+      end do
+      !  weighted mean at current point of topography elevation of the four closest nodes
+      !  set distance to huge initial value
+      distmin = HUGEVAL
+      do j=jselected,jselected+1
+        do i=iselected,iselected+1
+          inode = 1
+          do jadjust=0,1
+            do iadjust= 0,1
+              ispec = free_surface_ispec(iface_selected)
+              igll = free_surface_ijk(1,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+              jgll = free_surface_ijk(2,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+              kgll = free_surface_ijk(3,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+              iglob = ibool(igll,jgll,kgll,ispec)
+
+              elevation_node(inode) = zstore(iglob)
+              dist_node(inode) = dsqrt((utm_x_source(isource)-dble(xstore(iglob)))**2 + &
+                        (utm_y_source(isource)-dble(ystore(iglob)))**2)
+              inode = inode + 1
+            end do
+          end do
+          dist = sum(dist_node)
+          if(dist < distmin) then
+            distmin = dist
+            altitude_source(1) = (dist_node(1)/dist)*elevation_node(1) + &
+                     (dist_node(2)/dist)*elevation_node(2) + &
+                     (dist_node(3)/dist)*elevation_node(3) + &
+                     (dist_node(4)/dist)*elevation_node(4)
+          endif
+        end do
+      end do
+    end if
+    !  MPI communications to determine the best slice
+    distmin_ele(1)= distmin
+    call gather_all_dp(distmin_ele,1,distmin_ele_all,1,NPROC)
+    call gather_all_dp(altitude_source,1,elevation_all,1,NPROC)
+    if(myrank == 0) then
+      iproc = minloc(distmin_ele_all)
+      altitude_source(1) = elevation_all(iproc(1))
+    end if
+    call bcast_all_dp(altitude_source,1)
+    elevation(isource) = altitude_source(1)
+
+    ! orientation consistent with the UTM projection
+    !     East
+    nu_source(1,1,isource) = 1.d0
+    nu_source(1,2,isource) = 0.d0
+    nu_source(1,3,isource) = 0.d0
+    !     North
+    nu_source(2,1,isource) = 0.d0
+    nu_source(2,2,isource) = 1.d0
+    nu_source(2,3,isource) = 0.d0
+    !     Vertical
+    nu_source(3,1,isource) = 0.d0
+    nu_source(3,2,isource) = 0.d0
+    nu_source(3,3,isource) = 1.d0
+
+    x_target_source = utm_x_source(isource)
+    y_target_source = utm_y_source(isource)
+
+    ! source Z coordinate
+    if( USE_SOURCES_RECVS_Z ) then
+      ! alternative: depth is given as z value directly
+      z_target_source = depth(isource)
+    else
+      ! depth in CMTSOLUTION given in km
+      z_target_source =  - depth(isource)*1000.0d0 + elevation(isource)
+    endif
+
+    ! set distance to huge initial value
+    distmin = HUGEVAL
+
+    ispec_selected_source(isource) = 0
+    ix_initial_guess_source = 0
+    iy_initial_guess_source = 0
+    iz_initial_guess_source = 0
+    do ispec=1,NSPEC_AB
+
+      ! define the interval in which we look for points
+      if(USE_FORCE_POINT_SOURCE) then
+        imin = 1
+        imax = NGLLX
+
+        jmin = 1
+        jmax = NGLLY
+
+        kmin = 1
+        kmax = NGLLZ
+
+      else
+        ! loop only on points inside the element
+        ! exclude edges to ensure this point is not shared with other elements
+        imin = 2
+        imax = NGLLX - 1
+
+        jmin = 2
+        jmax = NGLLY - 1
+
+        kmin = 2
+        kmax = NGLLZ - 1
+      endif
+
+      do k = kmin,kmax
+        do j = jmin,jmax
+          do i = imin,imax
+
+            iglob = ibool(i,j,k,ispec)
+
+            if (.not. SOURCES_CAN_BE_BURIED_EXT_MESH) then
+              if ((.not. iglob_is_surface_external_mesh(iglob)) .or. (.not. ispec_is_surface_external_mesh(ispec))) then
+                cycle
+              endif
+            endif
+
+            ! keep this point if it is closer to the source
+            dist = dsqrt((x_target_source-dble(xstore(iglob)))**2 &
+                  +(y_target_source-dble(ystore(iglob)))**2 &
+                  +(z_target_source-dble(zstore(iglob)))**2)
+            if(dist < distmin) then
+              distmin = dist
+              ispec_selected_source(isource) = ispec
+              ix_initial_guess_source = i
+              iy_initial_guess_source = j
+              iz_initial_guess_source = k
+
+              ! store xi,eta,gamma and x,y,z of point found
+              ! note: they have range [1.0d0,NGLLX/Y/Z], used for point sources
+              !          see e.g. in compute_add_source_elastic.f90
+              xi_source(isource) = dble(ix_initial_guess_source)
+              eta_source(isource) = dble(iy_initial_guess_source)
+              gamma_source(isource) = dble(iz_initial_guess_source)
+
+              x_found_source(isource) = xstore(iglob)
+              y_found_source(isource) = ystore(iglob)
+              z_found_source(isource) = zstore(iglob)
+
+              ! compute final distance between asked and found (converted to km)
+              final_distance_source(isource) = dsqrt((x_target_source-x_found_source(isource))**2 + &
+                (y_target_source-y_found_source(isource))**2 + (z_target_source-z_found_source(isource))**2)
+
+            endif
+
+          enddo
+        enddo
+      enddo
+
+    ! end of loop on all the elements in current slice
+    enddo
+
+    if (ispec_selected_source(isource) == 0) then
+      final_distance_source(isource) = HUGEVAL
+    endif
+
+    ! sets whether acoustic (1) or elastic (2)
+    if( ispec_is_acoustic( ispec_selected_source(isource) ) ) then
+      idomain(isource) = IDOMAIN_ACOUSTIC
+    else if( ispec_is_elastic( ispec_selected_source(isource) ) ) then
+      idomain(isource) = IDOMAIN_ELASTIC
+    else
+      idomain(isource) = 0
+    endif
+
+    ! get normal to the face of the hexahedra if receiver is on the surface
+    if ((.not. SOURCES_CAN_BE_BURIED_EXT_MESH) .and. &
+       .not. (ispec_selected_source(isource) == 0)) then
+
+      ! note: at this point, xi_source,.. are in range [1.0d0,NGLLX/Y/Z] for point sources only,
+      !            for non-point sources the range is limited to [2.0d0,NGLLX/Y/Z - 1]
+      if( .not. USE_FORCE_POINT_SOURCE ) call exit_MPI(myrank,'error locate source: no point source at surface')
+
+      ! initialize indices
+      pt0_ix = -1
+      pt0_iy = -1
+      pt0_iz = -1
+      pt1_ix = -1
+      pt1_iy = -1
+      pt1_iz = -1
+      pt2_ix = -1
+      pt2_iy = -1
+      pt2_iz = -1
+
+      ! we get two vectors of the face (three points) to compute the normal
+      if (nint(xi_source(isource)) == 1 .and. &
+         iglob_is_surface_external_mesh(ibool(1,2,2,ispec_selected_source(isource)))) then
+        pt0_ix = 1
+        pt0_iy = NGLLY
+        pt0_iz = 1
+        pt1_ix = 1
+        pt1_iy = 1
+        pt1_iz = 1
+        pt2_ix = 1
+        pt2_iy = NGLLY
+        pt2_iz = NGLLZ
+      endif
+      if (nint(xi_source(isource)) == NGLLX .and. &
+         iglob_is_surface_external_mesh(ibool(NGLLX,2,2,ispec_selected_source(isource)))) then
+        pt0_ix = NGLLX
+        pt0_iy = 1
+        pt0_iz = 1
+        pt1_ix = NGLLX
+        pt1_iy = NGLLY
+        pt1_iz = 1
+        pt2_ix = NGLLX
+        pt2_iy = 1
+        pt2_iz = NGLLZ
+      endif
+      if (nint(eta_source(isource)) == 1 .and. &
+         iglob_is_surface_external_mesh(ibool(2,1,2,ispec_selected_source(isource)))) then
+        pt0_ix = 1
+        pt0_iy = 1
+        pt0_iz = 1
+        pt1_ix = NGLLX
+        pt1_iy = 1
+        pt1_iz = 1
+        pt2_ix = 1
+        pt2_iy = 1
+        pt2_iz = NGLLZ
+      endif
+      if (nint(eta_source(isource)) == NGLLY .and. &
+         iglob_is_surface_external_mesh(ibool(2,NGLLY,2,ispec_selected_source(isource)))) then
+        pt0_ix = NGLLX
+        pt0_iy = NGLLY
+        pt0_iz = 1
+        pt1_ix = 1
+        pt1_iy = NGLLY
+        pt1_iz = 1
+        pt2_ix = NGLLX
+        pt2_iy = NGLLY
+        pt2_iz = NGLLZ
+      endif
+      if (nint(gamma_source(isource)) == 1 .and. &
+         iglob_is_surface_external_mesh(ibool(2,2,1,ispec_selected_source(isource)))) then
+        pt0_ix = NGLLX
+        pt0_iy = 1
+        pt0_iz = 1
+        pt1_ix = 1
+        pt1_iy = 1
+        pt1_iz = 1
+        pt2_ix = NGLLX
+        pt2_iy = NGLLY
+        pt2_iz = 1
+      endif
+      if (nint(gamma_source(isource)) == NGLLZ .and. &
+         iglob_is_surface_external_mesh(ibool(2,2,NGLLZ,ispec_selected_source(isource)))) then
+        pt0_ix = 1
+        pt0_iy = 1
+        pt0_iz = NGLLZ
+        pt1_ix = NGLLX
+        pt1_iy = 1
+        pt1_iz = NGLLZ
+        pt2_ix = 1
+        pt2_iy = NGLLY
+        pt2_iz = NGLLZ
+      endif
+
+      if (pt0_ix<0 .or.pt0_iy<0 .or. pt0_iz<0 .or. &
+         pt1_ix<0 .or. pt1_iy<0 .or. pt1_iz<0 .or. &
+         pt2_ix<0 .or. pt2_iy<0 .or. pt2_iz<0) then
+        call exit_mpi(myrank,'error in computing normal for sources.')
+      endif
+
+      u_vector(1) = xstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
+         - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+      u_vector(2) = ystore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
+         - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+      u_vector(3) = zstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
+         - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+      v_vector(1) = xstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
+         - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+      v_vector(2) = ystore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
+         - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+      v_vector(3) = zstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
+         - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+
+      ! cross product
+      w_vector(1) = u_vector(2)*v_vector(3) - u_vector(3)*v_vector(2)
+      w_vector(2) = u_vector(3)*v_vector(1) - u_vector(1)*v_vector(3)
+      w_vector(3) = u_vector(1)*v_vector(2) - u_vector(2)*v_vector(1)
+
+      ! normalize vector w
+      w_vector(:) = w_vector(:)/sqrt(w_vector(1)**2+w_vector(2)**2+w_vector(3)**2)
+
+      ! build the two other vectors for a direct base: we normalize u, and v=w^u
+      u_vector(:) = u_vector(:)/sqrt(u_vector(1)**2+u_vector(2)**2+u_vector(3)**2)
+      v_vector(1) = w_vector(2)*u_vector(3) - w_vector(3)*u_vector(2)
+      v_vector(2) = w_vector(3)*u_vector(1) - w_vector(1)*u_vector(3)
+      v_vector(3) = w_vector(1)*u_vector(2) - w_vector(2)*u_vector(1)
+
+      ! build rotation matrice nu for seismograms
+      !     East (u)
+      nu_source(1,1,isource) = u_vector(1)
+      nu_source(1,2,isource) = v_vector(1)
+      nu_source(1,3,isource) = w_vector(1)
+      !     North (v)
+      nu_source(2,1,isource) = u_vector(2)
+      nu_source(2,2,isource) = v_vector(2)
+      nu_source(2,3,isource) = w_vector(2)
+      !     Vertical (w)
+      nu_source(3,1,isource) = u_vector(3)
+      nu_source(3,2,isource) = v_vector(3)
+      nu_source(3,3,isource) = w_vector(3)
+
+    endif ! of if (.not. SOURCES_CAN_BE_BURIED_EXT_MESH)
+
+! *******************************************
+! find the best (xi,eta,gamma) for the source
+! *******************************************
+
+    ! for point sources, the location will be exactly at a GLL point
+    ! otherwise this tries to find best location
+    if(.not. USE_FORCE_POINT_SOURCE) then
+
+      ! uses actual location interpolators, in range [-1,1]
+      xi = xigll(ix_initial_guess_source)
+      eta = yigll(iy_initial_guess_source)
+      gamma = zigll(iz_initial_guess_source)
+
+      ! define coordinates of the control points of the element
+      do ia=1,NGNOD
+        iax = 0
+        iay = 0
+        iaz = 0
+        if(iaddx(ia) == 0) then
+          iax = 1
+        else if(iaddx(ia) == 1) then
+          iax = (NGLLX+1)/2
+        else if(iaddx(ia) == 2) then
+          iax = NGLLX
+        else
+          call exit_MPI(myrank,'incorrect value of iaddx')
+        endif
+
+        if(iaddy(ia) == 0) then
+          iay = 1
+        else if(iaddy(ia) == 1) then
+          iay = (NGLLY+1)/2
+        else if(iaddy(ia) == 2) then
+          iay = NGLLY
+        else
+          call exit_MPI(myrank,'incorrect value of iaddy')
+        endif
+
+        if(iaddz(ia) == 0) then
+          iaz = 1
+        else if(iaddz(ia) == 1) then
+          iaz = (NGLLZ+1)/2
+        else if(iaddz(ia) == 2) then
+          iaz = NGLLZ
+        else
+          call exit_MPI(myrank,'incorrect value of iaddz')
+        endif
+
+        iglob = ibool(iax,iay,iaz,ispec_selected_source(isource))
+        xelm(ia) = dble(xstore(iglob))
+        yelm(ia) = dble(ystore(iglob))
+        zelm(ia) = dble(zstore(iglob))
+
+      enddo
+
+      ! iterate to solve the non linear system
+      do iter_loop = 1,NUM_ITER
+
+        ! recompute jacobian for the new point
+        call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+
+        ! compute distance to target location
+        dx = - (x - x_target_source)
+        dy = - (y - y_target_source)
+        dz = - (z - z_target_source)
+
+        ! compute increments
+        dxi  = xix*dx + xiy*dy + xiz*dz
+        deta = etax*dx + etay*dy + etaz*dz
+        dgamma = gammax*dx + gammay*dy + gammaz*dz
+
+        ! update values
+        xi = xi + dxi
+        eta = eta + deta
+        gamma = gamma + dgamma
+
+        ! impose that we stay in that element
+        ! (useful if user gives a source outside the mesh for instance)
+        if (xi > 1.d0) xi = 1.d0
+        if (xi < -1.d0) xi = -1.d0
+        if (eta > 1.d0) eta = 1.d0
+        if (eta < -1.d0) eta = -1.d0
+        if (gamma > 1.d0) gamma = 1.d0
+        if (gamma < -1.d0) gamma = -1.d0
+
+      enddo
+
+      ! compute final coordinates of point found
+      call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+
+      ! store xi,eta,gamma and x,y,z of point found
+      ! note: xi/eta/gamma will be in range [-1,1]
+      xi_source(isource) = xi
+      eta_source(isource) = eta
+      gamma_source(isource) = gamma
+      x_found_source(isource) = x
+      y_found_source(isource) = y
+      z_found_source(isource) = z
+
+      ! compute final distance between asked and found (converted to km)
+      final_distance_source(isource) = dsqrt((x_target_source-x_found_source(isource))**2 + &
+        (y_target_source-y_found_source(isource))**2 + (z_target_source-z_found_source(isource))**2)
+
+    endif ! of if (.not. USE_FORCE_POINT_SOURCE)
+
+  ! end of loop on all the sources
+  enddo
+
+  ! now gather information from all the nodes
+  ngather = NSOURCES/NGATHER_SOURCES
+  if (mod(NSOURCES,NGATHER_SOURCES)/= 0) ngather = ngather+1
+  do ig = 1, ngather
+    ns = (ig-1) * NGATHER_SOURCES + 1
+    ne = min(ig*NGATHER_SOURCES, NSOURCES)
+    ng = ne - ns + 1
+
+    ispec_selected_source_all(:,:) = -1
+
+    ! avoids warnings about temporary creations of arrays for function call by compiler
+    allocate(tmp_i_local(ng),tmp_i_all_local(ng,0:NPROC-1),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array tmp_i_local'
+    tmp_i_local(:) = ispec_selected_source(ns:ne)
+    call gather_all_i(tmp_i_local,ng,tmp_i_all_local,ng,NPROC)
+    ispec_selected_source_all(1:ng,:) = tmp_i_all_local(:,:)
+
+    ! acoustic/elastic domain
+    tmp_i_local(:) = idomain(ns:ne)
+    call gather_all_i(tmp_i_local,ng,tmp_i_all_local,ng,NPROC)
+    idomain_all(1:ng,:) = tmp_i_all_local(:,:)
+
+    deallocate(tmp_i_local,tmp_i_all_local)
+
+    ! avoids warnings about temporary creations of arrays for function call by compiler
+    allocate(tmp_local(ng),tmp_all_local(ng,0:NPROC-1),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array tmp_local'
+    tmp_local(:) = xi_source(ns:ne)
+    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+    xi_source_all(1:ng,:) = tmp_all_local(:,:)
+
+    tmp_local(:) = eta_source(ns:ne)
+    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+    eta_source_all(1:ng,:) = tmp_all_local(:,:)
+
+    tmp_local(:) = gamma_source(ns:ne)
+    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+    gamma_source_all(1:ng,:) = tmp_all_local(:,:)
+
+    tmp_local(:) = final_distance_source(ns:ne)
+    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+    final_distance_source_all(1:ng,:) = tmp_all_local(:,:)
+
+    tmp_local(:) = x_found_source(ns:ne)
+    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+    x_found_source_all(1:ng,:) = tmp_all_local(:,:)
+
+    tmp_local(:) = y_found_source(ns:ne)
+    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+    y_found_source_all(1:ng,:) = tmp_all_local(:,:)
+
+    tmp_local(:) = z_found_source(ns:ne)
+    call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+    z_found_source_all(1:ng,:) = tmp_all_local(:,:)
+
+    do i=1,3
+      do j=1,3
+        tmp_local(:) = nu_source(i,j,ns:ne)
+        call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+        nu_source_all(i,j,1:ng,:) = tmp_all_local(:,:)
+      enddo
+    enddo
+    deallocate(tmp_local,tmp_all_local)
+
+    ! this is executed by main process only
+    if(myrank == 0) then
+
+      ! check that the gather operation went well
+      if(any(ispec_selected_source_all(1:ng,:) == -1)) call exit_MPI(myrank,'gather operation failed for source')
+
+      ! loop on all the sources
+      do is = 1,ng
+        isource = ns + is - 1
+
+        ! loop on all the results to determine the best slice
+        distmin = HUGEVAL
+        do iprocloop = 0,NPROC-1
+          if(final_distance_source_all(is,iprocloop) < distmin) then
+            distmin = final_distance_source_all(is,iprocloop)
+            islice_selected_source(isource) = iprocloop
+            ispec_selected_source(isource) = ispec_selected_source_all(is,iprocloop)
+            xi_source(isource) = xi_source_all(is,iprocloop)
+            eta_source(isource) = eta_source_all(is,iprocloop)
+            gamma_source(isource) = gamma_source_all(is,iprocloop)
+            x_found_source(isource) = x_found_source_all(is,iprocloop)
+            y_found_source(isource) = y_found_source_all(is,iprocloop)
+            z_found_source(isource) = z_found_source_all(is,iprocloop)
+            nu_source(:,:,isource) = nu_source_all(:,:,isource,iprocloop)
+            idomain(isource) = idomain_all(is,iprocloop)
+          endif
+        enddo
+        final_distance_source(isource) = distmin
+
+      enddo
+    endif !myrank
+  enddo ! ngather
+
+  if (myrank == 0) then
+
+    do isource = 1,NSOURCES
+
+      if(SHOW_DETAILS_LOCATE_SOURCE .or. NSOURCES == 1) then
+
+        write(IMAIN,*)
+        write(IMAIN,*) '*************************************'
+        write(IMAIN,*) ' locating source ',isource
+        write(IMAIN,*) '*************************************'
+        write(IMAIN,*)
+        write(IMAIN,*) 'source located in slice ',islice_selected_source(isource)
+        write(IMAIN,*) '               in element ',ispec_selected_source(isource)
+
+        if( idomain(isource) == IDOMAIN_ACOUSTIC ) then
+          write(IMAIN,*) '               in acoustic domain'
+        else if( idomain(isource) == IDOMAIN_ELASTIC ) then
+          write(IMAIN,*) '               in elastic domain'
+        else
+          write(IMAIN,*) '               in unknown domain'
+        endif
+
+        write(IMAIN,*)
+        if(USE_FORCE_POINT_SOURCE) then
+          write(IMAIN,*) '  i index of source in that element: ',nint(xi_source(isource))
+          write(IMAIN,*) '  j index of source in that element: ',nint(eta_source(isource))
+          write(IMAIN,*) '  k index of source in that element: ',nint(gamma_source(isource))
+          write(IMAIN,*)
+          write(IMAIN,*) '  component direction: ',COMPONENT_FORCE_SOURCE
+          write(IMAIN,*)
+          write(IMAIN,*) '  nu1 = ',nu_source(1,:,isource)
+          write(IMAIN,*) '  nu2 = ',nu_source(2,:,isource)
+          write(IMAIN,*) '  nu3 = ',nu_source(3,:,isource)
+          write(IMAIN,*)
+          write(IMAIN,*) '  at (x,y,z) coordinates = ',x_found_source(isource),y_found_source(isource),z_found_source(isource)
+
+          ! prints frequency content for point forces
+          f0 = hdur(isource)
+          t0_ricker = 1.2d0/f0
+          write(IMAIN,*) '  using a source of dominant frequency ',f0
+          write(IMAIN,*) '  lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+          write(IMAIN,*) '  lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+          write(IMAIN,*) '  t0_ricker = ',t0_ricker,'tshift_cmt = ',tshift_cmt(isource)
+          write(IMAIN,*)
+          write(IMAIN,*) '  half duration -> frequency: ',hdur(isource),' seconds**(-1)'
+        else
+          write(IMAIN,*) '  xi coordinate of source in that element: ',xi_source(isource)
+          write(IMAIN,*) '  eta coordinate of source in that element: ',eta_source(isource)
+          write(IMAIN,*) '  gamma coordinate of source in that element: ',gamma_source(isource)
+          write(IMAIN,*)
+          ! add message if source is a Heaviside
+          if(hdur(isource) <= 5.*DT) then
+            write(IMAIN,*)
+            write(IMAIN,*) 'Source time function is a Heaviside, convolve later'
+            write(IMAIN,*)
+          endif
+          write(IMAIN,*) '  half duration: ',hdur(isource),' seconds'
+        endif
+        write(IMAIN,*) '  time shift: ',tshift_cmt(isource),' seconds'
+        write(IMAIN,*)
+        write(IMAIN,*) 'original (requested) position of the source:'
+        write(IMAIN,*)
+        write(IMAIN,*) '          latitude: ',lat(isource)
+        write(IMAIN,*) '         longitude: ',long(isource)
+        write(IMAIN,*)
+        if( SUPPRESS_UTM_PROJECTION ) then
+          write(IMAIN,*) '             x: ',utm_x_source(isource)
+          write(IMAIN,*) '             y: ',utm_y_source(isource)
+        else
+          write(IMAIN,*) '         UTM x: ',utm_x_source(isource)
+          write(IMAIN,*) '         UTM y: ',utm_y_source(isource)
+        endif
+        if( USE_SOURCES_RECVS_Z ) then
+          write(IMAIN,*) '         z: ',depth(isource),' km'
+        else
+          write(IMAIN,*) '         depth: ',depth(isource),' km'
+          write(IMAIN,*) 'topo elevation: ',elevation(isource)
+        endif
+
+        write(IMAIN,*)
+        write(IMAIN,*) 'position of the source that will be used:'
+        write(IMAIN,*)
+        if( SUPPRESS_UTM_PROJECTION ) then
+          write(IMAIN,*) '             x: ',x_found_source(isource)
+          write(IMAIN,*) '             y: ',y_found_source(isource)
+        else
+          write(IMAIN,*) '         UTM x: ',x_found_source(isource)
+          write(IMAIN,*) '         UTM y: ',y_found_source(isource)
+        endif
+        if( USE_SOURCES_RECVS_Z ) then
+          write(IMAIN,*) '             z: ',z_found_source(isource)
+        else
+          write(IMAIN,*) '         depth: ',dabs(z_found_source(isource) - elevation(isource))/1000.,' km'
+          write(IMAIN,*) '             z: ',z_found_source(isource)
+        endif
+        write(IMAIN,*)
+
+        ! display error in location estimate
+        write(IMAIN,*) 'error in location of the source: ',sngl(final_distance_source(isource)),' m'
+
+        ! add warning if estimate is poor
+        ! (usually means source outside the mesh given by the user)
+        if(final_distance_source(isource) > 3000.d0) then
+          write(IMAIN,*)
+          write(IMAIN,*) '*****************************************************'
+          write(IMAIN,*) '*****************************************************'
+          write(IMAIN,*) '***** WARNING: source location estimate is poor *****'
+          write(IMAIN,*) '*****************************************************'
+          write(IMAIN,*) '*****************************************************'
+        endif
+
+      endif  ! end of detailed output to locate source
+
+      ! checks CMTSOLUTION format for acoustic case
+      if( idomain(isource) == IDOMAIN_ACOUSTIC ) then
+        if( Mxx(isource) /= Myy(isource) .or. Myy(isource) /= Mzz(isource) .or. &
+           Mxy(isource) > TINYVAL .or. Mxz(isource) > TINYVAL .or. Myz(isource) > TINYVAL ) then
+            write(IMAIN,*)
+            write(IMAIN,*) ' error CMTSOLUTION format for acoustic source:'
+            write(IMAIN,*) '   acoustic source needs explosive moment tensor with'
+            write(IMAIN,*) '      Mrr = Mtt = Mpp '
+            write(IMAIN,*) '   and '
+            write(IMAIN,*) '      Mrt = Mrp = Mtp = zero'
+            write(IMAIN,*)
+            call exit_mpi(myrank,'error acoustic source')
+        endif
+      endif
+
+      ! checks source domain
+      if( idomain(isource) /= IDOMAIN_ACOUSTIC .and. idomain(isource) /= IDOMAIN_ELASTIC ) then
+        ! only acoustic/elastic domain implement yet
+        call exit_MPI(myrank,'source located in unknown domain')
+      endif
+
+    ! end of loop on all the sources
+    enddo
+
+    if( .not. SHOW_DETAILS_LOCATE_SOURCE .and. NSOURCES > 1 ) then
+        write(IMAIN,*)
+        write(IMAIN,*) '*************************************'
+        write(IMAIN,*) ' using sources ',NSOURCES
+        write(IMAIN,*) '*************************************'
+        write(IMAIN,*)
+    endif
+
+    if(PRINT_SOURCE_TIME_FUNCTION) then
+      write(IMAIN,*)
+      write(IMAIN,*) 'printing the source-time function'
+    endif
+
+    ! display maximum error in location estimate
+    write(IMAIN,*)
+    write(IMAIN,*) 'maximum error in location of the sources: ',sngl(maxval(final_distance_source)),' m'
+    write(IMAIN,*)
+
+    ! sets new utm coordinates for best locations
+    utm_x_source(:) = x_found_source(:)
+    utm_y_source(:) = y_found_source(:)
+
+  endif     ! end of section executed by main process only
+
+  ! main process broadcasts the results to all the slices
+  call bcast_all_i(islice_selected_source,NSOURCES)
+  call bcast_all_i(ispec_selected_source,NSOURCES)
+  call bcast_all_dp(xi_source,NSOURCES)
+  call bcast_all_dp(eta_source,NSOURCES)
+  call bcast_all_dp(gamma_source,NSOURCES)
+  call bcast_all_dp(utm_x_source,NSOURCES)
+  call bcast_all_dp(utm_y_source,NSOURCES)
+
+  ! elapsed time since beginning of source detection
+  if(myrank == 0) then
+    tCPU = wtime() - time_start
+    write(IMAIN,*)
+    write(IMAIN,*) 'Elapsed time for detection of sources in seconds = ',tCPU
+    write(IMAIN,*)
+    write(IMAIN,*) 'End of source detection - done'
+    write(IMAIN,*)
+    ! output source information to a file so that we can load it and write to SU headers later
+    open(unit=IOUT_SU,file=trim(OUTPUT_FILES)//'/output_list_sources.txt',status='unknown')
+    do isource=1,NSOURCES
+      write(IOUT_SU,*) x_found_source(isource),y_found_source(isource),z_found_source(isource)
+    enddo
+    close(IOUT_SU)
+  endif
+
+  end subroutine locate_source
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -75,7 +75,7 @@
 
   end subroutine noise_distribution_direction
 
-subroutine noise_distribution_direction_non_uniform(xcoord_in,ycoord_in,zcoord_in, &
+  subroutine noise_distribution_dir_non_uni(xcoord_in,ycoord_in,zcoord_in, &
                   normal_x_noise_out,normal_y_noise_out,normal_z_noise_out, &
                   mask_noise_out)
   implicit none
@@ -88,7 +88,7 @@
 !PB VARIABLES TO DEFINE THE REGION OF NOISE
   real(kind=CUSTOM_REAL) :: xcoord,ycoord,zcoord,xcoord_center,ycoord_center
   real :: lon,lat,colat,lon_cn,lat_cn,dsigma,d,dmax
-  
+
   ! coordinates "x/y/zcoord_in" actually contain r theta phi, therefore convert back to x y z
   ! call rthetaphi_2_xyz(xcoord,ycoord,zcoord, xcoord_in,ycoord_in,zcoord_in)
   xcoord=xcoord_in
@@ -111,7 +111,9 @@
    colat=atan(sqrt(xcoord**2+ycoord**2)/zcoord)
    lat=(PI/2)-colat
 
-!PB CALCULATE THE DISTANCE BETWEEN CENTER OF NOISE REGION AND EACH POINT OF THE MODEL'S FREE SURFACE  !PB dsigma IS THE "3D" ANGLE BETWEEN THE TWO POINTS, THEN d = R*dsigma   
+!PB CALCULATE THE DISTANCE BETWEEN CENTER OF NOISE REGION AND EACH
+! POINT OF THE MODEL'S FREE SURFACE  !PB dsigma IS THE "3D" ANGLE BETWEEN
+! THE TWO POINTS, THEN d = R*dsigma
   dsigma=acos(sin(lon)*sin(lon_cn)+cos(lon)*cos(lon_cn)*cos(lat-lat_cn))
   d=sqrt(xcoord**2+ycoord**2+zcoord**2)*dsigma
 
@@ -155,8 +157,8 @@
   !******************************** change your noise characteristics above ****************************************
   !*****************************************************************************************************************
 
-  end subroutine noise_distribution_direction_non_uniform
-  
+  end subroutine noise_distribution_dir_non_uni
+
 ! =============================================================================================================
 ! =============================================================================================================
 ! =============================================================================================================
@@ -165,7 +167,7 @@
   subroutine read_parameters_noise(myrank,nrec,NSTEP,nmovie_points, &
                                    islice_selected_rec,xi_receiver,eta_receiver,gamma_receiver,nu, &
                                    noise_sourcearray,xigll,yigll,zigll, &
-                                   ibool, &                                   
+                                   ibool, &
                                    xstore,ystore,zstore, &
                                    irec_master_noise,normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
                                    NSPEC_AB_VAL,NGLOB_AB_VAL, &
@@ -174,9 +176,9 @@
   implicit none
   include "constants.h"
   ! input parameters
-  integer :: myrank, nrec, NSTEP, nmovie_points 
+  integer :: myrank, nrec, NSTEP, nmovie_points
   integer :: NSPEC_AB_VAL,NGLOB_AB_VAL
-  
+
   integer, dimension(nrec) :: islice_selected_rec
 
   integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB_VAL) :: ibool
@@ -187,23 +189,23 @@
   double precision, dimension(NDIM,NDIM,nrec) :: nu
   real(kind=CUSTOM_REAL), dimension(NGLOB_AB_VAL) :: xstore,ystore,zstore
 
-  integer :: num_free_surface_faces 
+  integer :: num_free_surface_faces
   integer, dimension(num_free_surface_faces) :: free_surface_ispec
   integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
-  
+
   logical, dimension(NSPEC_AB_VAL) :: ispec_is_acoustic
 
 !daniel: from global code...
   !integer, dimension(NSPEC2D_TOP_VAL) :: ibelm_top ! equals free_surface_ispec
   !integer :: NSPEC2D_TOP_VAL ! equals num_free_surface_faces
   !integer :: nspec_top ! equals num_free_surface_faces
-  
+
   ! output parameters
   integer :: irec_master_noise
   real(kind=CUSTOM_REAL) :: noise_sourcearray(NDIM,NGLLX,NGLLY,NGLLZ,NSTEP)
   real(kind=CUSTOM_REAL), dimension(nmovie_points) :: normal_x_noise,normal_y_noise,normal_z_noise,mask_noise
   ! local parameters
-  integer :: ipoin,ispec,i,j,k,iglob,ios,iface,igll 
+  integer :: ipoin,ispec,i,j,k,iglob,ios,iface,igll
   real(kind=CUSTOM_REAL) :: normal_x_noise_out,normal_y_noise_out,normal_z_noise_out,mask_noise_out
   character(len=256) :: filename
 
@@ -212,7 +214,7 @@
   open(unit=IIN_NOISE,file=trim(filename),status='old',action='read',iostat=ios)
   if( ios /= 0 ) &
     call exit_MPI(myrank, 'file '//trim(filename)//' does NOT exist! This file contains the ID of the master receiver')
-  
+
   read(IIN_NOISE,*,iostat=ios) irec_master_noise
   if( ios /= 0 ) call exit_MPI(myrank,'error reading file irec_master_noise')
   close(IIN_NOISE)
@@ -274,11 +276,11 @@
         !           mask_noise_out)
 
         ! Setup for NOISE_TOMOGRAPHY by Piero Basini
-        call noise_distribution_direction_non_uniform(xstore(iglob), &
+        call noise_distribution_dir_non_uni(xstore(iglob), &
              ystore(iglob),zstore(iglob), &
              normal_x_noise_out,normal_y_noise_out,normal_z_noise_out, &
              mask_noise_out)
-        
+
         normal_x_noise(ipoin) = normal_x_noise_out
         normal_y_noise(ipoin) = normal_y_noise_out
         normal_z_noise(ipoin) = normal_z_noise_out
@@ -401,19 +403,19 @@
 
     ! size of single record
     reclen=CUSTOM_REAL*NDIM*NGLLSQUARE*NSPEC_TOP
-     
-    ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer 
+
+    ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer
     if( NSPEC_TOP > 2147483647 / (CUSTOM_REAL * NGLLSQUARE * NDIM) ) then
       print *,'reclen of noise surface_movie needed exceeds integer 4-byte limit: ',reclen
       print *,'  ',CUSTOM_REAL, NDIM, NGLLSQUARE, NSPEC_TOP
       print*,'bit size fortran: ',bit_size(NSPEC_TOP)
       call exit_MPI(myrank,"error NSPEC_TOP integer limit")
     endif
-     
+
     ! total file size
     filesize = reclen
     filesize = filesize*NSTEP
-     
+
     write(outputname,"('/proc',i6.6,'_surface_movie')") myrank
     if (NOISE_TOMOGRAPHY==1) call open_file_abs_w(2,trim(LOCAL_PATH)//trim(outputname), &
                                       len_trim(trim(LOCAL_PATH)//trim(outputname)), &
@@ -551,9 +553,9 @@
 
   ! adds noise source (only if this proc carries the noise)
   if(myrank == islice_selected_rec(irec_master_noise)) then
-    
+
     ispec = ispec_selected_rec(irec_master_noise)
-    
+
     ! adds nosie source contributions
     do k=1,NGLLZ
       do j=1,NGLLY
@@ -598,15 +600,15 @@
   !integer :: NSPEC2D_TOP_VAL ! equals num_free_surface_faces
   !integer, dimension(NSPEC2D_TOP_VAL) :: ibelm_top ! equals free_surface_ispec
   !integer :: ispec2D ! equals iface
-  
+
   ! local parameters
-  integer :: ispec,i,j,k,iglob,iface,igll 
+  integer :: ispec,i,j,k,iglob,iface,igll
   real(kind=CUSTOM_REAL),dimension(NDIM,NGLLSQUARE,num_free_surface_faces) :: noise_surface_movie
   integer(kind=8) :: Mesh_pointer
-  logical :: GPU_MODE  
+  logical :: GPU_MODE
 
   if(.NOT. GPU_MODE) then
-     ! loops over surface points   
+     ! loops over surface points
      ! get coordinates of surface mesh and surface displacement
      do iface = 1, num_free_surface_faces
 
@@ -658,7 +660,7 @@
   real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB_VAL) :: accel ! both input and output
   real(kind=CUSTOM_REAL), dimension(nmovie_points) :: normal_x_noise,normal_y_noise,normal_z_noise, mask_noise
 
-  integer :: num_free_surface_faces 
+  integer :: num_free_surface_faces
   integer, dimension(num_free_surface_faces) :: free_surface_ispec
   integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
   real(kind=CUSTOM_REAL) :: free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces)
@@ -667,10 +669,10 @@
   !integer :: nspec_top ! equals num_free_surface_faces
   !integer :: NSPEC2D_TOP_VAL ! equal num_free_surface_faces
   !integer, dimension(NSPEC2D_TOP_VAL) :: ibelm_top ! equals free_surface_ispec
-  !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_VAL) :: jacobian2D_top 
+  !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_VAL) :: jacobian2D_top
                     ! equals to:                   free_surface_jacobian2Dw including weights wgllwgll
   !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  
+
   ! local parameters
   integer :: ipoin,ispec,i,j,k,iglob,iface,igll
   real(kind=CUSTOM_REAL) :: eta
@@ -680,19 +682,19 @@
   integer(kind=8) :: Mesh_pointer
   logical :: GPU_MODE
   integer :: NOISE_TOMOGRAPHY
-  
+
   ! read surface movie
   call read_abs(2,noise_surface_movie,CUSTOM_REAL*NDIM*NGLLSQUARE*num_free_surface_faces,it)
 
   if(GPU_MODE) then
-     call noise_read_add_surface_movie_cuda(Mesh_pointer, noise_surface_movie,&
-          num_free_surface_faces,NOISE_TOMOGRAPHY)
+     call noise_read_add_surface_movie_cu(Mesh_pointer, noise_surface_movie,&
+                                          num_free_surface_faces,NOISE_TOMOGRAPHY)
   else ! GPU_MODE==0
 
      ! get coordinates of surface mesh and surface displacement
      ipoin = 0
 
-     ! loops over surface points      
+     ! loops over surface points
      ! puts noise distrubution and direction onto the surface points
      do iface = 1, num_free_surface_faces
 
@@ -711,7 +713,7 @@
                 noise_surface_movie(3,igll,iface) * normal_z_noise(ipoin)
 
            accel(1,iglob) = accel(1,iglob) + eta * mask_noise(ipoin) * normal_x_noise(ipoin) &
-                * free_surface_jacobian2Dw(igll,iface) 
+                * free_surface_jacobian2Dw(igll,iface)
            accel(2,iglob) = accel(2,iglob) + eta * mask_noise(ipoin) * normal_y_noise(ipoin) &
                 * free_surface_jacobian2Dw(igll,iface)
            accel(3,iglob) = accel(3,iglob) + eta * mask_noise(ipoin) * normal_z_noise(ipoin) &
@@ -742,13 +744,13 @@
   integer :: it
   integer :: nmovie_points
   integer :: NSPEC_AB_VAL,NGLOB_AB_VAL
-  
+
   integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB_VAL) :: ibool
   real(kind=CUSTOM_REAL) :: deltat
   real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB_VAL) :: displ
   real(kind=CUSTOM_REAL), dimension(nmovie_points) :: normal_x_noise,normal_y_noise,normal_z_noise
 
-  integer :: num_free_surface_faces 
+  integer :: num_free_surface_faces
   integer, dimension(num_free_surface_faces) :: free_surface_ispec
   integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
 
@@ -768,7 +770,7 @@
   ! GPU_MODE parameters
   integer(kind=8) :: Mesh_pointer
   logical :: GPU_MODE
-  
+
   ! read surface movie, needed for Sigma_kl
   call read_abs(2,noise_surface_movie,CUSTOM_REAL*NDIM*NGLLSQUARE*num_free_surface_faces,it)
 
@@ -806,7 +808,7 @@
      enddo
 
   else ! GPU_MODE==1
-     call compute_kernels_strength_noise_cuda(Mesh_pointer, noise_surface_movie,num_free_surface_faces,deltat)
+     call compute_kernels_strgth_noise_cu(Mesh_pointer, noise_surface_movie,num_free_surface_faces,deltat)
   endif ! GPU_MODE
 
   end subroutine compute_kernels_strength_noise
@@ -828,7 +830,7 @@
   character(len=256) :: prname
 
   call create_name_database(prname,myrank,LOCAL_PATH)
-  
+
   open(unit=IOUT_NOISE,file=trim(prname)//'sigma_kernel.bin',status='unknown', &
         form='unformatted',action='write')
   write(IOUT_NOISE) Sigma_kl

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -158,7 +158,7 @@
     seismograms_a(:,:,:) = 0._CUSTOM_REAL
   endif
 
-  ! synchronize all the processes 
+  ! synchronize all the processes
   call sync_all()
 
   ! prepares attenuation arrays
@@ -542,21 +542,21 @@
         ! size of single record
         b_reclen_field = CUSTOM_REAL * NDIM * NGLLSQUARE * num_abs_boundary_faces
 
-        ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer 
+        ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer
         if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NDIM * NGLLSQUARE) ) then
           print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_field
           print *,'  ',CUSTOM_REAL, NDIM, NGLLSQUARE, num_abs_boundary_faces
           print*,'bit size fortran: ',bit_size(b_reclen_field)
           call exit_MPI(myrank,"error b_reclen_field integer limit")
         endif
-        
+
         ! total file size
         filesize = b_reclen_field
         filesize = filesize*NSTEP
 
         if (SIMULATION_TYPE == 3) then
           ! opens existing files
-          
+
           ! uses fortran routines for reading
           !open(unit=IOABS,file=trim(prname)//'absorb_field.bin',status='old',&
           !      action='read',form='unformatted',access='direct', &
@@ -591,14 +591,14 @@
         ! size of single record
         b_reclen_potential = CUSTOM_REAL * NGLLSQUARE * num_abs_boundary_faces
 
-        ! check integer size limit: size of b_reclen_potential must fit onto an 4-byte integer 
+        ! check integer size limit: size of b_reclen_potential must fit onto an 4-byte integer
         if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NGLLSQUARE) ) then
           print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_potential
           print *,'  ',CUSTOM_REAL, NGLLSQUARE, num_abs_boundary_faces
           print*,'bit size fortran: ',bit_size(b_reclen_potential)
           call exit_MPI(myrank,"error b_reclen_potential integer limit")
         endif
-        
+
         ! total file size (two lines to implicitly convert to 8-byte integers)
         filesize = b_reclen_potential
         filesize = filesize*NSTEP
@@ -610,7 +610,7 @@
         !  print*,'file size fortran: ',filesize
         !  print*,'file bit size fortran: ',bit_size(filesize)
         !endif
-        
+
         if (SIMULATION_TYPE == 3) then
           ! opens existing files
           ! uses fortran routines for reading
@@ -618,7 +618,7 @@
           !      action='read',form='unformatted',access='direct', &
           !      recl=b_reclen_potential+2*4,iostat=ier )
           !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_potential.bin file')
-          
+
           ! uses c routines for faster reading
           call open_file_abs_r(1,trim(prname)//'absorb_potential.bin', &
                               len_trim(trim(prname)//'absorb_potential.bin'), &
@@ -638,9 +638,8 @@
 
         endif
       endif
-
     else
-      ! dummy array
+      ! needs dummy array
       b_num_abs_boundary_faces = 1
       if( ELASTIC_SIMULATION ) then
         allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
@@ -651,10 +650,22 @@
         allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
         if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
       endif
+    endif
+  else ! ABSORBING_CONDITIONS
+    ! needs dummy array
+    b_num_abs_boundary_faces = 1
+    if( ELASTIC_SIMULATION ) then
+      allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+      if( ier /= 0 ) stop 'error allocating array b_absorb_field'
+    endif
 
+    if( ACOUSTIC_SIMULATION ) then
+      allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+      if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
     endif
   endif
 
+
   end subroutine prepare_timerun_adjoint
 
 !
@@ -677,11 +688,11 @@
   ! for noise simulations
   if ( NOISE_TOMOGRAPHY /= 0 ) then
 
-    ! checks if free surface is defined  
+    ! checks if free surface is defined
     if( num_free_surface_faces == 0 ) then
       stop 'error: noise simulations need a free surface'
     endif
-    
+
     ! allocates arrays
     allocate(noise_sourcearray(NDIM,NGLLX,NGLLY,NGLLZ,NSTEP),stat=ier)
     if( ier /= 0 ) call exit_mpi(myrank,'error allocating noise source array')
@@ -738,15 +749,16 @@
 
   implicit none
   real :: free_mb,used_mb,total_mb
-  
+  integer :: ncuda_devices,ncuda_devices_min,ncuda_devices_max
+
   ! GPU_MODE now defined in Par_file
   if(myrank == 0 ) then
     write(IMAIN,*)
     write(IMAIN,*) "GPU_MODE Active. Preparing Fields and Constants on Device."
     write(IMAIN,*)
   endif
-  
-  ! prepares general fields on GPU  
+
+  ! prepares general fields on GPU
   call prepare_constants_device(Mesh_pointer, &
                                   NGLLX, NSPEC_AB, NGLOB_AB, &
                                   xix, xiy, xiz, etax,etay,etaz, gammax, gammay, gammaz, &
@@ -762,56 +774,85 @@
                                   abs_boundary_jacobian2Dw, &
                                   num_abs_boundary_faces, &
                                   ispec_is_inner, &
-                                  NSOURCES, sourcearrays, islice_selected_source, ispec_selected_source, &
-                                  number_receiver_global, ispec_selected_rec, nrec, nrec_local, &
-                                  SIMULATION_TYPE)
+                                  NSOURCES, nsources_local, &
+                                  sourcearrays, islice_selected_source, ispec_selected_source, &
+                                  number_receiver_global, ispec_selected_rec, &
+                                  nrec, nrec_local, &
+                                  SIMULATION_TYPE,ncuda_devices)
 
-  ! prepares fields on GPU for acoustic simulations 
-  if( ACOUSTIC_SIMULATION ) &
+  call min_all_i(ncuda_devices,ncuda_devices_min)
+  call max_all_i(ncuda_devices,ncuda_devices_max)
+
+  ! prepares fields on GPU for acoustic simulations
+  if( ACOUSTIC_SIMULATION ) then
     call prepare_fields_acoustic_device(Mesh_pointer,rmass_acoustic,rhostore,kappastore, &
                                   num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
                                   ispec_is_acoustic, &
-                                  NOISE_TOMOGRAPHY,num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
+                                  NOISE_TOMOGRAPHY,num_free_surface_faces, &
+                                  free_surface_ispec,free_surface_ijk, &
                                   ABSORBING_CONDITIONS,b_reclen_potential,b_absorb_potential, &
-                                  SIMULATION_TYPE,rho_ac_kl,kappa_ac_kl, &
                                   ELASTIC_SIMULATION, num_coupling_ac_el_faces, &
                                   coupling_ac_el_ispec,coupling_ac_el_ijk, &
                                   coupling_ac_el_normal,coupling_ac_el_jacobian2Dw)
-  
-  ! prepares fields on GPU for elastic simulations 
-  if( ELASTIC_SIMULATION ) &
+
+    if( SIMULATION_TYPE == 3 ) &
+      call prepare_fields_acoustic_adj_dev(Mesh_pointer, &
+                                  SIMULATION_TYPE,rho_ac_kl,kappa_ac_kl, &
+                                  APPROXIMATE_HESS_KL)
+
+  endif
+
+  ! prepares fields on GPU for elastic simulations
+  if( ELASTIC_SIMULATION ) then
     call prepare_fields_elastic_device(Mesh_pointer, NDIM*NGLOB_AB, &
                                   rmass,rho_vp,rho_vs, &
                                   num_phase_ispec_elastic,phase_ispec_inner_elastic, &
                                   ispec_is_elastic, &
                                   ABSORBING_CONDITIONS,b_absorb_field,b_reclen_field, &
-                                  SIMULATION_TYPE,rho_kl,mu_kl,kappa_kl, &
+                                  SIMULATION_TYPE,SAVE_FORWARD, &
                                   COMPUTE_AND_STORE_STRAIN, &
-                                  epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
+                                  epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                                  epsilondev_xz,epsilondev_yz, &
+                                  ATTENUATION, &
+                                  size(R_xx), &
+                                  R_xx,R_yy,R_xy,R_xz,R_yz, &
+                                  one_minus_sum_beta,factor_common, &
+                                  alphaval,betaval,gammaval, &
+                                  OCEANS,rmass_ocean_load, &
+                                  NOISE_TOMOGRAPHY, &
+                                  free_surface_normal,free_surface_ispec,free_surface_ijk, &
+                                  num_free_surface_faces)
+
+    if( SIMULATION_TYPE == 3 ) &
+      call prepare_fields_elastic_adj_dev(Mesh_pointer, NDIM*NGLOB_AB, &
+                                  SIMULATION_TYPE, &
+                                  rho_kl,mu_kl,kappa_kl, &
+                                  COMPUTE_AND_STORE_STRAIN, &
                                   epsilon_trace_over_3, &
-                                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
+                                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                                  b_epsilondev_xz,b_epsilondev_yz, &
                                   b_epsilon_trace_over_3, &
                                   ATTENUATION,size(R_xx), &
-                                  R_xx,R_yy,R_xy,R_xz,R_yz, &
                                   b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                                  one_minus_sum_beta,factor_common, &
-                                  alphaval,betaval,gammaval, &
                                   b_alphaval,b_betaval,b_gammaval, &
-                                  OCEANS,rmass_ocean_load, &
-                                  free_surface_normal,num_free_surface_faces)
+                                  APPROXIMATE_HESS_KL)
 
+  endif
+
   ! prepares needed receiver array for adjoint runs
   if( SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3 ) &
-    call prepare_adjoint_sim2_or_3_constants_device(Mesh_pointer, &
+    call prepare_sim2_or_3_const_device(Mesh_pointer, &
                                   islice_selected_rec,size(islice_selected_rec))
-  
-  ! prepares fields on GPU for noise simulations      
+
+  ! prepares fields on GPU for noise simulations
   if ( NOISE_TOMOGRAPHY > 0 ) then
     ! note: noise tomography is only supported for elastic domains so far.
-    
+
     ! copies noise  arrays to GPU
     call prepare_fields_noise_device(Mesh_pointer, NSPEC_AB, NGLOB_AB, &
-                                  free_surface_ispec,free_surface_ijk,num_free_surface_faces,size(free_surface_ijk), &
+                                  free_surface_ispec, &
+                                  free_surface_ijk, &
+                                  num_free_surface_faces, &
                                   SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
                                   NSTEP,noise_sourcearray, &
                                   normal_x_noise,normal_y_noise,normal_z_noise, &
@@ -821,34 +862,39 @@
   endif ! NOISE_TOMOGRAPHY
 
   ! sends initial data to device
-  
+
   ! puts acoustic initial fields onto GPU
   if( ACOUSTIC_SIMULATION ) then
-    call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
-                          potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)    
+    call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+                          potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
     if( SIMULATION_TYPE == 3 ) &
-      call transfer_b_fields_acoustic_to_device(NGLOB_AB,b_potential_acoustic, &
-                          b_potential_dot_acoustic,b_potential_dot_dot_acoustic,Mesh_pointer)    
+      call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
+                          b_potential_dot_acoustic,b_potential_dot_dot_acoustic,Mesh_pointer)
   endif
 
   ! puts elastic initial fields onto GPU
   if( ELASTIC_SIMULATION ) then
     ! transfer forward and backward fields to device with initial values
-    call transfer_fields_to_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+    call transfer_fields_el_to_device(NDIM*NGLOB_AB,displ,veloc,accel,Mesh_pointer)
     if(SIMULATION_TYPE == 3) &
-      call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc, b_accel,Mesh_pointer)
+      call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
   endif
 
-  ! outputs usage 
+  ! outputs GPU usage to files for all processes
+  call output_free_device_memory(myrank)
+
+  ! outputs usage for main process
   if( myrank == 0 ) then
+    write(IMAIN,*)"  GPU number of devices per node: min =",ncuda_devices_min
+    write(IMAIN,*)"                                  max =",ncuda_devices_max
+    write(IMAIN,*)
+
     call get_free_device_memory(free_mb,used_mb,total_mb)
     write(IMAIN,*)"  GPU usage: free  =",free_mb," MB",nint(free_mb/total_mb*100.0),"%"
     write(IMAIN,*)"             used  =",used_mb," MB",nint(used_mb/total_mb*100.0),"%"
     write(IMAIN,*)"             total =",total_mb," MB",nint(total_mb/total_mb*100.0),"%"
     write(IMAIN,*)
-  endif    
+  endif
 
-  ! outputs GPU usage to files for all processes
-  call output_free_device_memory(myrank)
-  
+
   end subroutine prepare_timerun_GPU

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -34,7 +34,7 @@
   use specfem_par_poroelastic
   implicit none
   real(kind=CUSTOM_REAL):: minl,maxl,min_all,max_all
-  integer :: ier
+  integer :: ier,inum
 
 ! start reading the databasesa
 
@@ -163,14 +163,21 @@
             factor_common(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB),stat=ier)
     if( ier /= 0 ) stop 'error allocating array one_minus_sum_beta etc.'
 
+    ! reads mass matrices
     read(27) rmass
+
     if( OCEANS ) then
       ! ocean mass matrix
       allocate(rmass_ocean_load(NGLOB_AB),stat=ier)
       if( ier /= 0 ) stop 'error allocating array rmass_ocean_load'
       read(27) rmass_ocean_load
+    else
+      ! dummy allocation
+      allocate(rmass_ocean_load(1),stat=ier)
+      if( ier /= 0 ) stop 'error allocating dummy array rmass_ocean_load'
     endif
-    !pll
+
+    !pll material parameters for stacey conditions
     read(27) rho_vp
     read(27) rho_vs
 
@@ -209,11 +216,13 @@
           abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces), &
           abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces), &
           abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array abs_boundary_ispec etc.'
-  read(27) abs_boundary_ispec
-  read(27) abs_boundary_ijk
-  read(27) abs_boundary_jacobian2Dw
-  read(27) abs_boundary_normal
+  if( ier /= 0 ) stop 'error allocating array abs_boundary_ispec etc.'
+  if( num_abs_boundary_faces > 0 ) then
+    read(27) abs_boundary_ispec
+    read(27) abs_boundary_ijk
+    read(27) abs_boundary_jacobian2Dw
+    read(27) abs_boundary_normal
+  endif
 
 ! free surface
   read(27) num_free_surface_faces
@@ -221,34 +230,43 @@
           free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces), &
           free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces), &
           free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array free_surface_ispec etc.'
-  read(27) free_surface_ispec
-  read(27) free_surface_ijk
-  read(27) free_surface_jacobian2Dw
-  read(27) free_surface_normal
-
+  if( ier /= 0 ) stop 'error allocating array free_surface_ispec etc.'
+  if( num_free_surface_faces > 0 ) then
+    read(27) free_surface_ispec
+    read(27) free_surface_ijk
+    read(27) free_surface_jacobian2Dw
+    read(27) free_surface_normal
+  endif
 ! acoustic-elastic coupling surface
   read(27) num_coupling_ac_el_faces
   allocate(coupling_ac_el_normal(NDIM,NGLLSQUARE,num_coupling_ac_el_faces), &
           coupling_ac_el_jacobian2Dw(NGLLSQUARE,num_coupling_ac_el_faces), &
           coupling_ac_el_ijk(3,NGLLSQUARE,num_coupling_ac_el_faces), &
           coupling_ac_el_ispec(num_coupling_ac_el_faces),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array coupling_ac_el_normal etc.'
-  read(27) coupling_ac_el_ispec
-  read(27) coupling_ac_el_ijk
-  read(27) coupling_ac_el_jacobian2Dw
-  read(27) coupling_ac_el_normal
+  if( ier /= 0 ) stop 'error allocating array coupling_ac_el_normal etc.'
+  if( num_coupling_ac_el_faces > 0 ) then
+    read(27) coupling_ac_el_ispec
+    read(27) coupling_ac_el_ijk
+    read(27) coupling_ac_el_jacobian2Dw
+    read(27) coupling_ac_el_normal
+  endif
 
 ! MPI interfaces
   read(27) num_interfaces_ext_mesh
-  read(27) max_nibool_interfaces_ext_mesh
   allocate(my_neighbours_ext_mesh(num_interfaces_ext_mesh), &
-    nibool_interfaces_ext_mesh(num_interfaces_ext_mesh), &
-    ibool_interfaces_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+          nibool_interfaces_ext_mesh(num_interfaces_ext_mesh),stat=ier)
   if( ier /= 0 ) stop 'error allocating array my_neighbours_ext_mesh etc.'
-  read(27) my_neighbours_ext_mesh
-  read(27) nibool_interfaces_ext_mesh
-  read(27) ibool_interfaces_ext_mesh
+  if( num_interfaces_ext_mesh > 0 ) then
+    read(27) max_nibool_interfaces_ext_mesh
+    allocate(ibool_interfaces_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array ibool_interfaces_ext_mesh'
+    read(27) my_neighbours_ext_mesh
+    read(27) nibool_interfaces_ext_mesh
+    read(27) ibool_interfaces_ext_mesh
+  else
+    max_nibool_interfaces_ext_mesh = 0
+    allocate(ibool_interfaces_ext_mesh(0,0),stat=ier)
+  endif
 
   if( ANISOTROPY ) then
     read(27) c11store
@@ -276,6 +294,21 @@
 
   close(27)
 
+  call sum_all_i(count(ispec_is_acoustic(:)),inum)
+  if( myrank == 0 ) then
+    write(IMAIN,*) 'total acoustic elements:',inum
+  endif
+  call sum_all_i(count(ispec_is_elastic(:)),inum)
+  if( myrank == 0 ) then
+    write(IMAIN,*) 'total elastic elements :',inum
+  endif
+  call sum_all_i(num_interfaces_ext_mesh,inum)
+  if(myrank == 0) then
+    write(IMAIN,*) 'number of MPI partition interfaces: ',inum
+    write(IMAIN,*)
+  endif
+
+
 ! MPI communications
   allocate(buffer_send_vector_ext_mesh(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh), &
     buffer_recv_vector_ext_mesh(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh), &
@@ -290,6 +323,9 @@
 ! locate inner and outer elements
   call rmd_setup_inner_outer_elemnts()
 
+!daniel: todo mesh coloring
+!  call rmd_setup_color_perm()
+
 ! gets model dimensions
   minl = minval( xstore )
   maxl = maxval( xstore )
@@ -307,8 +343,10 @@
 
 ! check courant criteria on mesh
   if( ELASTIC_SIMULATION ) then
-    call check_mesh_resolution(myrank,NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore, &
-                        kappastore,mustore,rho_vp,rho_vs,DT,model_speed_max,min_resolved_period )
+    call check_mesh_resolution(myrank,NSPEC_AB,NGLOB_AB, &
+                              ibool,xstore,ystore,zstore, &
+                              kappastore,mustore,rho_vp,rho_vs, &
+                              DT,model_speed_max,min_resolved_period )
   else if( ACOUSTIC_SIMULATION ) then
       allocate(rho_vp(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
       if( ier /= 0 ) stop 'error allocating array rho_vp'
@@ -316,8 +354,10 @@
       if( ier /= 0 ) stop 'error allocating array rho_vs'
       rho_vp = sqrt( kappastore / rhostore ) * rhostore
       rho_vs = 0.0_CUSTOM_REAL
-      call check_mesh_resolution(myrank,NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore, &
-                        kappastore,mustore,rho_vp,rho_vs,DT,model_speed_max,min_resolved_period )
+      call check_mesh_resolution(myrank,NSPEC_AB,NGLOB_AB, &
+                                ibool,xstore,ystore,zstore, &
+                                kappastore,mustore,rho_vp,rho_vs, &
+                                DT,model_speed_max,min_resolved_period )
       deallocate(rho_vp,rho_vs)
   endif
 
@@ -340,6 +380,7 @@
   integer :: iinterface,ier
   character(len=256) :: filename
   logical,dimension(:),allocatable :: iglob_is_inner
+  real :: percentage_edge
 
   ! allocates arrays
   allocate(ispec_is_inner(NSPEC_AB),stat=ier)
@@ -363,7 +404,7 @@
       do j = 1, NGLLY
         do i = 1, NGLLX
           iglob = ibool(i,j,k,ispec)
-          ispec_is_inner(ispec) = iglob_is_inner(iglob) .and. ispec_is_inner(ispec)
+          ispec_is_inner(ispec) = ( iglob_is_inner(iglob) .and. ispec_is_inner(ispec) )
         enddo
       enddo
     enddo
@@ -397,6 +438,8 @@
     ! stores indices of inner and outer elements for faster(?) computation
     num_phase_ispec_acoustic = max(nspec_inner_acoustic,nspec_outer_acoustic)
     allocate( phase_ispec_inner_acoustic(num_phase_ispec_acoustic,2),stat=ier)
+    phase_ispec_inner_acoustic(:,:) = 0
+
     if( ier /= 0 ) stop 'error allocating array phase_ispec_inner_acoustic'
     nspec_inner_acoustic = 0
     nspec_outer_acoustic = 0
@@ -432,8 +475,12 @@
 
     ! stores indices of inner and outer elements for faster(?) computation
     num_phase_ispec_elastic = max(nspec_inner_elastic,nspec_outer_elastic)
+    if( num_phase_ispec_elastic <= 0 ) stop 'error elastic simulation: num_phase_ispec_elastic is zero'
+
     allocate( phase_ispec_inner_elastic(num_phase_ispec_elastic,2),stat=ier)
     if( ier /= 0 ) stop 'error allocating array phase_ispec_inner_elastic'
+    phase_ispec_inner_elastic(:,:) = 0
+
     nspec_inner_elastic = 0
     nspec_outer_elastic = 0
     do ispec = 1, NSPEC_AB
@@ -451,9 +498,301 @@
     !print *,'rank ',myrank,' elastic outer spec: ',nspec_outer_elastic
   endif
 
+  if(myrank == 0) then
+    percentage_edge = 100.*count(ispec_is_inner(:))/real(NSPEC_AB)
+    write(IMAIN,*) 'for overlapping of communications with calculations:'
+    write(IMAIN,*) '  percentage of   edge elements ',100. -percentage_edge,'%'
+    write(IMAIN,*) '  percentage of volume elements ',percentage_edge,'%'
+    write(IMAIN,*)
+  endif
+
+
   end subroutine rmd_setup_inner_outer_elemnts
 
+!
+!-------------------------------------------------------------------------------------------------
+!
 
+!daniel: todo mesh coloring
+  subroutine rmd_setup_color_perm()
+
+  use specfem_par
+  use specfem_par_elastic
+  use specfem_par_acoustic
+  implicit none
+  ! local parameters
+  ! added for color permutation
+  integer :: nb_colors_outer_elements,nb_colors_inner_elements
+  integer, dimension(:), allocatable :: perm
+  integer, dimension(:), allocatable :: first_elem_number_in_this_color
+  integer, dimension(:), allocatable :: num_of_elems_in_this_color
+
+  integer :: icolor,ispec,ispec_counter
+  integer :: nspec_outer,nspec_outer_min_global,nspec_outer_max_global
+
+  integer :: ispec_inner_acoustic,ispec_outer_acoustic
+  integer :: ispec_inner_elastic,ispec_outer_elastic
+  integer :: nspec,nglob
+
+  ! added for sorting
+  integer, dimension(:,:,:,:), allocatable :: temp_array_int
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: temp_array_real
+  logical, dimension(:), allocatable :: temp_array_logical_1D
+  !integer, dimension(:), allocatable :: temp_array_int_1D
+
+  nspec = NSPEC_AB
+  nglob = NGLOB_AB
+
+  !!!! David Michea: detection of the edges, coloring and permutation separately
+  allocate(perm(nspec))
+
+  ! implement mesh coloring for GPUs if needed, to create subsets of disconnected elements
+  ! to remove dependencies and the need for atomic operations in the sum of
+  ! elemental contributions in the solver
+  if(USE_MESH_COLORING_GPU) then
+
+    allocate(first_elem_number_in_this_color(MAX_NUMBER_OF_COLORS + 1))
+
+    call get_perm_color_faster(ispec_is_inner,ibool,perm,nspec,nglob, &
+                              nb_colors_outer_elements,nb_colors_inner_elements, &
+                              nspec_outer,first_elem_number_in_this_color,myrank)
+
+    ! for the last color, the next color is fictitious and its first (fictitious) element number is nspec + 1
+    first_elem_number_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements + 1) = nspec + 1
+
+    allocate(num_of_elems_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements))
+
+    ! save mesh coloring
+    open(unit=99,file=prname(1:len_trim(prname))//'num_of_elems_in_this_color.dat',status='unknown')
+
+    ! number of colors for outer elements
+    write(99,*) nb_colors_outer_elements
+
+    ! number of colors for inner elements
+    write(99,*) nb_colors_inner_elements
+
+    ! number of elements in each color
+    do icolor = 1, nb_colors_outer_elements + nb_colors_inner_elements
+      num_of_elems_in_this_color(icolor) = first_elem_number_in_this_color(icolor+1) - first_elem_number_in_this_color(icolor)
+      write(99,*) num_of_elems_in_this_color(icolor)
+    enddo
+    close(99)
+
+    ! check that the sum of all the numbers of elements found in each color is equal
+    ! to the total number of elements in the mesh
+    if(sum(num_of_elems_in_this_color) /= nspec) then
+      print *,'nspec = ',nspec
+      print *,'total number of elements in all the colors of the mesh = ',sum(num_of_elems_in_this_color)
+      call exit_MPI(myrank, 'incorrect total number of elements in all the colors of the mesh')
+    endif
+
+    ! check that the sum of all the numbers of elements found in each color for the outer elements is equal
+    ! to the total number of outer elements found in the mesh
+    if(sum(num_of_elems_in_this_color(1:nb_colors_outer_elements)) /= nspec_outer) then
+      print *,'nspec_outer = ',nspec_outer
+      print *,'total number of elements in all the colors of the mesh for outer elements = ',sum(num_of_elems_in_this_color)
+      call exit_MPI(myrank, 'incorrect total number of elements in all the colors of the mesh for outer elements')
+    endif
+
+    deallocate(first_elem_number_in_this_color)
+    deallocate(num_of_elems_in_this_color)
+
+  else
+
+!! DK DK for regular C + MPI version for CPUs: do not use colors but nonetheless put all the outer elements
+!! DK DK first in order to be able to overlap non-blocking MPI communications with calculations
+
+!! DK DK nov 2010, for Rosa Badia / StarSs:
+!! no need for mesh coloring, but need to implement inner/outer subsets for non blocking MPI for StarSs
+    ispec_counter = 0
+    perm(:) = 0
+
+    ! first generate all the outer elements
+    do ispec = 1,nspec
+      if( ispec_is_inner(ispec) .eqv. .false.) then
+        ispec_counter = ispec_counter + 1
+        perm(ispec) = ispec_counter
+      endif
+    enddo
+
+    ! make sure we have detected some outer elements
+    !if(ispec_counter <= 0) call exit_MPI(myrank, 'fatal error: no outer elements detected!')
+
+    ! store total number of outer elements
+    nspec_outer = ispec_counter
+
+    ! then generate all the inner elements
+    do ispec = 1,nspec
+      if( ispec_is_inner(ispec) .eqv. .true.) then
+        ispec_counter = ispec_counter + 1
+        perm(ispec) = ispec_counter - nspec_outer ! starts again at 1
+      endif
+    enddo
+
+    ! test that all the elements have been used once and only once
+    if(ispec_counter /= nspec) call exit_MPI(myrank, 'fatal error: ispec_counter not equal to nspec')
+
+    ! do basic checks
+    if(minval(perm) /= 1) call exit_MPI(myrank, 'minval(perm) should be 1')
+    !if(maxval(perm) /= nspec) call exit_MPI(myrank, 'maxval(perm) should be nspec')
+
+
+  endif
+
+  ! sets up elements for loops in acoustic simulations
+  if( ACOUSTIC_SIMULATION ) then
+    ispec_inner_acoustic = 0
+    ispec_outer_acoustic = 0
+    do ispec = 1, NSPEC_AB
+      if( ispec_is_acoustic(ispec) ) then
+        if( ispec_is_inner(ispec) .eqv. .true. ) then
+          !ispec_inner_acoustic = ispec_inner_acoustic + 1
+          ispec_inner_acoustic = perm(ispec)
+          if( ispec_inner_acoustic < 1 .or. ispec_inner_acoustic > num_phase_ispec_acoustic ) &
+            call exit_MPI(myrank,'error inner acoustic permutation')
+
+          phase_ispec_inner_acoustic(ispec_inner_acoustic,2) = ispec
+
+        else
+          !ispec_outer_acoustic = ispec_outer_acoustic + 1
+          ispec_outer_acoustic = perm(ispec)
+          if( ispec_outer_acoustic < 1 .or. ispec_outer_acoustic > num_phase_ispec_acoustic ) &
+            call exit_MPI(myrank,'error outer acoustic permutation')
+
+          phase_ispec_inner_acoustic(ispec_outer_acoustic,1) = ispec
+
+        endif
+      endif
+    enddo
+  endif
+
+  ! sets up elements for loops in elastic simulations
+  if( ELASTIC_SIMULATION ) then
+    ispec_inner_elastic = 0
+    ispec_outer_elastic = 0
+    do ispec = 1, NSPEC_AB
+      if( ispec_is_elastic(ispec) ) then
+        if( ispec_is_inner(ispec) .eqv. .true. ) then
+          !ispec_inner_elastic = ispec_inner_elastic + 1
+          ispec_inner_elastic = perm(ispec)
+          if( ispec_inner_elastic < 1 .or. ispec_inner_elastic > num_phase_ispec_elastic ) then
+            print*,'error: inner elastic permutation',ispec_inner_elastic,num_phase_ispec_elastic,nspec_outer
+            call exit_MPI(myrank,'error inner elastic permutation')
+          endif
+          phase_ispec_inner_elastic(ispec_inner_elastic,2) = ispec
+
+        else
+          !ispec_outer_elastic = ispec_outer_elastic + 1
+          ispec_outer_elastic = perm(ispec)
+          if( ispec_outer_elastic < 1 .or. ispec_outer_elastic > num_phase_ispec_elastic ) then
+            print*,'error: outer elastic permutation',ispec_outer_elastic,num_phase_ispec_elastic,nspec_outer
+            call exit_MPI(myrank,'error outer elastic permutation')
+          endif
+          phase_ispec_inner_elastic(ispec_outer_elastic,1) = ispec
+
+        endif
+      endif
+    enddo
+  endif
+
+  ! sorts array according to permutation
+  ! SORT_MESH_INNER_OUTER
+!daniel
+  if( .false. ) then
+
+    ! permutation of ibool
+    allocate(temp_array_int(NGLLX,NGLLY,NGLLZ,nspec))
+    call permute_elements_integer(ibool,temp_array_int,perm,nspec)
+    deallocate(temp_array_int)
+
+    ! element domain flags
+    allocate(temp_array_logical_1D(nglob))
+    temp_array_logical_1D(:) = ispec_is_acoustic
+    do ispec = 1, nspec
+      ispec_is_acoustic(perm(ispec)) = temp_array_logical_1D(ispec)
+    enddo
+    temp_array_logical_1D(:) = ispec_is_elastic
+    do ispec = 1, nspec
+      ispec_is_elastic(perm(ispec)) = temp_array_logical_1D(ispec)
+    enddo
+    !temp_array_logical_1D(:) = ispec_is_poroelastic
+    !do ispec = 1, nspec
+    !  ispec_is_poroelastic(perm(ispec)) = temp_array_logical_1D(ispec)
+    !enddo
+    deallocate(temp_array_logical_1D)
+
+    ! mesh arrays
+    allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec))
+    call permute_elements_real(xix,temp_array_real,perm,nspec)
+    call permute_elements_real(xiy,temp_array_real,perm,nspec)
+    call permute_elements_real(xiz,temp_array_real,perm,nspec)
+    call permute_elements_real(etax,temp_array_real,perm,nspec)
+    call permute_elements_real(etay,temp_array_real,perm,nspec)
+    call permute_elements_real(etaz,temp_array_real,perm,nspec)
+    call permute_elements_real(gammax,temp_array_real,perm,nspec)
+    call permute_elements_real(gammay,temp_array_real,perm,nspec)
+    call permute_elements_real(gammaz,temp_array_real,perm,nspec)
+    call permute_elements_real(jacobian,temp_array_real,perm,nspec)
+
+    call permute_elements_real(kappastore,temp_array_real,perm,nspec)
+    call permute_elements_real(mustore,temp_array_real,perm,nspec)
+
+    ! acoustic arrays
+    if( ACOUSTIC_SIMULATION ) then
+      call permute_elements_real(rhostore,temp_array_real,perm,nspec)
+    endif
+
+    ! elastic arrays
+    if( ELASTIC_SIMULATION ) then
+      call permute_elements_real(rho_vp,temp_array_real,perm,nspec)
+      call permute_elements_real(rho_vs,temp_array_real,perm,nspec)
+      if( ANISOTROPY ) then
+        call permute_elements_real(c11store,temp_array_real,perm,nspec)
+        call permute_elements_real(c12store,temp_array_real,perm,nspec)
+        call permute_elements_real(c13store,temp_array_real,perm,nspec)
+        call permute_elements_real(c14store,temp_array_real,perm,nspec)
+        call permute_elements_real(c15store,temp_array_real,perm,nspec)
+        call permute_elements_real(c16store,temp_array_real,perm,nspec)
+        call permute_elements_real(c22store,temp_array_real,perm,nspec)
+        call permute_elements_real(c23store,temp_array_real,perm,nspec)
+        call permute_elements_real(c24store,temp_array_real,perm,nspec)
+        call permute_elements_real(c25store,temp_array_real,perm,nspec)
+        call permute_elements_real(c33store,temp_array_real,perm,nspec)
+        call permute_elements_real(c34store,temp_array_real,perm,nspec)
+        call permute_elements_real(c35store,temp_array_real,perm,nspec)
+        call permute_elements_real(c36store,temp_array_real,perm,nspec)
+        call permute_elements_real(c44store,temp_array_real,perm,nspec)
+        call permute_elements_real(c45store,temp_array_real,perm,nspec)
+        call permute_elements_real(c46store,temp_array_real,perm,nspec)
+        call permute_elements_real(c55store,temp_array_real,perm,nspec)
+        call permute_elements_real(c56store,temp_array_real,perm,nspec)
+        call permute_elements_real(c66store,temp_array_real,perm,nspec)
+      endif
+    endif
+
+    deallocate(temp_array_real)
+  endif
+
+  ! user output
+  !call MPI_ALLREDUCE(nspec_outer,nspec_outer_min_global,1,MPI_INTEGER,MPI_MIN,MPI_COMM_WORLD,ier)
+  !call MPI_ALLREDUCE(nspec_outer,nspec_outer_max_global,1,MPI_INTEGER,MPI_MAX,MPI_COMM_WORLD,ier)
+  call min_all_i(nspec_outer,nspec_outer_min_global)
+  call max_all_i(nspec_outer,nspec_outer_max_global)
+  call min_all_i(nspec_outer,nspec_outer_min_global)
+  call max_all_i(nspec_outer,nspec_outer_max_global)
+
+  if(myrank == 0) then
+    write(IMAIN,*) 'mesh coloring:'
+    write(IMAIN,*) '  permutation   : use coloring = ',USE_MESH_COLORING_GPU
+    write(IMAIN,*) '  outer elements: min = ',nspec_outer_min_global
+    write(IMAIN,*) '                  max = ',nspec_outer_max_global
+    write(IMAIN,*)
+  endif
+
+  deallocate(perm)
+
+  end subroutine rmd_setup_color_perm
+
 !
 !-------------------------------------------------------------------------------------------------
 !
@@ -513,7 +852,11 @@
     ! preconditioner
     if ( APPROXIMATE_HESS_KL ) then
       allocate(hess_kl(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT),stat=ier)
-      if( ier /= 0 ) stop 'error allocating array hess_kl'    
+      if( ier /= 0 ) stop 'error allocating array hess_kl'
+    else
+      ! dummy allocation
+      allocate(hess_kl(0,0,0,0),stat=ier)
+      if( ier /= 0 ) stop 'error allocating dummy array hess_kl'
     endif
 
     ! MPI handling
@@ -569,13 +912,17 @@
             kappa_ac_kl(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT), &
             alpha_ac_kl(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT),stat=ier)
     if( ier /= 0 ) stop 'error allocating array rho_ac_kl etc.'
-    
+
     ! preconditioner
     if ( APPROXIMATE_HESS_KL ) then
       allocate(hess_ac_kl(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT),stat=ier)
-      if( ier /= 0 ) stop 'error allocating array hess_ac_kl'    
+      if( ier /= 0 ) stop 'error allocating array hess_ac_kl'
+    else
+      ! dummy allocation
+      allocate(hess_ac_kl(0,0,0,0),stat=ier)
+      if( ier /= 0 ) stop 'error allocating dummy array hess_ac_kl'
     endif
-    
+
     ! MPI handling
     allocate(b_request_send_scalar_ext_mesh(num_interfaces_ext_mesh), &
       b_request_recv_scalar_ext_mesh(num_interfaces_ext_mesh), &

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/save_adjoint_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/save_adjoint_kernels.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/save_adjoint_kernels.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -196,9 +196,9 @@
   if ( APPROXIMATE_HESS_KL ) then
     call save_kernels_hessian()
   endif
-  
+
   end subroutine save_adjoint_kernels
-  
+
 !
 !-------------------------------------------------------------------------------------------------
 !
@@ -211,7 +211,7 @@
 
   implicit none
   integer :: ier
-  
+
   ! acoustic domains
   if( ACOUSTIC_SIMULATION ) then
     ! scales approximate hessian
@@ -223,7 +223,7 @@
     if( ier /= 0 ) stop 'error opening file hess_acoustic_kernel.bin'
     write(27) hess_ac_kl
     close(27)
-  endif  
+  endif
 
   ! elastic domains
   if( ELASTIC_SIMULATION ) then
@@ -236,7 +236,7 @@
     if( ier /= 0 ) stop 'error opening file hess_kernel.bin'
     write(27) hess_kl
     close(27)
-  endif  
-  
+  endif
+
   end subroutine save_kernels_hessian
-  
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_GLL_points.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_GLL_points.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_GLL_points.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -41,9 +41,9 @@
 
 ! set up GLL points, weights and derivation matrices for reference element (between -1,1)
   call define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
-         hprime_xx,hprime_yy,hprime_zz, &
-         hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
-         wgllwgll_xy,wgllwgll_xz,wgllwgll_yz)
+                                hprime_xx,hprime_yy,hprime_zz, &
+                                hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+                                wgllwgll_xy,wgllwgll_xz,wgllwgll_yz)
 
 ! define transpose of derivation matrix
   do j = 1,NGLLY

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -206,6 +206,12 @@
     call exit_mpi(myrank,'error negative USER_T0 parameter in constants.h')
   endif
 
+  ! count number of sources located in this slice
+  nsources_local = 0
+  do isource = 1, NSOURCES
+    if(myrank == islice_selected_source(isource)) nsources_local = nsources_local + 1
+  enddo
+
   ! checks if source is in an acoustic element and exactly on the free surface because pressure is zero there
   call setup_sources_check_acoustic()
 
@@ -514,7 +520,7 @@
   real(kind=CUSTOM_REAL) :: junk
   integer :: isource,ispec
   integer :: irec !,irec_local
-  integer :: i,j,k,iglob  
+  integer :: i,j,k,iglob
   integer :: icomp,itime,nadj_files_found,nadj_files_found_tot,ier
   character(len=3),dimension(NDIM) :: comp ! = (/ "BHE", "BHN", "BHZ" /)
   character(len=256) :: filename
@@ -569,9 +575,9 @@
           iglob = ibool(nint(xi_source(isource)), &
                         nint(eta_source(isource)), &
                         nint(gamma_source(isource)), &
-                        ispec)            
+                        ispec)
           ! sets sourcearrays
-          sourcearray(:,:,:,:) = 0.0          
+          sourcearray(:,:,:,:) = 0.0
           do k=1,NGLLZ
             do j=1,NGLLY
               do i=1,NGLLX
@@ -589,13 +595,18 @@
               enddo
             enddo
           enddo
-        endif                
+        endif
 
         ! stores source excitations
         sourcearrays(isource,:,:,:,:) = sourcearray(:,:,:,:)
 
       endif
     enddo
+  else
+    ! SIMULATION_TYPE == 2
+    ! allocate dummy array (needed for subroutine calls)
+    allocate(sourcearrays(0,0,0,0,0),stat=ier)
+    if( ier /= 0 ) stop 'error allocating dummy sourcearrays'
   endif
 
   ! ADJOINT simulations
@@ -693,17 +704,19 @@
 
   integer :: irec,irec_local,isource,ier
 
-! stores local receivers interpolation factors
+  ! needs to be allocate for subroutine calls (even if nrec_local == 0)
+  allocate(number_receiver_global(nrec_local),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array number_receiver_global'
+
+  ! stores local receivers interpolation factors
   if (nrec_local > 0) then
-  ! allocate Lagrange interpolators for receivers
+    ! allocate Lagrange interpolators for receivers
     allocate(hxir_store(nrec_local,NGLLX), &
             hetar_store(nrec_local,NGLLY), &
             hgammar_store(nrec_local,NGLLZ),stat=ier)
     if( ier /= 0 ) stop 'error allocating array hxir_store etc.'
 
-  ! define local to global receiver numbering mapping
-    allocate(number_receiver_global(nrec_local),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array number_reciever_global'
+    ! define local to global receiver numbering mapping
     irec_local = 0
     if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
       do irec = 1,nrec
@@ -776,7 +789,11 @@
   double precision :: xmesh,ymesh,zmesh
   real(kind=CUSTOM_REAL),dimension(NGNOD) :: xelm,yelm,zelm
   integer :: ia,ispec,isource,irec,ier,totalpoints
-  INTEGER(kind=4) :: system_command_status
+
+  !INTEGER(kind=4) :: system_command_status
+  !integer :: ret
+  !integer,external :: system
+
   character(len=256) :: filename,filename_new,system_command,system_command1,system_command2
 
   ! determines number of points for vtk file
@@ -800,7 +817,7 @@
   endif
 
   ! sources
-  if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then  
+  if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
     do isource=1,NSOURCES
       ! spectral element id
       ispec = ispec_selected_source(isource)
@@ -855,7 +872,7 @@
       endif
     enddo ! NSOURCES
   endif
-  
+
   ! receivers
   do irec=1,nrec
     ispec = ispec_selected_rec(irec)
@@ -906,38 +923,49 @@
     close(IOVTK)
 
     ! creates additional receiver and source files
-    if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then    
+    if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
     ! extracts receiver locations
     filename = trim(OUTPUT_FILES)//'/sr.vtk'
     filename_new = trim(OUTPUT_FILES)//'/receiver.vtk'
+
+    ! vtk file for receivers only
     write(system_command, &
   "('awk ',a1,'{if(NR<5) print $0;if(NR==5)print ',a1,'POINTS',i6,' float',a1,';if(NR>5+',i6,')print $0}',a1,' < ',a,' > ',a)")&
       "'",'"',nrec,'"',NSOURCES,"'",trim(filename),trim(filename_new)
-      call system(system_command,system_command_status)
 
+!daniel:
+! gfortran
+!      call system(trim(system_command),system_command_status)
+! ifort
+!      ret = system(trim(system_command))
+
       ! extracts source locations
       !"('awk ',a1,'{if(NR< 6 + ',i6,') print $0}END{print}',a1,' < ',a,' > ',a)")&
       filename_new = trim(OUTPUT_FILES)//'/source.vtk'
-    
+
       write(system_command1, &
   "('awk ',a1,'{if(NR<5) print $0;if(NR==5)print ',a1,'POINTS',i6,' float',a1,';')") &
         "'",'"',NSOURCES,'"'
 
       !daniel
-      !print*,'command 1:',trim(system_command1)  
+      !print*,'command 1:',trim(system_command1)
 
       write(system_command2, &
   "('if(NR>5 && NR <6+',i6,')print $0}END{print ',a,'}',a1,' < ',a,' > ',a)") &
         NSOURCES,'" "',"'",trim(filename),trim(filename_new)
-    
-      !print*,'command 2:',trim(system_command2)  
-          
+
+      !print*,'command 2:',trim(system_command2)
+
       system_command = trim(system_command1)//trim(system_command2)
 
-      !print*,'command:',trim(system_command)  
+      !print*,'command:',trim(system_command)
+!daniel:
+! gfortran
+!      call system(trim(system_command),system_command_status)
+! ifort
+!      ret = system(trim(system_command))
 
-      call system(trim(system_command),system_command_status)
-    endif  
+    endif
   endif
 
 end subroutine setup_sources_receivers_VTKfile

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,339 +1,339 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-!
-! United States and French Government Sponsorship Acknowledged.
-
-  subroutine specfem3D()
-
-  use specfem_par
-
-
-!=============================================================================!
-!                                                                             !
-!  specfem3D is a 3-D spectral-element solver for a local or regional model.  !
-!  It uses a mesh generated by program generate_databases                     !
-!                                                                             !
-!=============================================================================!
-!
-! If you use this code for your own research, please cite at least one article
-! written by the developers of the package, for instance:
-!
-! @ARTICLE{TrKoLi08,
-! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
-! title = {Spectral-Element and Adjoint Methods in Seismology},
-! journal = {Communications in Computational Physics},
-! year = {2008},
-! volume = {3},
-! pages = {1-32},
-! number = {1}}
-!
-! or
-!
-! @ARTICLE{VaCaSaKoVi99,
-! author = {R. Vai and J. M. Castillo-Covarrubias and F. J. S\'anchez-Sesma and
-! D. Komatitsch and J. P. Vilotte},
-! title = {Elastic wave propagation in an irregularly layered medium},
-! journal = {Soil Dynamics and Earthquake Engineering},
-! year = {1999},
-! volume = {18},
-! pages = {11-18},
-! number = {1},
-! doi = {10.1016/S0267-7261(98)00027-X}}
-!
-! @ARTICLE{LeChKoHuTr09,
-! author = {Shiann Jong Lee and Yu Chang Chan and Dimitri Komatitsch and Bor
-! Shouh Huang and Jeroen Tromp},
-! title = {Effects of realistic surface topography on seismic ground motion
-! in the {Y}angminshan region of {T}aiwan based upon the spectral-element
-! method and {LiDAR DTM}},
-! journal = {Bull. Seismol. Soc. Am.},
-! year = {2009},
-! volume = {99},
-! pages = {681-693},
-! number = {2A},
-! doi = {10.1785/0120080264}}
-!
-! @ARTICLE{LeChLiKoHuTr08,
-! author = {Shiann Jong Lee and How Wei Chen and Qinya Liu and Dimitri Komatitsch
-! and Bor Shouh Huang and Jeroen Tromp},
-! title = {Three-Dimensional Simulations of Seismic Wave Propagation in the
-! {T}aipei Basin with Realistic Topography Based upon the Spectral-Element Method},
-! journal = {Bull. Seismol. Soc. Am.},
-! year = {2008},
-! volume = {98},
-! pages = {253-264},
-! number = {1},
-! doi = {10.1785/0120070033}}
-!
-! @ARTICLE{LeKoHuTr09,
-! author = {S. J. Lee and Dimitri Komatitsch and B. S. Huang and J. Tromp},
-! title = {Effects of topography on seismic wave propagation: An example from
-! northern {T}aiwan},
-! journal = {Bull. Seismol. Soc. Am.},
-! year = {2009},
-! volume = {99},
-! pages = {314-325},
-! number = {1},
-! doi = {10.1785/0120080020}}
-!
-! @ARTICLE{KoErGoMi10,
-! author = {Dimitri Komatitsch and Gordon Erlebacher and Dominik G\"oddeke and
-! David Mich\'ea},
-! title = {High-order finite-element seismic wave propagation modeling with
-! {MPI} on a large {GPU} cluster},
-! journal = {J. Comput. Phys.},
-! year = {2010},
-! volume = {229},
-! pages = {7692-7714},
-! number = {20},
-! doi = {10.1016/j.jcp.2010.06.024}}
-!
-! @ARTICLE{KoGoErMi10,
-! author = {Dimitri Komatitsch and Dominik G\"oddeke and Gordon Erlebacher and
-! David Mich\'ea},
-! title = {Modeling the propagation of elastic waves using spectral elements
-! on a cluster of 192 {GPU}s},
-! journal = {Computer Science Research and Development},
-! year = {2010},
-! volume = {25},
-! pages = {75-82},
-! number = {1-2},
-! doi = {10.1007/s00450-010-0109-1}}
-!
-! @ARTICLE{KoMiEr09,
-! author = {Dimitri Komatitsch and David Mich\'ea and Gordon Erlebacher},
-! title = {Porting a high-order finite-element earthquake modeling application
-! to {NVIDIA} graphics cards using {CUDA}},
-! journal = {Journal of Parallel and Distributed Computing},
-! year = {2009},
-! volume = {69},
-! pages = {451-460},
-! number = {5},
-! doi = {10.1016/j.jpdc.2009.01.006}}
-!
-! @ARTICLE{LiPoKoTr04,
-! author = {Qinya Liu and Jascha Polet and Dimitri Komatitsch and Jeroen Tromp},
-! title = {Spectral-element moment tensor inversions for earthquakes in {S}outhern {C}alifornia},
-! journal={Bull. Seismol. Soc. Am.},
-! year = {2004},
-! volume = {94},
-! pages = {1748-1761},
-! number = {5},
-! doi = {10.1785/012004038}}
-!
-! @INCOLLECTION{ChKoViCaVaFe07,
-! author = {Emmanuel Chaljub and Dimitri Komatitsch and Jean-Pierre Vilotte and
-! Yann Capdeville and Bernard Valette and Gaetano Festa},
-! title = {Spectral Element Analysis in Seismology},
-! booktitle = {Advances in Wave Propagation in Heterogeneous Media},
-! publisher = {Elsevier - Academic Press},
-! year = {2007},
-! editor = {Ru-Shan Wu and Val\'erie Maupin},
-! volume = {48},
-! series = {Advances in Geophysics},
-! pages = {365-419}}
-!
-! @ARTICLE{KoVi98,
-! author={D. Komatitsch and J. P. Vilotte},
-! title={The spectral-element method: an efficient tool to simulate the seismic response of 2{D} and 3{D} geological structures},
-! journal={Bull. Seismol. Soc. Am.},
-! year=1998,
-! volume=88,
-! number=2,
-! pages={368-392}}
-!
-! @ARTICLE{KoTr99,
-! author={D. Komatitsch and J. Tromp},
-! year=1999,
-! title={Introduction to the spectral-element method for 3-{D} seismic wave propagation},
-! journal={Geophys. J. Int.},
-! volume=139,
-! number=3,
-! pages={806-822},
-! doi={10.1046/j.1365-246x.1999.00967.x}}
-!
-! @ARTICLE{KoLiTrSuStSh04,
-! author={Dimitri Komatitsch and Qinya Liu and Jeroen Tromp and Peter S\"{u}ss
-!   and Christiane Stidham and John H. Shaw},
-! year=2004,
-! title={Simulations of Ground Motion in the {L}os {A}ngeles {B}asin
-!   based upon the Spectral-Element Method},
-! journal={Bull. Seism. Soc. Am.},
-! volume=94,
-! number=1,
-! pages={187-206}}
-!
-! and/or another article from http://web.univ-pau.fr/~dkomati1/publications.html
-!
-!
-! If you use the kernel capabilities of the code, please cite at least one article
-! written by the developers of the package, for instance:
-!
-! @ARTICLE{TrKoLi08,
-! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
-! title = {Spectral-Element and Adjoint Methods in Seismology},
-! journal = {Communications in Computational Physics},
-! year = {2008},
-! volume = {3},
-! pages = {1-32},
-! number = {1}}
-!
-! or
-!
-! @ARTICLE{LiTr06,
-! author={Qinya Liu and Jeroen Tromp},
-! title={Finite-frequency kernels based on adjoint methods},
-! journal={Bull. Seismol. Soc. Am.},
-! year=2006,
-! volume=96,
-! number=6,
-! pages={2383-2397},
-! doi={10.1785/0120060041}}
-!
-!
-! Reference frame - convention:
-! ----------------------------
-!
-! The code uses the following convention for the reference frame:
-!
-!  - X axis is East
-!  - Y axis is North
-!  - Z axis is up
-!
-! Note that this convention is different from both the Aki-Richards convention
-! and the Harvard CMT convention.
-!
-! Let us recall that the Aki-Richards convention is:
-!
-!  - X axis is North
-!  - Y axis is East
-!  - Z axis is down
-!
-! and that the Harvard CMT convention is:
-!
-!  - X axis is South
-!  - Y axis is East
-!  - Z axis is up
-!
-! To report bugs or suggest improvements to the code, please send an email
-! to Jeroen Tromp <jtromp AT princeton.edu> and/or use our online
-! bug tracking system at http://www.geodynamics.org/roundup .
-!
-! Evolution of the code:
-! ---------------------
-!
-! MPI v. 2.0 "Sesame" (Spectral ElementS on Any MEsh), November 2010:
-! Dimitri Komatitsch, Nicolas Le Goff, Roland Martin and Pieyre Le Loher, University of Pau, France,
-! Jeroen Tromp and the Princeton group of developers, Princeton University, USA,
-! and Emanuele Casarotti, INGV Roma, Italy:
-!  support for CUBIT meshes decomposed by SCOTCH, METIS or ZOLTAN;
-!  much faster solver using Michel Deville's inlined matrix products.
-!
-! MPI v. 1.4 Dimitri Komatitsch, University of Pau, Qinya Liu and others, Caltech, September 2006:
-!  better adjoint and kernel calculations, faster and better I/Os
-!  on very large systems, many small improvements and bug fixes
-!
-! MPI v. 1.3 Dimitri Komatitsch, University of Pau, and Qinya Liu, Caltech, July 2005:
-!  serial version, regular mesh, adjoint and kernel calculations, ParaView support
-!
-! MPI v. 1.2 Min Chen and Dimitri Komatitsch, Caltech, July 2004:
-!  full anisotropy, volume movie
-!
-! MPI v. 1.1 Dimitri Komatitsch, Caltech, October 2002: Zhu's Moho map, scaling
-!  of Vs with depth, Hauksson's regional model, attenuation, oceans, movies
-!
-! MPI v. 1.0 Dimitri Komatitsch, Caltech, USA, May 2002: first MPI version based on global code
-!
-! Dimitri Komatitsch, IPG Paris, France, December 1996: first 3-D solver for the CM-5 Connection Machine,
-!    parallelized on 128 processors using Connection Machine Fortran
-!
-
-! in case of an acoustic medium, a potential Chi of (density * displacement) is used as in Chaljub and Valette,
-! Geophysical Journal International, vol. 158, p. 131-141 (2004) and *NOT* a velocity potential
-! as in Komatitsch and Tromp, Geophysical Journal International, vol. 150, p. 303-318 (2002).
-!
-! This permits acoustic-elastic coupling based on a non-iterative time scheme.
-! Displacement is then:
-!     u = grad(Chi) / rho
-! Velocity is then:
-!     v = grad(Chi_dot) / rho
-! (Chi_dot being the time derivative of Chi)
-! and pressure is:
-!     p = - Chi_dot_dot
-! (Chi_dot_dot being the time second derivative of Chi).
-!
-! The source in an acoustic element is a pressure source.
-!
-! First-order acoustic-acoustic discontinuities are also handled automatically
-! because pressure is continuous at such an interface, therefore Chi_dot_dot
-! is continuous, therefore Chi is also continuous, which is consistent with
-! the spectral-element basis functions and with the assembling process.
-! This is the reason why a simple displacement potential u = grad(Chi) would
-! not work because it would be discontinuous at such an interface and would
-! therefore not be consistent with the basis functions.
-
-! ************** PROGRAM STARTS HERE **************
-
-  ! force Flush-To-Zero if available to avoid very slow Gradual Underflow trapping
-  call force_ftz()
-
-! reads in parameters
-  call initialize_simulation()
-
-
-! reads in external mesh
-  call read_mesh_databases()
-
-
-! sets up reference element GLL points/weights/derivatives
-  call setup_GLL_points()
-
-
-! detects surfaces
-  call detect_mesh_surfaces()
-
-
-! reads topography & bathymetry
-  call read_topography_bathymetry()
-
-
-! prepares sources and receivers
-  call setup_sources_receivers()
-
-
-! sets up and precomputes simulation arrays
-  call prepare_timerun()
-
-
-! steps through time iterations
-  call iterate_time()
-
-
-! saves last time frame and finishes kernel calculations
-  call finalize_simulation()
-
-  end subroutine specfem3D
-
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+!
+! United States and French Government Sponsorship Acknowledged.
+
+  subroutine specfem3D()
+
+  use specfem_par
+
+
+!=============================================================================!
+!                                                                             !
+!  specfem3D is a 3-D spectral-element solver for a local or regional model.  !
+!  It uses a mesh generated by program generate_databases                     !
+!                                                                             !
+!=============================================================================!
+!
+! If you use this code for your own research, please cite at least one article
+! written by the developers of the package, for instance:
+!
+! @ARTICLE{TrKoLi08,
+! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
+! title = {Spectral-Element and Adjoint Methods in Seismology},
+! journal = {Communications in Computational Physics},
+! year = {2008},
+! volume = {3},
+! pages = {1-32},
+! number = {1}}
+!
+! or
+!
+! @ARTICLE{VaCaSaKoVi99,
+! author = {R. Vai and J. M. Castillo-Covarrubias and F. J. S\'anchez-Sesma and
+! D. Komatitsch and J. P. Vilotte},
+! title = {Elastic wave propagation in an irregularly layered medium},
+! journal = {Soil Dynamics and Earthquake Engineering},
+! year = {1999},
+! volume = {18},
+! pages = {11-18},
+! number = {1},
+! doi = {10.1016/S0267-7261(98)00027-X}}
+!
+! @ARTICLE{LeChKoHuTr09,
+! author = {Shiann Jong Lee and Yu Chang Chan and Dimitri Komatitsch and Bor
+! Shouh Huang and Jeroen Tromp},
+! title = {Effects of realistic surface topography on seismic ground motion
+! in the {Y}angminshan region of {T}aiwan based upon the spectral-element
+! method and {LiDAR DTM}},
+! journal = {Bull. Seismol. Soc. Am.},
+! year = {2009},
+! volume = {99},
+! pages = {681-693},
+! number = {2A},
+! doi = {10.1785/0120080264}}
+!
+! @ARTICLE{LeChLiKoHuTr08,
+! author = {Shiann Jong Lee and How Wei Chen and Qinya Liu and Dimitri Komatitsch
+! and Bor Shouh Huang and Jeroen Tromp},
+! title = {Three-Dimensional Simulations of Seismic Wave Propagation in the
+! {T}aipei Basin with Realistic Topography Based upon the Spectral-Element Method},
+! journal = {Bull. Seismol. Soc. Am.},
+! year = {2008},
+! volume = {98},
+! pages = {253-264},
+! number = {1},
+! doi = {10.1785/0120070033}}
+!
+! @ARTICLE{LeKoHuTr09,
+! author = {S. J. Lee and Dimitri Komatitsch and B. S. Huang and J. Tromp},
+! title = {Effects of topography on seismic wave propagation: An example from
+! northern {T}aiwan},
+! journal = {Bull. Seismol. Soc. Am.},
+! year = {2009},
+! volume = {99},
+! pages = {314-325},
+! number = {1},
+! doi = {10.1785/0120080020}}
+!
+! @ARTICLE{KoErGoMi10,
+! author = {Dimitri Komatitsch and Gordon Erlebacher and Dominik G\"oddeke and
+! David Mich\'ea},
+! title = {High-order finite-element seismic wave propagation modeling with
+! {MPI} on a large {GPU} cluster},
+! journal = {J. Comput. Phys.},
+! year = {2010},
+! volume = {229},
+! pages = {7692-7714},
+! number = {20},
+! doi = {10.1016/j.jcp.2010.06.024}}
+!
+! @ARTICLE{KoGoErMi10,
+! author = {Dimitri Komatitsch and Dominik G\"oddeke and Gordon Erlebacher and
+! David Mich\'ea},
+! title = {Modeling the propagation of elastic waves using spectral elements
+! on a cluster of 192 {GPU}s},
+! journal = {Computer Science Research and Development},
+! year = {2010},
+! volume = {25},
+! pages = {75-82},
+! number = {1-2},
+! doi = {10.1007/s00450-010-0109-1}}
+!
+! @ARTICLE{KoMiEr09,
+! author = {Dimitri Komatitsch and David Mich\'ea and Gordon Erlebacher},
+! title = {Porting a high-order finite-element earthquake modeling application
+! to {NVIDIA} graphics cards using {CUDA}},
+! journal = {Journal of Parallel and Distributed Computing},
+! year = {2009},
+! volume = {69},
+! pages = {451-460},
+! number = {5},
+! doi = {10.1016/j.jpdc.2009.01.006}}
+!
+! @ARTICLE{LiPoKoTr04,
+! author = {Qinya Liu and Jascha Polet and Dimitri Komatitsch and Jeroen Tromp},
+! title = {Spectral-element moment tensor inversions for earthquakes in {S}outhern {C}alifornia},
+! journal={Bull. Seismol. Soc. Am.},
+! year = {2004},
+! volume = {94},
+! pages = {1748-1761},
+! number = {5},
+! doi = {10.1785/012004038}}
+!
+! @INCOLLECTION{ChKoViCaVaFe07,
+! author = {Emmanuel Chaljub and Dimitri Komatitsch and Jean-Pierre Vilotte and
+! Yann Capdeville and Bernard Valette and Gaetano Festa},
+! title = {Spectral Element Analysis in Seismology},
+! booktitle = {Advances in Wave Propagation in Heterogeneous Media},
+! publisher = {Elsevier - Academic Press},
+! year = {2007},
+! editor = {Ru-Shan Wu and Val\'erie Maupin},
+! volume = {48},
+! series = {Advances in Geophysics},
+! pages = {365-419}}
+!
+! @ARTICLE{KoVi98,
+! author={D. Komatitsch and J. P. Vilotte},
+! title={The spectral-element method: an efficient tool to simulate the seismic response of 2{D} and 3{D} geological structures},
+! journal={Bull. Seismol. Soc. Am.},
+! year=1998,
+! volume=88,
+! number=2,
+! pages={368-392}}
+!
+! @ARTICLE{KoTr99,
+! author={D. Komatitsch and J. Tromp},
+! year=1999,
+! title={Introduction to the spectral-element method for 3-{D} seismic wave propagation},
+! journal={Geophys. J. Int.},
+! volume=139,
+! number=3,
+! pages={806-822},
+! doi={10.1046/j.1365-246x.1999.00967.x}}
+!
+! @ARTICLE{KoLiTrSuStSh04,
+! author={Dimitri Komatitsch and Qinya Liu and Jeroen Tromp and Peter S\"{u}ss
+!   and Christiane Stidham and John H. Shaw},
+! year=2004,
+! title={Simulations of Ground Motion in the {L}os {A}ngeles {B}asin
+!   based upon the Spectral-Element Method},
+! journal={Bull. Seism. Soc. Am.},
+! volume=94,
+! number=1,
+! pages={187-206}}
+!
+! and/or another article from http://web.univ-pau.fr/~dkomati1/publications.html
+!
+!
+! If you use the kernel capabilities of the code, please cite at least one article
+! written by the developers of the package, for instance:
+!
+! @ARTICLE{TrKoLi08,
+! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
+! title = {Spectral-Element and Adjoint Methods in Seismology},
+! journal = {Communications in Computational Physics},
+! year = {2008},
+! volume = {3},
+! pages = {1-32},
+! number = {1}}
+!
+! or
+!
+! @ARTICLE{LiTr06,
+! author={Qinya Liu and Jeroen Tromp},
+! title={Finite-frequency kernels based on adjoint methods},
+! journal={Bull. Seismol. Soc. Am.},
+! year=2006,
+! volume=96,
+! number=6,
+! pages={2383-2397},
+! doi={10.1785/0120060041}}
+!
+!
+! Reference frame - convention:
+! ----------------------------
+!
+! The code uses the following convention for the reference frame:
+!
+!  - X axis is East
+!  - Y axis is North
+!  - Z axis is up
+!
+! Note that this convention is different from both the Aki-Richards convention
+! and the Harvard CMT convention.
+!
+! Let us recall that the Aki-Richards convention is:
+!
+!  - X axis is North
+!  - Y axis is East
+!  - Z axis is down
+!
+! and that the Harvard CMT convention is:
+!
+!  - X axis is South
+!  - Y axis is East
+!  - Z axis is up
+!
+! To report bugs or suggest improvements to the code, please send an email
+! to Jeroen Tromp <jtromp AT princeton.edu> and/or use our online
+! bug tracking system at http://www.geodynamics.org/roundup .
+!
+! Evolution of the code:
+! ---------------------
+!
+! MPI v. 2.0 "Sesame" (Spectral ElementS on Any MEsh), November 2010:
+! Dimitri Komatitsch, Nicolas Le Goff, Roland Martin and Pieyre Le Loher, University of Pau, France,
+! Jeroen Tromp and the Princeton group of developers, Princeton University, USA,
+! and Emanuele Casarotti, INGV Roma, Italy:
+!  support for CUBIT meshes decomposed by SCOTCH, METIS or ZOLTAN;
+!  much faster solver using Michel Deville's inlined matrix products.
+!
+! MPI v. 1.4 Dimitri Komatitsch, University of Pau, Qinya Liu and others, Caltech, September 2006:
+!  better adjoint and kernel calculations, faster and better I/Os
+!  on very large systems, many small improvements and bug fixes
+!
+! MPI v. 1.3 Dimitri Komatitsch, University of Pau, and Qinya Liu, Caltech, July 2005:
+!  serial version, regular mesh, adjoint and kernel calculations, ParaView support
+!
+! MPI v. 1.2 Min Chen and Dimitri Komatitsch, Caltech, July 2004:
+!  full anisotropy, volume movie
+!
+! MPI v. 1.1 Dimitri Komatitsch, Caltech, October 2002: Zhu's Moho map, scaling
+!  of Vs with depth, Hauksson's regional model, attenuation, oceans, movies
+!
+! MPI v. 1.0 Dimitri Komatitsch, Caltech, USA, May 2002: first MPI version based on global code
+!
+! Dimitri Komatitsch, IPG Paris, France, December 1996: first 3-D solver for the CM-5 Connection Machine,
+!    parallelized on 128 processors using Connection Machine Fortran
+!
+
+! in case of an acoustic medium, a potential Chi of (density * displacement) is used as in Chaljub and Valette,
+! Geophysical Journal International, vol. 158, p. 131-141 (2004) and *NOT* a velocity potential
+! as in Komatitsch and Tromp, Geophysical Journal International, vol. 150, p. 303-318 (2002).
+!
+! This permits acoustic-elastic coupling based on a non-iterative time scheme.
+! Displacement is then:
+!     u = grad(Chi) / rho
+! Velocity is then:
+!     v = grad(Chi_dot) / rho
+! (Chi_dot being the time derivative of Chi)
+! and pressure is:
+!     p = - Chi_dot_dot
+! (Chi_dot_dot being the time second derivative of Chi).
+!
+! The source in an acoustic element is a pressure source.
+!
+! First-order acoustic-acoustic discontinuities are also handled automatically
+! because pressure is continuous at such an interface, therefore Chi_dot_dot
+! is continuous, therefore Chi is also continuous, which is consistent with
+! the spectral-element basis functions and with the assembling process.
+! This is the reason why a simple displacement potential u = grad(Chi) would
+! not work because it would be discontinuous at such an interface and would
+! therefore not be consistent with the basis functions.
+
+! ************** PROGRAM STARTS HERE **************
+
+  ! force Flush-To-Zero if available to avoid very slow Gradual Underflow trapping
+  call force_ftz()
+
+! reads in parameters
+  call initialize_simulation()
+
+
+! reads in external mesh
+  call read_mesh_databases()
+
+
+! sets up reference element GLL points/weights/derivatives
+  call setup_GLL_points()
+
+
+! detects surfaces
+  call detect_mesh_surfaces()
+
+
+! reads topography & bathymetry
+  call read_topography_bathymetry()
+
+
+! prepares sources and receivers
+  call setup_sources_receivers()
+
+
+! sets up and precomputes simulation arrays
+  call prepare_timerun()
+
+
+! steps through time iterations
+  call iterate_time()
+
+
+! saves last time frame and finishes kernel calculations
+  call finalize_simulation()
+
+  end subroutine specfem3D
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -42,14 +42,27 @@
 
   implicit none
 
-! attenuation
-  integer :: NSPEC_ATTENUATION_AB
+! parameters deduced from parameters read from file
+  integer :: NPROC
+  integer :: NSPEC_AB, NGLOB_AB
 
+! mesh parameters
+  integer, dimension(:,:,:,:), allocatable :: ibool
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: xstore,ystore,zstore
+
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: &
+        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz,jacobian
+
+! material properties
+  ! isotropic
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: kappastore,mustore
+
 ! CUDA mesh pointer<->integer wrapper
   integer(kind=8) :: Mesh_pointer
+
 ! Global GPU toggle. Set in Par_file
   logical :: GPU_MODE
-  
+
 ! use integer array to store topography values
   integer :: NX_TOPO,NY_TOPO
   double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
@@ -70,17 +83,11 @@
   integer, dimension(:), allocatable :: free_surface_ispec
   integer :: num_free_surface_faces
 
-! mesh parameters
-  integer, dimension(:,:,:,:), allocatable :: ibool
-  real(kind=CUSTOM_REAL), dimension(:), allocatable :: xstore,ystore,zstore
+! attenuation
+  integer :: NSPEC_ATTENUATION_AB
+  character(len=256) prname_Q
 
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz,jacobian
 
-! material properties
-  ! isotropic
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: kappastore,mustore
-
 ! additional mass matrix for ocean load
   real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_ocean_load
 
@@ -102,14 +109,15 @@
   double precision, external :: comp_source_time_function
   double precision :: t0
   real(kind=CUSTOM_REAL) :: stf_used_total
-  integer :: NSOURCES
+  integer :: NSOURCES,nsources_local
 
 ! receiver information
   character(len=256) :: rec_filename,filtered_rec_filename,dummystring
   integer :: nrec,nrec_local,nrec_tot_found
   integer :: nrec_simulation
-  integer, allocatable, dimension(:) :: islice_selected_rec,ispec_selected_rec,number_receiver_global
-  double precision, allocatable, dimension(:) :: xi_receiver,eta_receiver,gamma_receiver
+  integer, dimension(:), allocatable :: islice_selected_rec,ispec_selected_rec
+  integer, dimension(:), allocatable :: number_receiver_global
+  double precision, dimension(:), allocatable :: xi_receiver,eta_receiver,gamma_receiver
   double precision, dimension(:,:), allocatable :: hpxir_store,hpetar_store,hpgammar_store
 
 ! timing information for the stations
@@ -144,13 +152,11 @@
   double precision, external :: wtime
   double precision :: time_start
 
-! parameters read from parameter file
-  integer :: NPROC_XI,NPROC_ETA
+! parameters
+  integer :: SIMULATION_TYPE
   integer :: NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,UTM_PROJECTION_ZONE
-  integer :: SIMULATION_TYPE
 
   double precision :: DT
-  double precision :: LATITUDE_MIN,LATITUDE_MAX,LONGITUDE_MIN,LONGITUDE_MAX
 
   logical :: ATTENUATION,USE_OLSEN_ATTENUATION, &
             OCEANS,TOPOGRAPHY,ABSORBING_CONDITIONS,ANISOTROPY
@@ -161,11 +167,11 @@
 
   integer :: NTSTEP_BETWEEN_OUTPUT_INFO
 
-  character(len=256) OUTPUT_FILES,LOCAL_PATH,prname,prname_Q
+! parameters read from mesh parameter file
+  integer :: NPROC_XI,NPROC_ETA
+  double precision :: LATITUDE_MIN,LATITUDE_MAX,LONGITUDE_MIN,LONGITUDE_MAX
 
-! parameters deduced from parameters read from file
-  integer :: NPROC
-  integer :: NSPEC_AB, NGLOB_AB
+  character(len=256) OUTPUT_FILES,LOCAL_PATH,prname
 
 ! names of the data files for all the processors in MPI
   character(len=256) outputname

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -31,7 +31,7 @@
   use specfem_par
   use specfem_par_movie
   use specfem_par_elastic
-  use specfem_par_acoustic    
+  use specfem_par_acoustic
   implicit none
 
   ! gets resulting array values onto CPU
@@ -42,17 +42,17 @@
       ( MOVIE_SURFACE .and. mod(it,NTSTEP_BETWEEN_FRAMES) == 0) .or. &
       ( MOVIE_VOLUME .and. mod(it,NTSTEP_BETWEEN_FRAMES) == 0) .or. &
       ( PNM_GIF_IMAGE .and. mod(it,NTSTEP_BETWEEN_FRAMES) == 0) &
-     ) ) then 
+     ) ) then
     ! acoustic domains
     if( ACOUSTIC_SIMULATION ) then
-      ! transfers whole fields      
-      call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
-                potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)        
+      ! transfers whole fields
+      call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+                potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
     endif
     ! elastic domains
     if( ELASTIC_SIMULATION ) then
-      ! transfers whole fields      
-      call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+      ! transfers whole fields
+      call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
     endif
   endif
 
@@ -1224,7 +1224,7 @@
       open(unit=27,file=trim(LOCAL_PATH)//trim(outputname),status='unknown',form='unformatted',iostat=ier)
       if( ier /= 0 ) stop 'error opening file movie output velocity z'
       write(27) tmpdata
-      close(27)    
+      close(27)
     endif
 
     ! norm of velocity
@@ -1235,9 +1235,9 @@
     if( ier /= 0 ) stop 'error opening file movie output velocity z'
     write(27) tmpdata
     close(27)
-    
+
     deallocate(tmpdata)
-    
+
   endif
 
   end subroutine wmo_movie_volume_output

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90	2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90	2011-10-30 02:25:28 UTC (rev 19129)
@@ -46,37 +46,39 @@
   double precision :: stf
 
   ! gets resulting array values onto CPU
-  if(GPU_MODE) then 
-    ! this transfers fields only in elements with stations for efficiency
-    if( ACOUSTIC_SIMULATION ) then
-      ! only copy corresponding elements to CPU host
-      ! timing: Elapsed time: 5.230904e-04
-      call transfer_station_fields_acoustic_from_device( &
-                      potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
-                      b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
-                      Mesh_pointer,number_receiver_global, &
-                      ispec_selected_rec,ispec_selected_source,ibool,SIMULATION_TYPE)
+  if(GPU_MODE) then
+    if( nrec_local > 0 ) then
+      ! this transfers fields only in elements with stations for efficiency
+      if( ACOUSTIC_SIMULATION ) then
+        ! only copy corresponding elements to CPU host
+        ! timing: Elapsed time: 5.230904e-04
+        call transfer_station_ac_from_device( &
+                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
+                        b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
+                        Mesh_pointer,number_receiver_global, &
+                        ispec_selected_rec,ispec_selected_source,ibool,SIMULATION_TYPE)
 
-      ! alternative: transfers whole fields      
-      ! timing: Elapsed time: 4.138947e-03
-      !call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
-      !          potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)        
-    endif
-    
-    ! this transfers fields only in elements with stations for efficiency
-    if( ELASTIC_SIMULATION ) then
-      call transfer_station_fields_from_device( &
-                  displ,veloc,accel, &
-                  b_displ,b_veloc,b_accel, &
-                  Mesh_pointer,number_receiver_global, &
-                  ispec_selected_rec,ispec_selected_source, &
-                  ibool,SIMULATION_TYPE)
+        ! alternative: transfers whole fields
+        ! timing: Elapsed time: 4.138947e-03
+        !call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+        !          potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
+      endif
 
-      ! alternative: transfers whole fields      
-      !  call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)      
+      ! this transfers fields only in elements with stations for efficiency
+      if( ELASTIC_SIMULATION ) then
+        call transfer_station_el_from_device( &
+                    displ,veloc,accel, &
+                    b_displ,b_veloc,b_accel, &
+                    Mesh_pointer,number_receiver_global, &
+                    ispec_selected_rec,ispec_selected_source, &
+                    ibool,SIMULATION_TYPE)
+
+        ! alternative: transfers whole fields
+        !  call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+      endif
     endif
   endif
-  
+
   do irec_local = 1,nrec_local
 
     ! gets global number of that receiver
@@ -282,7 +284,7 @@
 ! write ONE binary file for all receivers (nrec_local) within one proc
 ! SU format, with 240-byte-header for each trace
   if ((mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0 .or. it==NSTEP) .and. SU_FORMAT) &
-     call write_seismograms_su() 
+     call write_seismograms_su()
 
   end subroutine write_seismograms
 
@@ -788,7 +790,7 @@
 !=====================================================================
 
  subroutine write_seismograms_su()
- 
+
  use specfem_par
  use specfem_par_acoustic
  use specfem_par_elastic
@@ -799,7 +801,7 @@
  character(len=256) procname,final_LOCAL_PATH
  integer :: irec_local,irec,ios
  real :: x_station,y_station
- 
+
  ! headers
  integer,parameter :: nheader=240      ! 240 bytes
  integer(kind=2) :: i2head(nheader/2)  ! 2-byte-integer
@@ -846,7 +848,7 @@
    i2head(58) =NSTEP
    i2head(59) =DT*1.0d6
    write(IOUT_SU,rec=irec_local) r4head, seismograms_d(1,irec_local,:)
- enddo 
+ enddo
  close(IOUT_SU)
  ! write seismograms (dy)
  open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dy_SU' ,&
@@ -863,7 +865,7 @@
    i2head(58) =NSTEP
    i2head(59) =DT*1.0d6
    write(IOUT_SU,rec=irec_local) r4head, seismograms_d(2,irec_local,:)
- enddo 
+ enddo
  close(IOUT_SU)
 
  ! write seismograms (dz)
@@ -881,7 +883,7 @@
    i2head(58) =NSTEP
    i2head(59) =DT*1.0d6
    write(IOUT_SU,rec=irec_local) r4head, seismograms_d(3,irec_local,:)
- enddo 
+ enddo
  close(IOUT_SU)
 
  end subroutine write_seismograms_su



More information about the CIG-COMMITS mailing list