[cig-commits] r20628 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER: . src/cuda src/meshfem3D src/shared src/specfem3D

danielpeter at geodynamics.org danielpeter at geodynamics.org
Fri Aug 24 19:15:27 PDT 2012


Author: danielpeter
Date: 2012-08-24 19:15:26 -0700 (Fri, 24 Aug 2012)
New Revision: 20628

Modified:
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/initialize_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/save_arrays_solver.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/lagrange_poly.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_arrays_source.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/define_derivation_matrices.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/get_event_info.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
Log:
bug fix in get_event_info() routine; updates compute_arrays_source() routine arguments


Property changes on: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER
___________________________________________________________________
Name: svn:ignore
   - *.mod
x*
config.log
config.status
xmeshfem3D
xspecfem3D
precision.h
constants.h
aclocal.m4
config.h
Makefile
autom4te.cache


   + 


Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu	2012-08-25 02:15:26 UTC (rev 20628)
@@ -118,6 +118,13 @@
       fclose(fp);
     }
 
+    // releases previous contexts
+#if CUDA_VERSION < 4000
+    cudaThreadExit();
+#else
+    cudaDeviceReset();
+#endif
+
     // stops program
     //free(kernel_name);
 #ifdef WITH_MPI
@@ -183,6 +190,13 @@
       fclose(fp);
     }
 
+    // releases previous contexts
+#if CUDA_VERSION < 4000
+    cudaThreadExit();
+#else
+    cudaDeviceReset();
+#endif
+
     // stops program
 #ifdef WITH_MPI
     MPI_Abort(MPI_COMM_WORLD,1);

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu	2012-08-25 02:15:26 UTC (rev 20628)
@@ -798,7 +798,7 @@
 
   __shared__ realw sh_hprime_xx[NGLL2];
   __shared__ realw sh_hprimewgll_xx[NGLL2];
-  
+
 // use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
 // because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
   active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
@@ -850,8 +850,8 @@
     }
   } // active
 
-  // gets constant arrays into shared memory 
-  // (only ghost threads which would be idle anyway)    
+  // gets constant arrays into shared memory
+  // (only ghost threads which would be idle anyway)
   if (tx == NGLL3_PADDED-1) {
     for(int m=0; m < NGLL2; m++){
       // hprime
@@ -863,9 +863,9 @@
       // weighted hprime
       sh_hprimewgll_xx[m] = d_hprimewgll_xx[m];
     }
-  }    
-  
-/*  
+  }
+
+/*
   if (tx < NGLL2) {
     // hprime
 #ifdef USE_TEXTURES_CONSTANTS
@@ -1274,7 +1274,7 @@
       tempx1l += s_tempx1[K*NGLL2+J*NGLLX+l]*fac1;
       tempy1l += s_tempy1[K*NGLL2+J*NGLLX+l]*fac1;
       tempz1l += s_tempz1[K*NGLL2+J*NGLLX+l]*fac1;
-      
+
       // assume hprimewgll_xx == hprimewgll_yy == hprimewgll_zz
       fac2 = sh_hprimewgll_xx[J*NGLLX+l];
       tempx2l += s_tempx2[K*NGLL2+l*NGLLX+I]*fac2;
@@ -1426,7 +1426,7 @@
       epsilondev_xz[tx + working_element*NGLL3] = epsilondev_xz_loc;
       epsilondev_yz[tx + working_element*NGLL3] = epsilondev_yz_loc;
     }
-  } // active  
+  } // active
 }
 
 /* ----------------------------------------------------------------------------------------------- */

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu	2012-08-25 02:15:26 UTC (rev 20628)
@@ -171,10 +171,10 @@
   __shared__ realw s_dummy_loc[NGLL3];
   __shared__ realw s_temp1[NGLL3];
   __shared__ realw s_temp2[NGLL3];
-  __shared__ realw s_temp3[NGLL3];  
+  __shared__ realw s_temp3[NGLL3];
   __shared__ realw sh_hprime_xx[NGLL2];
   __shared__ realw sh_hprimewgll_xx[NGLL2];
-  
+
 // use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
 // because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
   active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
@@ -388,7 +388,7 @@
         //assumes hprimewgll_xx = hprimewgll_yy = hprimewgll_zz
         temp2l += s_temp2[K*NGLL2+l*NGLLX+I]*sh_hprimewgll_xx[J*NGLLX+l];
         temp3l += s_temp3[l*NGLL2+J*NGLLX+I]*sh_hprimewgll_xx[K*NGLLX+l];
-    }    
+    }
 #else
     temp1l = s_temp1[K*NGLL2+J*NGLLX]*sh_hprimewgll_xx[I*NGLLX]
             + s_temp1[K*NGLL2+J*NGLLX+1]*sh_hprimewgll_xx[I*NGLLX+1]
@@ -409,8 +409,8 @@
             + s_temp3[4*NGLL2+J*NGLLX+I]*sh_hprimewgll_xx[K*NGLLX+4];
 #endif
 
-    sum_terms = - ( wgllwgll_yz[K*NGLLX+J]*temp1l 
-                  + wgllwgll_xz[K*NGLLX+I]*temp2l 
+    sum_terms = - ( wgllwgll_yz[K*NGLLX+J]*temp1l
+                  + wgllwgll_xz[K*NGLLX+I]*temp2l
                   + wgllwgll_xy[J*NGLLX+I]*temp3l);
 
     if( GRAVITY ) sum_terms += gravity_term;

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/initialize_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/initialize_cuda.cu	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/initialize_cuda.cu	2012-08-25 02:15:26 UTC (rev 20628)
@@ -103,8 +103,13 @@
     // generalized for more GPUs per node
     // note: without previous context release, cudaSetDevice will complain with the cuda error
     //         "setting the device when a process is active is not allowed"
+
     // releases previous contexts
+#if CUDA_VERSION < 4000
     cudaThreadExit();
+#else
+    cudaDeviceReset();
+#endif
 
     //printf("rank %d: cuda device count = %d sets device = %d \n",myrank,device_count,myrank % device_count);
     //MPI_Barrier(MPI_COMM_WORLD);

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2012-08-25 02:15:26 UTC (rev 20628)
@@ -524,10 +524,10 @@
   realw* d_hprime_xx_tex;
 #endif
 
-  realw* d_hprimewgll_xx; 
+  realw* d_hprimewgll_xx;
   //realw* d_hprimewgll_yy; // only needed if NGLLX != NGLLY != NGLLZ
   //realw* d_hprimewgll_zz; // only needed if NGLLX != NGLLY != NGLLZ
-  
+
   realw* d_wgllwgll_xy; realw* d_wgllwgll_xz; realw* d_wgllwgll_yz;
   realw* d_wgll_cube;
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h	2012-08-25 02:15:26 UTC (rev 20628)
@@ -80,7 +80,7 @@
 __device__ realw d_wgllwgll_yz[NGLL2];
 
 // wgll_cube: needed only for gravity case
-__device__ realw d_wgll_cube[NGLL3]; 
+__device__ realw d_wgll_cube[NGLL3];
 
 
 // setup functions
@@ -91,7 +91,8 @@
   if (err != cudaSuccess)
   {
     fprintf(stderr, "Error in setConst_hprime_xx: %s\n", cudaGetErrorString(err));
-    fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
+    fprintf(stderr, "The problem is maybe the target architecture: -arch sm_** in src/specfem3D/Makefile\n");
+    fprintf(stderr, "Please double-check with your GPU card\n");
     exit(1);
   }
 
@@ -111,7 +112,6 @@
  if (err != cudaSuccess)
  {
  fprintf(stderr, "Error in setConst_hprime_yy: %s\n", cudaGetErrorString(err));
- fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
  exit(1);
  }
 
@@ -132,7 +132,6 @@
  if (err != cudaSuccess)
  {
  fprintf(stderr, "Error in setConst_hprime_zz: %s\n", cudaGetErrorString(err));
- fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
  exit(1);
  }
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2012-08-25 02:15:26 UTC (rev 20628)
@@ -95,7 +95,7 @@
                                         int* myrank_f,
                                         int* h_NGLLX,
                                         realw* h_hprime_xx,
-                                        realw* h_hprimewgll_xx,realw* h_hprimewgll_yy,realw* h_hprimewgll_zz,
+                                        realw* h_hprimewgll_xx,
                                         realw* h_wgllwgll_xy,realw* h_wgllwgll_xz,realw* h_wgllwgll_yz,
                                         int* NSOURCES,int* nsources_local,
                                         realw* h_sourcearrays,
@@ -1235,22 +1235,22 @@
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel_crust_mantle),sizeof(realw)*size),4003);
   // backward/reconstructed wavefield
   if( mp->simulation_type == 3 ){
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ_crust_mantle),sizeof(realw)*size),4001);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc_crust_mantle),sizeof(realw)*size),4002);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel_crust_mantle),sizeof(realw)*size),4003);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ_crust_mantle),sizeof(realw)*size),4011);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc_crust_mantle),sizeof(realw)*size),4012);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel_crust_mantle),sizeof(realw)*size),4013);
   }
 
   #ifdef USE_TEXTURES_FIELDS
   {
-    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_displ_cm_tex_ref_ptr, "d_displ_cm_tex"), 4001);
+    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_displ_cm_tex_ref_ptr, "d_displ_cm_tex"), 4021);
     cudaChannelFormatDesc channelDesc1 = cudaCreateChannelDesc<realw>();
     print_CUDA_error_if_any(cudaBindTexture(0, mp->d_displ_cm_tex_ref_ptr, mp->d_displ_crust_mantle,
-                                            &channelDesc1, sizeof(realw)*size), 4001);
+                                            &channelDesc1, sizeof(realw)*size), 4021);
 
-    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_accel_cm_tex_ref_ptr, "d_accel_cm_tex"), 4003);
+    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_accel_cm_tex_ref_ptr, "d_accel_cm_tex"), 4023);
     cudaChannelFormatDesc channelDesc2 = cudaCreateChannelDesc<realw>();
     print_CUDA_error_if_any(cudaBindTexture(0, mp->d_accel_cm_tex_ref_ptr, mp->d_accel_crust_mantle,
-                                            &channelDesc2, sizeof(realw)*size), 4003);
+                                            &channelDesc2, sizeof(realw)*size), 4023);
   }
   #endif
 
@@ -1347,17 +1347,17 @@
   int size_glob = mp->NGLOB_OUTER_CORE;
 
   // mesh
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix_outer_core, size_padded*sizeof(realw)),1001);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy_outer_core, size_padded*sizeof(realw)),1002);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz_outer_core, size_padded*sizeof(realw)),1003);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax_outer_core, size_padded*sizeof(realw)),1004);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay_outer_core, size_padded*sizeof(realw)),1005);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz_outer_core, size_padded*sizeof(realw)),1006);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax_outer_core, size_padded*sizeof(realw)),1007);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay_outer_core, size_padded*sizeof(realw)),1008);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz_outer_core, size_padded*sizeof(realw)),1009);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix_outer_core, size_padded*sizeof(realw)),1101);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy_outer_core, size_padded*sizeof(realw)),1102);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz_outer_core, size_padded*sizeof(realw)),1103);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax_outer_core, size_padded*sizeof(realw)),1104);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay_outer_core, size_padded*sizeof(realw)),1105);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz_outer_core, size_padded*sizeof(realw)),1106);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax_outer_core, size_padded*sizeof(realw)),1107);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay_outer_core, size_padded*sizeof(realw)),1108);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz_outer_core, size_padded*sizeof(realw)),1109);
 
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappavstore_outer_core, size_padded*sizeof(realw)),1010);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappavstore_outer_core, size_padded*sizeof(realw)),1110);
 
   // transfer constant element data with padding
   for(int i=0;i < mp->NSPEC_OUTER_CORE;i++) {
@@ -1429,27 +1429,27 @@
 
   // wavefield
   int size = mp->NGLOB_OUTER_CORE;
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_outer_core),sizeof(realw)*size),4001);
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc_outer_core),sizeof(realw)*size),4002);
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel_outer_core),sizeof(realw)*size),4003);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_outer_core),sizeof(realw)*size),5001);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc_outer_core),sizeof(realw)*size),5002);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel_outer_core),sizeof(realw)*size),5003);
   // backward/reconstructed wavefield
   if( mp->simulation_type == 3 ){
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ_outer_core),sizeof(realw)*size),4001);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc_outer_core),sizeof(realw)*size),4002);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel_outer_core),sizeof(realw)*size),4003);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ_outer_core),sizeof(realw)*size),5011);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc_outer_core),sizeof(realw)*size),5022);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel_outer_core),sizeof(realw)*size),5033);
   }
 
   #ifdef USE_TEXTURES_FIELDS
   {
-    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_displ_oc_tex_ref_ptr, "d_displ_oc_tex"), 4001);
+    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_displ_oc_tex_ref_ptr, "d_displ_oc_tex"), 5021);
     cudaChannelFormatDesc channelDesc1 = cudaCreateChannelDesc<realw>();
     print_CUDA_error_if_any(cudaBindTexture(0, mp->d_displ_oc_tex_ref_ptr, mp->d_displ_outer_core,
-                                            &channelDesc1, sizeof(realw)*size), 4001);
+                                            &channelDesc1, sizeof(realw)*size), 5021);
 
-    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_accel_oc_tex_ref_ptr, "d_accel_oc_tex"), 4003);
+    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_accel_oc_tex_ref_ptr, "d_accel_oc_tex"), 5023);
     cudaChannelFormatDesc channelDesc2 = cudaCreateChannelDesc<realw>();
     print_CUDA_error_if_any(cudaBindTexture(0, mp->d_accel_oc_tex_ref_ptr, mp->d_accel_outer_core,
-                                            &channelDesc2, sizeof(realw)*size), 4003);
+                                            &channelDesc2, sizeof(realw)*size), 5023);
   }
   #endif
 
@@ -1520,18 +1520,18 @@
   int size_glob = mp->NGLOB_INNER_CORE;
 
   // mesh
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix_inner_core, size_padded*sizeof(realw)),1001);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy_inner_core, size_padded*sizeof(realw)),1002);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz_inner_core, size_padded*sizeof(realw)),1003);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax_inner_core, size_padded*sizeof(realw)),1004);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay_inner_core, size_padded*sizeof(realw)),1005);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz_inner_core, size_padded*sizeof(realw)),1006);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax_inner_core, size_padded*sizeof(realw)),1007);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay_inner_core, size_padded*sizeof(realw)),1008);
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz_inner_core, size_padded*sizeof(realw)),1009);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix_inner_core, size_padded*sizeof(realw)),1201);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy_inner_core, size_padded*sizeof(realw)),1202);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz_inner_core, size_padded*sizeof(realw)),1203);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax_inner_core, size_padded*sizeof(realw)),1204);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay_inner_core, size_padded*sizeof(realw)),1205);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz_inner_core, size_padded*sizeof(realw)),1206);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax_inner_core, size_padded*sizeof(realw)),1207);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay_inner_core, size_padded*sizeof(realw)),1208);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz_inner_core, size_padded*sizeof(realw)),1209);
 
   // muvstore needed for attenuatioin also for anisotropic inner core
-  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muvstore_inner_core, size_padded*sizeof(realw)),1011);
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muvstore_inner_core, size_padded*sizeof(realw)),1211);
 
   // transfer constant element data with padding
   for(int i=0;i < mp->NSPEC_INNER_CORE;i++) {
@@ -1633,27 +1633,27 @@
 
   // wavefield
   int size = NDIM * mp->NGLOB_INNER_CORE;
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_inner_core),sizeof(realw)*size),4001);
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc_inner_core),sizeof(realw)*size),4002);
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel_inner_core),sizeof(realw)*size),4003);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_inner_core),sizeof(realw)*size),6001);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc_inner_core),sizeof(realw)*size),6002);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel_inner_core),sizeof(realw)*size),6003);
   // backward/reconstructed wavefield
   if( mp->simulation_type == 3 ){
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ_inner_core),sizeof(realw)*size),4001);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc_inner_core),sizeof(realw)*size),4002);
-    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel_inner_core),sizeof(realw)*size),4003);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ_inner_core),sizeof(realw)*size),6011);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc_inner_core),sizeof(realw)*size),6012);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel_inner_core),sizeof(realw)*size),6013);
   }
 
   #ifdef USE_TEXTURES_FIELDS
   {
-    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_displ_ic_tex_ref_ptr, "d_displ_ic_tex"), 4001);
+    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_displ_ic_tex_ref_ptr, "d_displ_ic_tex"), 6021);
     cudaChannelFormatDesc channelDesc1 = cudaCreateChannelDesc<realw>();
     print_CUDA_error_if_any(cudaBindTexture(0, mp->d_displ_ic_tex_ref_ptr, mp->d_displ_inner_core,
-                                            &channelDesc1, sizeof(realw)*size), 4001);
+                                            &channelDesc1, sizeof(realw)*size), 6021);
 
-    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_accel_ic_tex_ref_ptr, "d_accel_ic_tex"), 4003);
+    print_CUDA_error_if_any(cudaGetTextureReference(&mp->d_accel_ic_tex_ref_ptr, "d_accel_ic_tex"), 6023);
     cudaChannelFormatDesc channelDesc2 = cudaCreateChannelDesc<realw>();
     print_CUDA_error_if_any(cudaBindTexture(0, mp->d_accel_ic_tex_ref_ptr, mp->d_accel_inner_core,
-                                            &channelDesc2, sizeof(realw)*size), 4003);
+                                            &channelDesc2, sizeof(realw)*size), 6023);
   }
   #endif
 
@@ -2146,7 +2146,13 @@
   }
 
   // releases previous contexts
+#if CUDA_VERSION < 4000
+  cudaThreadSynchronize();
   cudaThreadExit();
+#else
+  cudaDeviceSynchronize();
+  cudaDeviceReset();
+#endif
 
   // mesh pointer - not needed anymore
   free(mp);

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c	2012-08-25 02:15:26 UTC (rev 20628)
@@ -343,7 +343,7 @@
                                         int* myrank_f,
                                         int* h_NGLLX,
                                         realw* h_hprime_xx,
-                                        realw* h_hprimewgll_xx,realw* h_hprimewgll_yy,realw* h_hprimewgll_zz,
+                                        realw* h_hprimewgll_xx,
                                         realw* h_wgllwgll_xy,realw* h_wgllwgll_xz,realw* h_wgllwgll_yz,
                                         int* NSOURCES,int* nsources_local,
                                         realw* h_sourcearrays,

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/save_arrays_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/save_arrays_solver.f90	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/save_arrays_solver.f90	2012-08-25 02:15:26 UTC (rev 20628)
@@ -33,7 +33,7 @@
 
   use meshfem3D_models_par,only: &
     OCEANS,TRANSVERSE_ISOTROPY,HETEROGEN_3D_MANTLE,ANISOTROPIC_3D_MANTLE, &
-    ANISOTROPIC_INNER_CORE,ATTENUATION,ATTENUATION_3D
+    ANISOTROPIC_INNER_CORE,ATTENUATION
 
   use meshfem3D_par,only: &
     NCHUNKS,ABSORBING_CONDITIONS,SAVE_MESH_FILES

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/lagrange_poly.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/lagrange_poly.f90	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/lagrange_poly.f90	2012-08-25 02:15:26 UTC (rev 20628)
@@ -32,13 +32,14 @@
 
   implicit none
 
-  double precision :: xi
+  double precision,intent(in) :: xi
 
-  integer :: NGLL
-  double precision,dimension(NGLL):: xigll,h,hprime
+  integer,intent(in) :: NGLL
+  double precision,dimension(NGLL),intent(in) :: xigll
+  double precision,dimension(NGLL),intent(out) :: h,hprime
 
-  integer dgr,i,j
-  double precision prod1,prod2
+  integer :: dgr,i,j
+  double precision :: prod1,prod2
 
   do dgr=1,NGLL
 
@@ -52,10 +53,10 @@
     enddo
     h(dgr)=prod1/prod2
 
-    hprime(dgr)=0.0d0
+    hprime(dgr) = 0.0d0
     do i=1,NGLL
       if(i /= dgr) then
-        prod1=1.0d0
+        prod1 = 1.0d0
         do j=1,NGLL
           if(j /= dgr .and. j /= i) prod1 = prod1*(xi-xigll(j))
         enddo

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in	2012-08-25 02:15:26 UTC (rev 20628)
@@ -43,6 +43,10 @@
 @COND_CUDA_TRUE at NVCC = nvcc
 @COND_CUDA_FALSE at NVCC = g++
 
+# GPU architecture
+# Fermi: -gencode=arch=compute_10,code=sm_10 not supported
+# Tesla (default): -gencode=arch=compute_20,code=sm_20
+# Kepler: -gencode=arch=compute_30,code=sm_30
 @COND_CUDA_TRUE at NVCC_FLAGS = $(CUDA_INC) $(MPI_INC) -DWITH_MPI -DCUDA -gencode=arch=compute_20,code=sm_20
 @COND_CUDA_FALSE at NVCC_FLAGS = $(MPI_INC) -DWITH_MPI
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_arrays_source.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_arrays_source.f90	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_arrays_source.f90	2012-08-25 02:15:26 UTC (rev 20628)
@@ -25,25 +25,22 @@
 !
 !=====================================================================
 
-  subroutine compute_arrays_source(ispec_selected_source, &
-             xi_source,eta_source,gamma_source,sourcearray, &
-             Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
-             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-             xigll,yigll,zigll,nspec)
+  subroutine compute_arrays_source(sourcearray, &
+                                   xi_source,eta_source,gamma_source, &
+                                   Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
+                                   xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                                   xigll,yigll,zigll)
 
   implicit none
 
   include "constants.h"
 
-  integer :: ispec_selected_source
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
 
   double precision :: xi_source,eta_source,gamma_source
   double precision :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
 
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
-
-  integer :: nspec
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xix,xiy,xiz,etax,etay,etaz, &
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: xix,xiy,xiz,etax,etay,etaz, &
         gammax,gammay,gammaz
 
   ! Gauss-Lobatto-Legendre points of integration and weights
@@ -60,7 +57,7 @@
   double precision, dimension(NGLLY) :: hetas,hpetas
   double precision, dimension(NGLLZ) :: hgammas,hpgammas
 
-  integer k,l,m
+  integer :: k,l,m
 
   ! calculate G_ij for general source location
   ! the source does not necessarily correspond to a Gauss-Lobatto point
@@ -69,25 +66,25 @@
       do k=1,NGLLX
 
         if( CUSTOM_REAL == SIZE_REAL ) then
-          xixd    = dble(xix(k,l,m,ispec_selected_source))
-          xiyd    = dble(xiy(k,l,m,ispec_selected_source))
-          xizd    = dble(xiz(k,l,m,ispec_selected_source))
-          etaxd   = dble(etax(k,l,m,ispec_selected_source))
-          etayd   = dble(etay(k,l,m,ispec_selected_source))
-          etazd   = dble(etaz(k,l,m,ispec_selected_source))
-          gammaxd = dble(gammax(k,l,m,ispec_selected_source))
-          gammayd = dble(gammay(k,l,m,ispec_selected_source))
-          gammazd = dble(gammaz(k,l,m,ispec_selected_source))
+          xixd    = dble(xix(k,l,m))
+          xiyd    = dble(xiy(k,l,m))
+          xizd    = dble(xiz(k,l,m))
+          etaxd   = dble(etax(k,l,m))
+          etayd   = dble(etay(k,l,m))
+          etazd   = dble(etaz(k,l,m))
+          gammaxd = dble(gammax(k,l,m))
+          gammayd = dble(gammay(k,l,m))
+          gammazd = dble(gammaz(k,l,m))
         else
-          xixd    = xix(k,l,m,ispec_selected_source)
-          xiyd    = xiy(k,l,m,ispec_selected_source)
-          xizd    = xiz(k,l,m,ispec_selected_source)
-          etaxd   = etax(k,l,m,ispec_selected_source)
-          etayd   = etay(k,l,m,ispec_selected_source)
-          etazd   = etaz(k,l,m,ispec_selected_source)
-          gammaxd = gammax(k,l,m,ispec_selected_source)
-          gammayd = gammay(k,l,m,ispec_selected_source)
-          gammazd = gammaz(k,l,m,ispec_selected_source)
+          xixd    = xix(k,l,m)
+          xiyd    = xiy(k,l,m)
+          xizd    = xiz(k,l,m)
+          etaxd   = etax(k,l,m)
+          etayd   = etay(k,l,m)
+          etazd   = etaz(k,l,m)
+          gammaxd = gammax(k,l,m)
+          gammayd = gammay(k,l,m)
+          gammazd = gammaz(k,l,m)
         endif
 
         G11(k,l,m) = Mxx*xixd+Mxy*xiyd+Mxz*xizd

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/define_derivation_matrices.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/define_derivation_matrices.f90	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/define_derivation_matrices.f90	2012-08-25 02:15:26 UTC (rev 20628)
@@ -26,9 +26,9 @@
 !=====================================================================
 
   subroutine define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
-         hprime_xx,hprime_yy,hprime_zz, &
-         hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
-         wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube)
+                                        hprime_xx,hprime_yy,hprime_zz, &
+                                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+                                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube)
 
   implicit none
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90	2012-08-25 02:15:26 UTC (rev 20628)
@@ -141,7 +141,6 @@
   ! frees dynamically allocated memory
 
   ! mass matrices
-
   deallocate(rmassx_crust_mantle)
   deallocate(rmassy_crust_mantle)
   deallocate(rmassz_crust_mantle)
@@ -186,12 +185,13 @@
 
   ! sources
   deallocate(islice_selected_source, &
-            ispec_selected_source, &
-            Mxx,Myy,Mzz,Mxy,Mxz,Myz)
+             ispec_selected_source, &
+             Mxx,Myy,Mzz,Mxy,Mxz,Myz)
+  deallocate(xi_source,eta_source,gamma_source)
   deallocate(tshift_cmt,hdur,hdur_gaussian)
   deallocate(nu_source)
 
-  if (SIMULATION_TYPE == 1  .or. SIMULATION_TYPE == 3) deallocate(sourcearrays)
+  if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) deallocate(sourcearrays)
   if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
     deallocate(iadj_vec)
     if(nadj_rec_local > 0) then

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/get_event_info.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/get_event_info.f90	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/get_event_info.f90	2012-08-25 02:15:26 UTC (rev 20628)
@@ -48,29 +48,25 @@
 !--- input or output arguments of the subroutine below
 
   integer, intent(in) :: myrank
+  integer, intent(in) :: NSOURCES ! must be given
 
   integer, intent(out) :: yr,jda,ho,mi
+  double precision, intent(out) :: sec
   real, intent(out) :: mb
-  double precision, intent(out) :: tshift_cmt,elat,elon,depth,cmt_lat,cmt_lon,cmt_depth,cmt_hdur,sec
-
-  !character(len=12), intent(out) :: ename
-
-  integer, intent(in) :: NSOURCES ! must be given
+  double precision, intent(out) :: tshift_cmt,elat,elon,depth,cmt_lat,cmt_lon,cmt_depth,cmt_hdur
   double precision, intent(out) :: t_shift
+
   character(len=20), intent(out) :: event_name
 
+  ! local parameters
+  integer :: ier
 
-
-!--- local variables below
-
-  integer ier
-
-  !integer, parameter :: LENGTH_REGION_NAME = 150
-  !character(len=LENGTH_REGION_NAME) region
-
-! get event information for SAC header on the master
+  ! get event information for SAC header on the master
   if(myrank == 0) then
 
+    ! note: mb as (body wave) moment magnitude is not used any further,
+    !       see comment in write_output_SAC() routine
+
     call get_event_info_serial(yr,jda,ho,mi,sec,event_name,tshift_cmt,t_shift, &
                         elat,elon,depth,mb, &
                         cmt_lat,cmt_lon,cmt_depth,cmt_hdur,NSOURCES)
@@ -85,15 +81,14 @@
 
   endif
 
-! broadcast the information read on the master to the nodes
+  ! broadcast the information read on the master to the nodes
   call MPI_BCAST(yr,1,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
   call MPI_BCAST(jda,1,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
   call MPI_BCAST(ho,1,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
   call MPI_BCAST(mi,1,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+
   call MPI_BCAST(sec,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
 
-  call MPI_BCAST(NSOURCES,1,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
-
   call MPI_BCAST(tshift_cmt,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
   call MPI_BCAST(t_shift,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
 
@@ -108,8 +103,7 @@
   call MPI_BCAST(cmt_depth,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
   call MPI_BCAST(cmt_hdur,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
 
-  !call MPI_BCAST(ename,12,MPI_CHARACTER,0,MPI_COMM_WORLD,ier)
-  call MPI_BCAST(event_name,20,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
+  call MPI_BCAST(event_name,20,MPI_CHARACTER,0,MPI_COMM_WORLD,ier)
 
   end subroutine get_event_info_parallel
 
@@ -133,40 +127,32 @@
 
 !--- arguments of the subroutine below
 
-  integer, intent(out) :: yr,jda,ho,mi
+  integer, intent(in) :: NSOURCES
 
+  integer, intent(out) :: yr,jda,ho,mi
+  double precision, intent(out) :: sec
+  double precision, intent(out) :: tshift_cmt,t_shift
+  double precision, intent(out) :: elat_pde,elon_pde,depth_pde
   real, intent(out) :: mb
+  double precision, intent(out) :: cmt_lat,cmt_lon,cmt_depth,cmt_hdur
 
-  double precision, intent(out) :: sec,tshift_cmt,t_shift
-  double precision, intent(out) :: elat_pde,elon_pde,depth_pde,cmt_lat,cmt_lon,cmt_depth,cmt_hdur
-
-  !integer, intent(in) :: LENGTH_REGION_NAME
-  !character(len=LENGTH_REGION_NAME), intent(out) :: region ! event name for SAC header
-
   character(len=20), intent(out) :: event_name ! event name for SAC header
 
-  integer, intent(in) :: NSOURCES
-
-!--- local variables here
-
-  integer ios,mo,da,julian_day
-  integer isource
-
+  ! local parameters
+  integer :: ios,mo,da,julian_day
+  integer :: isource
   double precision, dimension(NSOURCES) :: t_s,hdur,lat,lon,depth
   character(len=20), dimension(NSOURCES) :: e_n
+  real :: ms
+  character(len=5) :: datasource
+  character(len=150) :: string,CMTSOLUTION
 
-  real ms
-
-  character(len=5) datasource
-  character(len=150) string,CMTSOLUTION
-
-
 !
 !---- read hypocenter info
 !
   call get_value_string(CMTSOLUTION, 'solver.CMTSOLUTION','DATA/CMTSOLUTION')
 
-  open(unit=821,file=CMTSOLUTION,iostat=ios,status='old',action='read')
+  open(unit=IIN,file=trim(CMTSOLUTION),status='old',action='read',iostat=ios)
   if(ios /= 0) stop 'error opening CMTSOLUTION file (in get_event_info_serial)'
 
   ! example header line of CMTSOLUTION file
@@ -177,41 +163,42 @@
   do isource=1,NSOURCES
 
     ! read header with event information
-    read(821,*) datasource,yr,mo,da,ho,mi,sec,elat_pde,elon_pde,depth_pde,mb,ms
+    read(IIN,*) datasource,yr,mo,da,ho,mi,sec,elat_pde,elon_pde,depth_pde,mb,ms
     jda=julian_day(yr,mo,da)
 
     ! ignore line with event name
-    read(821,"(a)") string
+    read(IIN,"(a)") string
     read(string(12:len_trim(string)),*) e_n(isource)
 
     ! read time shift
-    read(821,"(a)") string
+    read(IIN,"(a)") string
     read(string(12:len_trim(string)),*) t_s(isource)
 
     ! read half duration
-    read(821,"(a)") string
+    read(IIN,"(a)") string
     read(string(15:len_trim(string)),*) hdur(isource)
 
     ! read latitude
-    read(821,"(a)") string
+    read(IIN,"(a)") string
     read(string(10:len_trim(string)),*) lat(isource)
 
     ! read longitude
-    read(821,"(a)") string
+    read(IIN,"(a)") string
     read(string(11:len_trim(string)),*) lon(isource)
 
     ! read depth
-    read(821,"(a)") string
+    read(IIN,"(a)") string
     read(string(7:len_trim(string)),*) depth(isource)
 
     ! ignore the last 6 lines with moment tensor info
-    read(821,"(a)") string
-    read(821,"(a)") string
-    read(821,"(a)") string
-    read(821,"(a)") string
-    read(821,"(a)") string
-    read(821,"(a)") string
+    read(IIN,"(a)") string
+    read(IIN,"(a)") string
+    read(IIN,"(a)") string
+    read(IIN,"(a)") string
+    read(IIN,"(a)") string
+    read(IIN,"(a)") string
   enddo
+
   ! sets tshift_cmt to zero
   tshift_cmt = 0.
 
@@ -234,50 +221,7 @@
     t_shift = minval(t_s(1:NSOURCES))
   endif
 
-  close(821)
+  close(IIN)
 
-
-
-!  ! read header with event information
-!  read(821,*) datasource,yr,mo,da,ho,mi,sec,elat,elon,depth,mb,ms,region
-!
-!  jda=julian_day(yr,mo,da)
-!
-!  ! ignore line with event name
-!  read(821,"(a)") string
-!
-!  ! read time shift
-!  read(821,"(a)") string
-!  read(string(12:len_trim(string)),*) tshift_cmt
-!
-!  if (NSOURCES == 1) then
-!
-!  ! read half duration
-!    read(821,"(a)") string
-!    read(string(15:len_trim(string)),*) cmt_hdur
-!
-!  ! read latitude
-!    read(821,"(a)") string
-!    read(string(10:len_trim(string)),*) cmt_lat
-!
-!  ! read longitude
-!    read(821,"(a)") string
-!    read(string(11:len_trim(string)),*) cmt_lon
-!
-!  ! read depth
-!    read(821,"(a)") string
-!    read(string(7:len_trim(string)),*) cmt_depth
-!
-!  else
-!
-!    cmt_hdur=-1e8
-!    cmt_lat=-1e8
-!    cmt_lon=-1e8
-!    cmt_depth=-1e8
-!
-!  endif
-!
-!  close(821)
-
   end subroutine get_event_info_serial
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90	2012-08-25 02:15:26 UTC (rev 20628)
@@ -740,6 +740,7 @@
   ! main process broadcasts the results to all the slices
   call MPI_BCAST(islice_selected_source,NSOURCES,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
   call MPI_BCAST(ispec_selected_source,NSOURCES,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+
   call MPI_BCAST(xi_source,NSOURCES,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
   call MPI_BCAST(eta_source,NSOURCES,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
   call MPI_BCAST(gamma_source,NSOURCES,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
@@ -868,9 +869,8 @@
 
   integer :: NSOURCES,isource
 
-  double precision :: moment_tensor(6,NSOURCES)
-  double precision :: tshift_cmt(NSOURCES)
-  double precision :: hdur(NSOURCES)
+  double precision,dimension(6,NSOURCES) :: moment_tensor
+  double precision,dimension(NSOURCES) :: tshift_cmt,hdur
 
   double precision :: min_tshift_cmt_original
   integer :: NSTEP

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2012-08-25 02:15:26 UTC (rev 20628)
@@ -1150,7 +1150,7 @@
   ! prepares general fields on GPU
   call prepare_constants_device(Mesh_pointer,myrank,NGLLX, &
                                   hprime_xx, &
-                                  hprimewgll_xx, hprimewgll_yy, hprimewgll_zz, &
+                                  hprimewgll_xx, &
                                   wgllwgll_xy, wgllwgll_xz, wgllwgll_yz, &
                                   NSOURCES, nsources_local, &
                                   sourcearrays, &

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90	2012-08-25 02:15:26 UTC (rev 20628)
@@ -54,6 +54,14 @@
   endif
   call sync_all()
 
+  ! frees arrays
+  deallocate(theta_source,phi_source)
+
+  ! topography array no more needed
+  if( TOPOGRAPHY ) then
+    if(allocated(ibathy_topo) ) deallocate(ibathy_topo)
+  endif
+
   end subroutine setup_sources_receivers
 
 !
@@ -78,27 +86,27 @@
 
   ! allocate arrays for source
   allocate(islice_selected_source(NSOURCES), &
-          ispec_selected_source(NSOURCES), &
-          Mxx(NSOURCES), &
-          Myy(NSOURCES), &
-          Mzz(NSOURCES), &
-          Mxy(NSOURCES), &
-          Mxz(NSOURCES), &
-          Myz(NSOURCES),stat=ier)
+           ispec_selected_source(NSOURCES), &
+           Mxx(NSOURCES), &
+           Myy(NSOURCES), &
+           Mzz(NSOURCES), &
+           Mxy(NSOURCES), &
+           Mxz(NSOURCES), &
+           Myz(NSOURCES),stat=ier)
   if( ier /= 0 ) call exit_MPI(myrank,'error allocating source arrays')
 
   allocate(xi_source(NSOURCES), &
-          eta_source(NSOURCES), &
-          gamma_source(NSOURCES),stat=ier)
+           eta_source(NSOURCES), &
+           gamma_source(NSOURCES),stat=ier)
   if( ier /= 0 ) call exit_MPI(myrank,'error allocating source arrays')
 
   allocate(tshift_cmt(NSOURCES), &
-          hdur(NSOURCES), &
-          hdur_gaussian(NSOURCES),stat=ier)
+           hdur(NSOURCES), &
+           hdur_gaussian(NSOURCES),stat=ier)
   if( ier /= 0 ) call exit_MPI(myrank,'error allocating source arrays')
 
   allocate(theta_source(NSOURCES), &
-          phi_source(NSOURCES),stat=ier)
+           phi_source(NSOURCES),stat=ier)
   if( ier /= 0 ) call exit_MPI(myrank,'error allocating source arrays')
 
   allocate(nu_source(NDIM,NDIM,NSOURCES),stat=ier)
@@ -389,14 +397,6 @@
       call exit_MPI(myrank,'total number of receivers is incorrect')
   endif
 
-  ! frees arrays
-  deallocate(theta_source,phi_source)
-
-  ! topography array no more needed
-  if( TOPOGRAPHY ) then
-    if(allocated(ibathy_topo) ) deallocate(ibathy_topo)
-  endif
-
   end subroutine setup_receivers
 
 
@@ -512,8 +512,9 @@
   implicit none
 
   ! local parameters
-  integer :: isource,iglob,ispec,i,j,k
+  integer :: isource,iglob,i,j,k,ispec
   real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
+  double precision :: xi,eta,gamma
 
   do isource = 1,NSOURCES
 
@@ -522,27 +523,37 @@
 
     !   check that the source slice number is okay
     if(islice_selected_source(isource) < 0 .or. islice_selected_source(isource) > NPROCTOT_VAL-1) &
-      call exit_MPI(myrank,'something is wrong with the source slice number')
+      call exit_MPI(myrank,'error: source slice number invalid')
 
     !   compute source arrays in source slice
     if(myrank == islice_selected_source(isource)) then
-      call compute_arrays_source(ispec_selected_source(isource), &
-             xi_source(isource),eta_source(isource),gamma_source(isource),sourcearray, &
-             Mxx(isource),Myy(isource),Mzz(isource),Mxy(isource),Mxz(isource),Myz(isource), &
-             xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
-             etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
-             gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
-             xigll,yigll,zigll,NSPEC_CRUST_MANTLE)
 
+      ! element id which holds source
+      ispec = ispec_selected_source(isource)
+
+      ! checks bounds
+      if( ispec < 1 .or. ispec > NSPEC_CRUST_MANTLE ) &
+        call exit_MPI(myrank,'error: source ispec number invalid')
+
+      ! gets source location
+      xi = xi_source(isource)
+      eta = eta_source(isource)
+      gamma = gamma_source(isource)
+
+      ! pre-computes source contribution on GLL points
+      call compute_arrays_source(sourcearray,xi,eta,gamma, &
+                          Mxx(isource),Myy(isource),Mzz(isource),Mxy(isource),Mxz(isource),Myz(isource), &
+                          xix_crust_mantle(:,:,:,ispec),xiy_crust_mantle(:,:,:,ispec),xiz_crust_mantle(:,:,:,ispec), &
+                          etax_crust_mantle(:,:,:,ispec),etay_crust_mantle(:,:,:,ispec),etaz_crust_mantle(:,:,:,ispec), &
+                          gammax_crust_mantle(:,:,:,ispec),gammay_crust_mantle(:,:,:,ispec),gammaz_crust_mantle(:,:,:,ispec), &
+                          xigll,yigll,zigll)
+
       ! point forces, initializes sourcearray, used for simplified CUDA routines
       if(USE_FORCE_POINT_SOURCE) then
         ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
-        iglob = ibool_crust_mantle(nint(xi_source(isource)), &
-                                   nint(eta_source(isource)), &
-                                   nint(gamma_source(isource)), &
-                                   ispec_selected_source(isource))
+        iglob = ibool_crust_mantle(nint(xi),nint(eta),nint(gamma),ispec)
+
         ! sets sourcearrays
-        ispec = ispec_selected_source(isource)
         do k=1,NGLLZ
           do j=1,NGLLY
             do i=1,NGLLX

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90	2012-08-24 23:05:58 UTC (rev 20627)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90	2012-08-25 02:15:26 UTC (rev 20628)
@@ -187,9 +187,11 @@
 
   ! for SAC headers for seismograms
   integer :: yr_SAC,jda_SAC,ho_SAC,mi_SAC
+  double precision :: sec_SAC
   real :: mb_SAC
-  double precision :: t_cmt_SAC,t_shift_SAC,elat_SAC,elon_SAC,depth_SAC, &
-    cmt_lat_SAC,cmt_lon_SAC,cmt_depth_SAC,cmt_hdur_SAC,sec_SAC
+  double precision :: t_cmt_SAC,t_shift_SAC
+  double precision :: elat_SAC,elon_SAC,depth_SAC, &
+    cmt_lat_SAC,cmt_lon_SAC,cmt_depth_SAC,cmt_hdur_SAC
   character(len=20) :: event_name_SAC
 
   !-----------------------------------------------------------------



More information about the CIG-COMMITS mailing list