[cig-commits] [commit] devel,master: fix bugs (d321da5)

Thu Nov 6 08:14:06 PST 2014

Repository : https://github.com/geodynamics/specfem3d_globe

On branches: devel,master
Link       : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f

>---------------------------------------------------------------

commit d321da52d6b6cdd0d6a25535ab7e39588ee58479
Author: Kevin Pouget <kevin.pouget at imag.fr>
Date:   Tue May 6 16:13:53 2014 +0200

    fix bugs


>---------------------------------------------------------------

d321da52d6b6cdd0d6a25535ab7e39588ee58479
 src/gpu/compute_add_sources_elastic_gpu.c | 16 +++-------------
 src/gpu/initialize_gpu.c                  |  2 ++
 src/gpu/prepare_mesh_constants_gpu.c      | 14 ++++++++------
 src/gpu/write_seismograms_gpu.c           | 16 ++++++++++------
 4 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/src/gpu/compute_add_sources_elastic_gpu.c b/src/gpu/compute_add_sources_elastic_gpu.c
index 86b8ff7..1b1c7e9 100644
--- a/src/gpu/compute_add_sources_elastic_gpu.c
+++ b/src/gpu/compute_add_sources_elastic_gpu.c
@@ -109,16 +109,6 @@ void FC_FUNC_ (compute_add_sources_gpu,
 /*----------------------------------------------------------------------------------------------- */
 // backward sources
 /*----------------------------------------------------------------------------------------------- */
-#ifdef USE_CUDA
-__global__ void compute_add_sources_adjoint_cuda_kernel(realw* accel,
-                                                        int nrec,
-                                                        realw* adj_sourcearrays,
-                                                        int* ibool,
-                                                        int* ispec_selected_rec,
-                                                        int* pre_computed_irec,
-                                                        int nadj_rec_local) {}
-
-#endif
 
 extern EXTERN_LANG
 void FC_FUNC_ (compute_add_sources_backward_gpu,
@@ -207,7 +197,7 @@ void FC_FUNC_ (compute_add_sources_adjoint_gpu,
   // adds adjoint sources
   // note: call this routine after transfer_adj_to_device**() to have correct adjoint sourcearrays in array d_adj_sourcearrays
 
-  TRACE("compute_add_sources_adjoint_cuda");
+  TRACE("compute_add_sources_adjoint_gpu");
 
   Mesh *mp = (Mesh *)(*Mesh_pointer); //get mesh pointer out of Fortran integer container
 
@@ -226,7 +216,7 @@ void FC_FUNC_ (compute_add_sources_adjoint_gpu,
 #ifdef USE_CUDA
   if (run_cuda) {
     // waits for previous transfer_** calls to be finished
-    if( GPU_ASYNC_COPY ){
+    if (GPU_ASYNC_COPY ){
       // waits for asynchronous copy to finish
       cudaStreamSynchronize(mp->copy_stream);
     }
@@ -234,7 +224,7 @@ void FC_FUNC_ (compute_add_sources_adjoint_gpu,
     dim3 grid(num_blocks_x,num_blocks_y,1);
     dim3 threads(NGLLX,NGLLX,NGLLX);
 
-    compute_add_sources_adjoint_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_accel_crust_mantle.cuda,
+    compute_add_sources_adjoint_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_accel_crust_mantle.cuda,
                                                                                    nrec,
                                                                                    mp->d_adj_sourcearrays.cuda,
                                                                                    mp->d_ibool_crust_mantle.cuda,
diff --git a/src/gpu/initialize_gpu.c b/src/gpu/initialize_gpu.c
index fd3fb39..3cdc74a 100644
--- a/src/gpu/initialize_gpu.c
+++ b/src/gpu/initialize_gpu.c
@@ -588,6 +588,8 @@ void FC_FUNC_ (initialize_gpu_device,
   if (runtime_type != COMPILE && runtime_type != CUDA) {
     printf("WARNING: GPU_RUNTIME parameter (=%d) incompatible with Cuda-only compilation (CUDA=%d, COMPILE=%d). Defaulting to Cuda.\n", runtime_type, CUDA, COMPILE);
   }
+#else
+  #error "GPU code compiled but neither Cuda nor OpenCL are enabled"
 #endif
 
 #ifdef USE_OPENCL
diff --git a/src/gpu/prepare_mesh_constants_gpu.c b/src/gpu/prepare_mesh_constants_gpu.c
index 73cecc4..1460ede 100644
--- a/src/gpu/prepare_mesh_constants_gpu.c
+++ b/src/gpu/prepare_mesh_constants_gpu.c
@@ -615,12 +615,14 @@ void FC_FUNC_ (prepare_constants_device,
   mp->two_omega_earth = 0.f;
   mp->b_two_omega_earth = 0.f;
 #ifdef USE_CUDA
-// setup two streams, one for compute and one for host<->device memory copies
-  // uses pinned memory for asynchronous data transfers
-  // compute stream
-  cudaStreamCreate(&mp->compute_stream);
-  // copy stream (needed to transfer MPI buffers)
-  cudaStreamCreate(&mp->copy_stream);
+  if (run_cuda) {
+    // setup two streams, one for compute and one for host<->device memory copies
+    // uses pinned memory for asynchronous data transfers
+    // compute stream
+    cudaStreamCreate(&mp->compute_stream);
+    // copy stream (needed to transfer MPI buffers)
+    cudaStreamCreate(&mp->copy_stream);
+  }
 #endif
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_gpu_error ("prepare_constants_device");
diff --git a/src/gpu/write_seismograms_gpu.c b/src/gpu/write_seismograms_gpu.c
index 26d465f..240d8b1 100644
--- a/src/gpu/write_seismograms_gpu.c
+++ b/src/gpu/write_seismograms_gpu.c
@@ -358,21 +358,25 @@ void FC_FUNC_(transfer_seismo_from_device_async,
   int* h_ispec_selected;
 
   // checks if anything to do
-  if( mp->nrec_local == 0 ) return;
+  if (mp->nrec_local == 0) {
+    return;
+  }
 
   // checks async-memcpy
-  if( GPU_ASYNC_COPY == 0 ){
+  if (GPU_ASYNC_COPY ==  0){
     exit_on_error("transfer_seismo_from_device_async must be called with GPU_ASYNC_COPY == 1, please check mesh_constants_cuda.h");
   }
 
   // waits for previous copy call to be finished
 #ifdef USE_CUDA
-  cudaStreamSynchronize(mp->copy_stream);
+  if (run_cuda) {
+    cudaStreamSynchronize(mp->copy_stream);
+  }
 #endif
 
   // transfers displacements
   // select target array on host
-  switch( mp->simulation_type ){
+  switch (mp->simulation_type) {
     case 1:
       // forward simulation
       h_field = displ;
@@ -394,11 +398,11 @@ void FC_FUNC_(transfer_seismo_from_device_async,
 
   // updates corresponding array on CPU
   int irec_local;
-  for(irec_local = 0 ; irec_local < mp->nrec_local; irec_local++) {
+  for (irec_local = 0 ; irec_local < mp->nrec_local; irec_local++) {
     irec = number_receiver_global[irec_local] - 1;
     ispec = h_ispec_selected[irec] - 1;
 
-    for(i = 0; i < NGLL3; i++) {
+    for (i = 0; i < NGLL3; i++) {
       iglob = ibool[i+NGLL3*ispec] - 1;
       h_field[0+3*iglob] = mp->h_station_seismo_field[0+3*i+irec_local*NGLL3*3];
       h_field[1+3*iglob] = mp->h_station_seismo_field[1+3*i+irec_local*NGLL3*3];