[cig-commits] [commit] devel, master: bug fix for gpu output infos for displacement and strain when checking norm of arrays (9cb66c4)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Thu Nov 6 08:28:36 PST 2014
Repository : https://github.com/geodynamics/specfem3d_globe
On branches: devel,master
Link : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f
>---------------------------------------------------------------
commit 9cb66c48b146c822835de6c546833b825ffccd81
Author: daniel peter <peterda at ethz.ch>
Date: Tue Aug 12 13:39:29 2014 +0200
bug fix for gpu output infos for displacement and strain when checking norm of arrays
>---------------------------------------------------------------
9cb66c48b146c822835de6c546833b825ffccd81
src/gpu/assemble_MPI_scalar_gpu.c | 16 +++++---
src/gpu/assemble_MPI_vector_gpu.c | 28 ++++++++------
src/gpu/check_fields_gpu.c | 63 ++++++++++++++++++++-----------
src/gpu/compute_add_sources_elastic_gpu.c | 18 +++++----
src/gpu/mesh_constants_gpu.h | 5 +--
src/gpu/prepare_mesh_constants_gpu.c | 44 ++++++++++++---------
src/gpu/update_displacement_gpu.c | 21 +++++------
src/gpu/write_seismograms_gpu.c | 10 +++--
src/specfem3D/check_stability.f90 | 1 -
9 files changed, 123 insertions(+), 83 deletions(-)
diff --git a/src/gpu/assemble_MPI_scalar_gpu.c b/src/gpu/assemble_MPI_scalar_gpu.c
index 5a89d0b..1d9d8e7 100644
--- a/src/gpu/assemble_MPI_scalar_gpu.c
+++ b/src/gpu/assemble_MPI_scalar_gpu.c
@@ -233,9 +233,11 @@ void FC_FUNC_ (transfer_asmbl_pot_to_device,
cl_event *copy_evt = NULL;
cl_uint num_evt = 0;
- if (GPU_ASYNC_COPY && mp->has_last_copy_evt) {
- copy_evt = &mp->last_copy_evt;
- num_evt = 1;
+ if (GPU_ASYNC_COPY ){
+ if ( mp->has_last_copy_evt) {
+ copy_evt = &mp->last_copy_evt;
+ num_evt = 1;
+ }
}
if (*FORWARD_OR_ADJOINT == 1) {
@@ -288,9 +290,11 @@ void FC_FUNC_ (transfer_asmbl_pot_to_device,
clCheck (clEnqueueNDRangeKernel (mocl.command_queue, mocl.kernels.assemble_boundary_potential_on_device, 2, NULL,
global_work_size, local_work_size, num_evt, copy_evt, NULL));
}
- if (GPU_ASYNC_COPY && mp->has_last_copy_evt) {
- clCheck (clReleaseEvent (mp->last_copy_evt));
- mp->has_last_copy_evt = 0;
+ if (GPU_ASYNC_COPY ){
+ if ( mp->has_last_copy_evt) {
+ clCheck (clReleaseEvent (mp->last_copy_evt));
+ mp->has_last_copy_evt = 0;
+ }
}
}
#endif
diff --git a/src/gpu/assemble_MPI_vector_gpu.c b/src/gpu/assemble_MPI_vector_gpu.c
index d28af3e..3ace0b7 100644
--- a/src/gpu/assemble_MPI_vector_gpu.c
+++ b/src/gpu/assemble_MPI_vector_gpu.c
@@ -379,9 +379,11 @@ void FC_FUNC_ (transfer_asmbl_accel_to_device,
#ifdef USE_OPENCL
if (run_opencl) {
- if (GPU_ASYNC_COPY && mp->has_last_copy_evt) {
- copy_evt = &mp->last_copy_evt;
- num_evt = 1;
+ if (GPU_ASYNC_COPY){
+ if ( mp->has_last_copy_evt) {
+ copy_evt = &mp->last_copy_evt;
+ num_evt = 1;
+ }
}
if (*FORWARD_OR_ADJOINT == 1) {
@@ -506,9 +508,11 @@ void FC_FUNC_ (transfer_asmbl_accel_to_device,
if (run_opencl) {
idx = 0;
- if (GPU_ASYNC_COPY && mp->has_last_copy_evt) {
- copy_evt = &mp->last_copy_evt;
- num_evt = 1;
+ if (GPU_ASYNC_COPY){
+ if ( mp->has_last_copy_evt) {
+ copy_evt = &mp->last_copy_evt;
+ num_evt = 1;
+ }
}
if (*FORWARD_OR_ADJOINT == 1) {
@@ -558,9 +562,11 @@ void FC_FUNC_ (transfer_asmbl_accel_to_device,
clCheck (clEnqueueNDRangeKernel (mocl.command_queue, mocl.kernels.assemble_boundary_accel_on_device, 2, NULL, global_work_size, local_work_size, num_evt, copy_evt, NULL));
}
- if (GPU_ASYNC_COPY && mp->has_last_copy_evt) {
- clCheck (clReleaseEvent (mp->last_copy_evt));
- mp->has_last_copy_evt = 0;
+ if (GPU_ASYNC_COPY){
+ if( mp->has_last_copy_evt) {
+ clCheck (clReleaseEvent (mp->last_copy_evt));
+ mp->has_last_copy_evt = 0;
+ }
}
}
#endif
@@ -639,7 +645,7 @@ void FC_FUNC_(transfer_buffer_to_device_async,
Mesh *mp = (Mesh *)(*Mesh_pointer); // get Mesh from Fortran integer wrapper
// checks async-memcpy
- if (GPU_ASYNC_COPY == 0) {
+ if ( ! GPU_ASYNC_COPY ) {
exit_on_error("transfer_buffer_to_device_async must be called with GPU_ASYNC_COPY == 1, please check mesh_constants_cuda.h");
}
@@ -809,7 +815,7 @@ void FC_FUNC_(sync_copy_from_device,
Mesh *mp = (Mesh *)(*Mesh_pointer); // get Mesh from Fortran integer wrapper
// checks async-memcpy
- if (GPU_ASYNC_COPY == 0) {
+ if ( ! GPU_ASYNC_COPY ) {
exit_on_error("sync_copy_from_device must be called with GPU_ASYNC_COPY == 1, please check mesh_constants_gpu.h");
}
diff --git a/src/gpu/check_fields_gpu.c b/src/gpu/check_fields_gpu.c
index 28c00fe..4b050d7 100644
--- a/src/gpu/check_fields_gpu.c
+++ b/src/gpu/check_fields_gpu.c
@@ -221,8 +221,6 @@ void exit_on_gpu_error (char *kernel_name) {
fclose (fp);
}
- // releases previous contexts
-
// stops program
#ifdef WITH_MPI
MPI_Abort (MPI_COMM_WORLD, 1);
@@ -278,7 +276,7 @@ void print_CUDA_error_if_any(cudaError_t err, int num) {
#else
myrank = 0;
#endif
- sprintf(filename,"../in_out_files/OUTPUT_FILES/error_message_%06d.txt",myrank);
+ sprintf(filename,"OUTPUT_FILES/error_message_%06d.txt",myrank);
fp = fopen(filename,"a+");
if (fp != NULL){
fprintf(fp,"\nCUDA error !!!!! <%s> !!!!! \nat CUDA call error code: # %d\n",cudaGetErrorString(err),num);
@@ -467,14 +465,17 @@ void FC_FUNC_ (get_free_device_memory,
// Auxiliary functions
/*----------------------------------------------------------------------------------------------- */
-realw get_device_array_maximum_value (Mesh *mp, gpu_realw_mem *d_array, int size) {
+realw get_device_array_maximum_value (gpu_realw_mem *d_array, int size) {
+
+// gets maximum of array on GPU by copying over to CPU and handle it there
+
+ realw *h_array;
realw max = 0.0f;
// checks if anything to do
if (size > 0) {
- realw *h_array = (realw *) calloc(size ,sizeof (realw));
-
h_array = (realw *) calloc (size, sizeof (realw));
+
#ifdef USE_OPENCL
if (run_opencl) {
clCheck (clEnqueueReadBuffer (mocl.command_queue, d_array->ocl, CL_TRUE, 0,
@@ -487,11 +488,11 @@ realw get_device_array_maximum_value (Mesh *mp, gpu_realw_mem *d_array, int size
// explicitly wait for cuda kernels to finish
// (cudaMemcpy implicitly synchronizes all other cuda operations)
synchronize_cuda();
- print_CUDA_error_if_any(cudaMemcpy(h_array,d_array,sizeof(realw)*size,cudaMemcpyDeviceToHost),33001);
+ print_CUDA_error_if_any(cudaMemcpy(h_array,d_array->cuda,sizeof(realw)*size,cudaMemcpyDeviceToHost),33001);
}
#endif
// finds maximum value in array
- max = h_array[0];
+ max = abs(h_array[0]);
int i;
for (i = 1; i < size; i++) {
if (abs (h_array[i]) > max)
@@ -516,12 +517,11 @@ void FC_FUNC_ (check_norm_acoustic_from_device,
Mesh *mp = (Mesh *) *Mesh_pointer_f; //get mesh pointer out of Fortran integer container
realw max;
- gpu_realw_mem d_max;
max = 0.0f;
- // way 2 b: timing Elapsed time: 1.236916e-03
// launch simple reduction kernel
+ gpu_realw_mem d_max;
realw *h_max;
int blocksize = BLOCKSIZE_TRANSFER;
@@ -560,6 +560,7 @@ void FC_FUNC_ (check_norm_acoustic_from_device,
clCheck (clEnqueueNDRangeKernel (mocl.command_queue, mocl.kernels.get_maximum_scalar_kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL));
+ // copies to CPU
clCheck (clEnqueueReadBuffer (mocl.command_queue, d_max.ocl, CL_TRUE, 0,
num_blocks_x * num_blocks_y * sizeof (realw),
h_max, 0, NULL, NULL));
@@ -577,7 +578,7 @@ void FC_FUNC_ (check_norm_acoustic_from_device,
}else if(*FORWARD_OR_ADJOINT == 3 ){
get_maximum_scalar_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_b_displ_outer_core.cuda,size,d_max.cuda);
}
-
+ // copies to CPU
print_CUDA_error_if_any(cudaMemcpy(h_max,d_max.cuda,num_blocks_x*num_blocks_y*sizeof(realw),
cudaMemcpyDeviceToHost),222);
}
@@ -626,12 +627,14 @@ void FC_FUNC_ (check_norm_elastic_from_device,
int size, size_padded;
+ max = 0.0f;
+
// launch simple reduction kernel
+ gpu_realw_mem d_max;
realw *h_max;
int blocksize = BLOCKSIZE_TRANSFER;
// crust_mantle
- max = 0.0f;
size = mp->NGLOB_CRUST_MANTLE;
size_padded = ((int) ceil (((double) size) / ((double) blocksize))) * blocksize;
@@ -641,7 +644,6 @@ void FC_FUNC_ (check_norm_elastic_from_device,
h_max = (realw *) calloc (num_blocks_x * num_blocks_y, sizeof (realw));
- gpu_realw_mem d_max;
#ifdef USE_OPENCL
cl_int errcode;
@@ -675,7 +677,6 @@ void FC_FUNC_ (check_norm_elastic_from_device,
#endif
#ifdef USE_CUDA
dim3 grid,threads;
-
if (run_cuda) {
grid = dim3(num_blocks_x,num_blocks_y);
threads = dim3(blocksize,1,1);
@@ -687,6 +688,18 @@ void FC_FUNC_ (check_norm_elastic_from_device,
}else if(*FORWARD_OR_ADJOINT == 3 ){
get_maximum_vector_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_b_displ_crust_mantle.cuda,size,d_max.cuda);
}
+ // copies to CPU
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max.cuda,num_blocks_x*num_blocks_y*sizeof(realw),
+ cudaMemcpyDeviceToHost),222);
+
+ //debug
+ //realw max_d, max_v, max_a;
+ //max_d = get_device_array_maximum_value(&mp->d_displ_crust_mantle, NDIM * mp->NGLOB_CRUST_MANTLE);
+ //max_v = get_device_array_maximum_value(&mp->d_veloc_crust_mantle, NDIM * mp->NGLOB_CRUST_MANTLE);
+ //max_a = get_device_array_maximum_value(&mp->d_accel_crust_mantle, NDIM * mp->NGLOB_CRUST_MANTLE);
+ //printf ("rank %d - max crust_mantle displ: %e veloc: %e accel: %e\n", mp->myrank, max_d, max_v, max_a);
+ //fflush (stdout);
+ //synchronize_mpi ();
}
#endif
@@ -694,14 +707,13 @@ void FC_FUNC_ (check_norm_elastic_from_device,
max = h_max[0];
int i;
for (i = 1; i < num_blocks_x * num_blocks_y; i++) {
- // debug
- printf("rank %i: maximum cm = %i %f\n",mp->myrank,i,h_max[i]);
// sets maximum
if (max < h_max[i])
max = h_max[i];
}
max_crust_mantle = max;
+ // frees arrays
#ifdef USE_OPENCL
if (run_opencl) {
clReleaseMemObject (d_max.ocl);
@@ -767,7 +779,6 @@ void FC_FUNC_ (check_norm_elastic_from_device,
}else if(*FORWARD_OR_ADJOINT == 3 ){
get_maximum_vector_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_b_displ_inner_core.cuda,size,d_max.cuda);
}
-
// copies to CPU
print_CUDA_error_if_any(cudaMemcpy(h_max,d_max.cuda,num_blocks_x*num_blocks_y*sizeof(realw),
cudaMemcpyDeviceToHost),222);
@@ -781,6 +792,7 @@ void FC_FUNC_ (check_norm_elastic_from_device,
}
max_inner_core = max;
+ // frees arrays
#ifdef USE_OPENCL
if (run_opencl) {
clReleaseMemObject (d_max.ocl);
@@ -791,9 +803,11 @@ void FC_FUNC_ (check_norm_elastic_from_device,
cudaFree(d_max.cuda);
}
#endif
-
free (h_max);
+ //debug
+ //printf ("rank %d - max norm elastic: crust_mantle = %e inner_core = %e\n",mp->myrank,max_crust_mantle,max_inner_core);
+
// return result
max = MAX (max_inner_core, max_crust_mantle);
*norm = max;
@@ -866,7 +880,7 @@ void FC_FUNC_ (check_norm_strain_from_device,
global_work_size[0] = num_blocks_x * blocksize;
global_work_size[1] = num_blocks_y;
clCheck (clEnqueueNDRangeKernel (mocl.command_queue, mocl.kernels.get_maximum_scalar_kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL));
-
+ // copies to CPU
clCheck (clEnqueueReadBuffer (mocl.command_queue, d_max.ocl, CL_TRUE, 0,
num_blocks_x * num_blocks_y * sizeof (realw),
h_max, 0, NULL, NULL));
@@ -881,7 +895,7 @@ void FC_FUNC_ (check_norm_strain_from_device,
// determines max for: eps_trace_over_3_crust_mantle
get_maximum_scalar_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_eps_trace_over_3_crust_mantle.cuda,size,d_max.cuda);
-
+ // copies to CPU
print_CUDA_error_if_any(cudaMemcpy(h_max,d_max.cuda,num_blocks_x*num_blocks_y*sizeof(realw),
cudaMemcpyDeviceToHost),221);
}
@@ -906,7 +920,6 @@ void FC_FUNC_ (check_norm_strain_from_device,
cudaFree(d_max.cuda);
}
#endif
-
free (h_max);
// initializes
@@ -918,7 +931,7 @@ void FC_FUNC_ (check_norm_strain_from_device,
get_blocks_xy (size_padded / blocksize, &num_blocks_x, &num_blocks_y);
- h_max = (realw *) calloc (num_blocks_x*num_blocks_y, sizeof (realw));
+ h_max = (realw *) calloc (num_blocks_x * num_blocks_y, sizeof (realw));
max_eps = 0.0f;
#ifdef USE_OPENCL
@@ -958,6 +971,9 @@ void FC_FUNC_ (check_norm_strain_from_device,
#endif
#ifdef USE_CUDA
if (run_cuda) {
+ grid = dim3(num_blocks_x,num_blocks_y);
+ threads = dim3(blocksize,1,1);
+
cudaMalloc((void**)&d_max.cuda,num_blocks_x*num_blocks_y*sizeof(realw));
// determines max for: epsilondev_xx_crust_mantle
@@ -967,6 +983,8 @@ void FC_FUNC_ (check_norm_strain_from_device,
cudaMemcpyDeviceToHost),222);
max = h_max[0];
for(int i=1;i<num_blocks_x*num_blocks_y;i++) {
+ //debug
+ //if(mp->myrank == 0 ){printf ("rank %d - max %i %e %i %i\n",mp->myrank,i,h_max[i],num_blocks_x,num_blocks_y);}
if( max < h_max[i]) max = h_max[i];
}
max_eps = MAX(max_eps,max);
@@ -1030,7 +1048,6 @@ void FC_FUNC_ (check_norm_strain_from_device,
cudaFree(d_max.cuda);
}
#endif
-
free (h_max);
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
diff --git a/src/gpu/compute_add_sources_elastic_gpu.c b/src/gpu/compute_add_sources_elastic_gpu.c
index 81403ec..7255ce1 100644
--- a/src/gpu/compute_add_sources_elastic_gpu.c
+++ b/src/gpu/compute_add_sources_elastic_gpu.c
@@ -221,9 +221,11 @@ void FC_FUNC_ (compute_add_sources_adjoint_gpu,
cl_event *copy_evt = NULL;
cl_uint num_evt = 0;
- if (GPU_ASYNC_COPY && mp->has_last_copy_evt) {
- copy_evt = &mp->last_copy_evt;
- num_evt = 1;
+ if (GPU_ASYNC_COPY){
+ if (mp->has_last_copy_evt) {
+ copy_evt = &mp->last_copy_evt;
+ num_evt = 1;
+ }
}
clCheck (clSetKernelArg (mocl.kernels.compute_add_sources_adjoint_kernel, idx++, sizeof (cl_mem), (void *) &mp->d_accel_crust_mantle.ocl));
@@ -245,9 +247,11 @@ void FC_FUNC_ (compute_add_sources_adjoint_gpu,
clCheck (clEnqueueNDRangeKernel (mocl.command_queue, mocl.kernels.compute_add_sources_adjoint_kernel, 3, NULL,
global_work_size, local_work_size, num_evt, copy_evt, NULL));
- if (GPU_ASYNC_COPY && mp->has_last_copy_evt) {
- clCheck (clReleaseEvent (mp->last_copy_evt));
- mp->has_last_copy_evt = 0;
+ if (GPU_ASYNC_COPY){
+ if (mp->has_last_copy_evt) {
+ clCheck (clReleaseEvent (mp->last_copy_evt));
+ mp->has_last_copy_evt = 0;
+ }
}
}
#endif
@@ -380,7 +384,7 @@ void FC_FUNC_(transfer_adj_to_device_async,
}
// checks async-memcpy
- if (GPU_ASYNC_COPY == 0) {
+ if (! GPU_ASYNC_COPY ) {
exit_on_error("transfer_adj_to_device_async must be called with GPU_ASYNC_COPY == 1, \
please check mesh_constants_cuda.h");
}
diff --git a/src/gpu/mesh_constants_gpu.h b/src/gpu/mesh_constants_gpu.h
index c4d4c42..38bbb40 100644
--- a/src/gpu/mesh_constants_gpu.h
+++ b/src/gpu/mesh_constants_gpu.h
@@ -150,9 +150,8 @@ typedef float realw;
//#define R_EARTH_KM 6368.0f
// Asynchronous memory copies between GPU and CPU
-#ifndef GPU_ASYNC_COPY
+// (set to 0 for synchronous/blocking copies, set to 1 for asynchronous copies)
#define GPU_ASYNC_COPY 1
-#endif
/*----------------------------------------------------------------------------------------------- */
@@ -942,7 +941,7 @@ void exit_on_gpu_error (char *kernel_name);
void exit_on_error (char *info);
void synchronize_mpi ();
void get_blocks_xy (int num_blocks, int *num_blocks_x, int *num_blocks_y);
-realw get_device_array_maximum_value (Mesh *mp, gpu_realw_mem *d_array, int size);
+realw get_device_array_maximum_value (gpu_realw_mem *d_array, int size);
/* ----------------------------------------------------------------------------------------------- */
diff --git a/src/gpu/prepare_mesh_constants_gpu.c b/src/gpu/prepare_mesh_constants_gpu.c
index fa13260..0b7a213 100644
--- a/src/gpu/prepare_mesh_constants_gpu.c
+++ b/src/gpu/prepare_mesh_constants_gpu.c
@@ -3375,34 +3375,44 @@ void FC_FUNC_ (prepare_cleanup_device,
if (mp->nrec_local > 0) {
#ifdef USE_OPENCL
- if (run_opencl && GPU_ASYNC_COPY) {
- RELEASE_PINNED_BUFFER_OCL (station_seismo_field);
- } else
+ if (run_opencl ){
+ if (GPU_ASYNC_COPY) {
+ RELEASE_PINNED_BUFFER_OCL (station_seismo_field);
+ } else {
+ free (mp->h_station_seismo_field);
+ }
+ }
#endif
#ifdef USE_CUDA
- if (run_cuda && GPU_ASYNC_COPY) {
- cudaFreeHost(mp->h_station_seismo_field);
- } else
-#endif
- {
- free (mp->h_station_seismo_field);
+ if (run_cuda ){
+ if (GPU_ASYNC_COPY) {
+ cudaFreeHost(mp->h_station_seismo_field);
+ } else {
+ free (mp->h_station_seismo_field);
+ }
}
+#endif
}
if (mp->nadj_rec_local > 0) {
#ifdef USE_OPENCL
- if (run_opencl && GPU_ASYNC_COPY) {
- RELEASE_PINNED_BUFFER_OCL (adj_sourcearrays_slice);
+ if (run_opencl){
+ if (GPU_ASYNC_COPY) {
+ RELEASE_PINNED_BUFFER_OCL (adj_sourcearrays_slice);
+ } else {
+ free (mp->h_adj_sourcearrays_slice);
+ }
}
#endif
#ifdef USE_CUDA
- if (run_cuda && GPU_ASYNC_COPY) {
- cudaFreeHost(mp->h_adj_sourcearrays_slice);
- } else
-#endif
- {
- free (mp->h_adj_sourcearrays_slice);
+ if (run_cuda){
+ if (GPU_ASYNC_COPY) {
+ cudaFreeHost(mp->h_adj_sourcearrays_slice);
+ } else {
+ free (mp->h_adj_sourcearrays_slice);
+ }
}
+#endif
}
#ifdef USE_OPENCL
diff --git a/src/gpu/update_displacement_gpu.c b/src/gpu/update_displacement_gpu.c
index edd929d..09e38cb 100644
--- a/src/gpu/update_displacement_gpu.c
+++ b/src/gpu/update_displacement_gpu.c
@@ -44,13 +44,12 @@ void FC_FUNC_ (update_displacement_ic_gpu,
int size = NDIM * mp->NGLOB_INNER_CORE;
- //debug
-
#if DEBUG_BACKWARD_SIMULATIONS == 1 && DEBUG == 1
+ //debug
realw max_d, max_v, max_a;
- max_d = get_device_array_maximum_value (mp, mp->d_b_displ_inner_core, size);
- max_v = get_device_array_maximum_value (mp, mp->d_b_veloc_inner_core, size);
- max_a = get_device_array_maximum_value (mp, mp->d_b_accel_inner_core, size);
+ max_d = get_device_array_maximum_value(&mp->d_b_displ_inner_core, size);
+ max_v = get_device_array_maximum_value(&mp->d_b_veloc_inner_core, size);
+ max_a = get_device_array_maximum_value(&mp->d_b_accel_inner_core, size);
printf ("rank %d - max inner_core displ: %f veloc: %f accel: %f\n", mp->myrank, max_d, max_v, max_a);
fflush (stdout);
synchronize_mpi ();
@@ -163,9 +162,9 @@ void FC_FUNC_ (update_displacement_cm_gpu,
#if DEBUG_BACKWARD_SIMULATIONS == 1 && DEBUG == 1
realw max_d, max_v, max_a;
- max_d = get_device_array_maximum_value (mp, mp->d_b_displ_crust_mantle, size);
- max_v = get_device_array_maximum_value (mp, mp->d_b_veloc_crust_mantle, size);
- max_a = get_device_array_maximum_value (mp, mp->d_b_accel_crust_mantle, size);
+ max_d = get_device_array_maximum_value(&mp->d_b_displ_crust_mantle, size);
+ max_v = get_device_array_maximum_value(&mp->d_b_veloc_crust_mantle, size);
+ max_a = get_device_array_maximum_value(&mp->d_b_accel_crust_mantle, size);
printf ("rank %d - max crust_mantle displ: %f veloc: %f accel: %f\n", mp->myrank, max_d, max_v, max_a);
fflush (stdout);
synchronize_mpi ();
@@ -275,9 +274,9 @@ void FC_FUNC_ (update_displacement_oc_gpu,
#if DEBUG_BACKWARD_SIMULATIONS == 1 && DEBUG == 1
realw max_d, max_v, max_a;
- max_d = get_device_array_maximum_value (mp, mp->d_b_displ_outer_core, size);
- max_v = get_device_array_maximum_value (mp, mp->d_b_veloc_outer_core, size);
- max_a = get_device_array_maximum_value (mp, mp->d_b_accel_outer_core, size);
+ max_d = get_device_array_maximum_value(&mp->d_b_displ_outer_core, size);
+ max_v = get_device_array_maximum_value(&mp->d_b_veloc_outer_core, size);
+ max_a = get_device_array_maximum_value(&mp->d_b_accel_outer_core, size);
printf ("rank %d - max outer_core displ: %f veloc: %f accel: %f\n", mp->myrank, max_d, max_v, max_a);
fflush (stdout);
synchronize_mpi ();
diff --git a/src/gpu/write_seismograms_gpu.c b/src/gpu/write_seismograms_gpu.c
index 8919c19..d94b5b4 100644
--- a/src/gpu/write_seismograms_gpu.c
+++ b/src/gpu/write_seismograms_gpu.c
@@ -63,9 +63,11 @@ void write_seismograms_transfer_from_device (Mesh *mp,
cl_event *copy_evt = NULL;
cl_uint num_evt = 0;
- if (GPU_ASYNC_COPY && mp->has_last_copy_evt) {
- copy_evt = &mp->last_copy_evt;
- num_evt = 1;
+ if (GPU_ASYNC_COPY ){
+ if (mp->has_last_copy_evt) {
+ copy_evt = &mp->last_copy_evt;
+ num_evt = 1;
+ }
}
clCheck (clSetKernelArg (mocl.kernels.write_seismograms_transfer_from_device_kernel, idx++, sizeof (cl_mem), (void *) &mp->d_number_receiver_global.ocl));
@@ -369,7 +371,7 @@ void FC_FUNC_(transfer_seismo_from_device_async,
}
// checks async-memcpy
- if (GPU_ASYNC_COPY == 0){
+ if (! GPU_ASYNC_COPY ){
exit_on_error("transfer_seismo_from_device_async must be called with GPU_ASYNC_COPY == 1, please check mesh_constants_cuda.h");
}
diff --git a/src/specfem3D/check_stability.f90 b/src/specfem3D/check_stability.f90
index 2a7ed00..09dd09a 100644
--- a/src/specfem3D/check_stability.f90
+++ b/src/specfem3D/check_stability.f90
@@ -96,7 +96,6 @@
displ_inner_core(3,:)**2)))
Ufluidnorm = maxval(abs(displ_outer_core))
-
else
! on GPU
! way 2: just get maximum of fields from GPU
More information about the CIG-COMMITS
mailing list