[cig-commits] [commit] devel,master: fix bugs (c38411c)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Thu Nov 6 08:14:26 PST 2014
Repository : https://github.com/geodynamics/specfem3d_globe
On branches: devel,master
Link : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f
>---------------------------------------------------------------
commit c38411ca2c76ac11b34b8db870f404746fe104a8
Author: Kevin Pouget <kevin.pouget at imag.fr>
Date: Wed May 7 17:04:49 2014 +0200
fix bugs
>---------------------------------------------------------------
c38411ca2c76ac11b34b8db870f404746fe104a8
src/gpu/check_fields_gpu.c | 10 +--
src/gpu/prepare_mesh_constants_gpu.c | 117 ++++++++++++++++++-----------------
2 files changed, 64 insertions(+), 63 deletions(-)
diff --git a/src/gpu/check_fields_gpu.c b/src/gpu/check_fields_gpu.c
index f543e16..7e5a914 100644
--- a/src/gpu/check_fields_gpu.c
+++ b/src/gpu/check_fields_gpu.c
@@ -839,6 +839,11 @@ void FC_FUNC_ (check_norm_strain_from_device,
#ifdef USE_OPENCL
if (run_opencl) {
+ local_work_size[0] = blocksize;
+ local_work_size[1] = 1;
+ global_work_size[0] = num_blocks_x * blocksize;
+ global_work_size[1] = num_blocks_y;
+
idx = 0;
d_max.ocl = clCreateBuffer (mocl.context, CL_MEM_READ_WRITE, num_blocks_x * num_blocks_y * sizeof (realw), NULL, clck_(&errcode));
@@ -926,11 +931,6 @@ void FC_FUNC_ (check_norm_strain_from_device,
clCheck (clSetKernelArg (mocl.kernels.get_maximum_scalar_kernel, idx++, sizeof (int), (void *) &size));
clCheck (clSetKernelArg (mocl.kernels.get_maximum_scalar_kernel, idx++, sizeof (cl_mem), (void *) &d_max.ocl));
- local_work_size[0] = blocksize;
- local_work_size[1] = 1;
- global_work_size[0] = num_blocks_x * blocksize;
- global_work_size[1] = num_blocks_y;
-
clCheck (clEnqueueNDRangeKernel (mocl.command_queue, mocl.kernels.get_maximum_scalar_kernel, 2, NULL,
global_work_size, local_work_size, 0, NULL, NULL));
diff --git a/src/gpu/prepare_mesh_constants_gpu.c b/src/gpu/prepare_mesh_constants_gpu.c
index 1460ede..fc2eeaa 100644
--- a/src/gpu/prepare_mesh_constants_gpu.c
+++ b/src/gpu/prepare_mesh_constants_gpu.c
@@ -1353,53 +1353,54 @@ void FC_FUNC_ (prepare_mpi_buffers_device,
}
#endif
}
- }
+
#ifdef USE_OPENCL
- if (run_opencl) {
- // asynchronous MPI buffer
- if (GPU_ASYNC_COPY) {
- ALLOC_PINNED_BUFFER_OCL(send_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
- ALLOC_PINNED_BUFFER_OCL(recv_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
+ if (run_opencl) {
+ // asynchronous MPI buffer
+ if (GPU_ASYNC_COPY) {
+ ALLOC_PINNED_BUFFER_OCL(send_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
+ ALLOC_PINNED_BUFFER_OCL(recv_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
- if (mp->simulation_type == 3) {
- ALLOC_PINNED_BUFFER_OCL(b_send_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
- ALLOC_PINNED_BUFFER_OCL(b_recv_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
+ if (mp->simulation_type == 3) {
+ ALLOC_PINNED_BUFFER_OCL(b_send_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
+ ALLOC_PINNED_BUFFER_OCL(b_recv_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
+ }
}
}
- }
#endif
#ifdef USE_OPENCL
- if (run_opencl) {
- // asynchronous MPI buffer
- if (GPU_ASYNC_COPY) {
- ALLOC_PINNED_BUFFER_OCL(send_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
- ALLOC_PINNED_BUFFER_OCL(recv_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
+ if (run_opencl) {
+ // asynchronous MPI buffer
+ if (GPU_ASYNC_COPY) {
+ ALLOC_PINNED_BUFFER_OCL(send_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
+ ALLOC_PINNED_BUFFER_OCL(recv_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
- if (mp->simulation_type == 3) {
- ALLOC_PINNED_BUFFER_OCL(b_send_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
- ALLOC_PINNED_BUFFER_OCL(b_recv_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
+ if (mp->simulation_type == 3) {
+ ALLOC_PINNED_BUFFER_OCL(b_send_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
+ ALLOC_PINNED_BUFFER_OCL(b_recv_accel_buffer_cm, sizeof(realw)* size_mpi_buffer);
+ }
}
}
- }
#endif
#ifdef USE_CUDA
- if (run_cuda) {
- // asynchronous MPI buffer
- if( GPU_ASYNC_COPY ){
- // note: Allocate pinned MPI buffers.
- // MPI buffers use pinned memory allocated by cudaMallocHost, which
- // enables the use of asynchronous memory copies from host <-> device
- // send buffer
- print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_send_accel_buffer_cm),sizeof(realw)* size_mpi_buffer ),8004);
- // receive buffer
- print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_recv_accel_buffer_cm),sizeof(realw)* size_mpi_buffer ),8004);
- if( mp->simulation_type == 3){
- print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_b_send_accel_buffer_cm),sizeof(realw)* size_mpi_buffer ),8004);
- print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_b_recv_accel_buffer_cm),sizeof(realw)* size_mpi_buffer ),8004);
+ if (run_cuda) {
+ // asynchronous MPI buffer
+ if( GPU_ASYNC_COPY ){
+ // note: Allocate pinned MPI buffers.
+ // MPI buffers use pinned memory allocated by cudaMallocHost, which
+ // enables the use of asynchronous memory copies from host <-> device
+ // send buffer
+ print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_send_accel_buffer_cm),sizeof(realw)* size_mpi_buffer ),8004);
+ // receive buffer
+ print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_recv_accel_buffer_cm),sizeof(realw)* size_mpi_buffer ),8004);
+ if( mp->simulation_type == 3){
+ print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_b_send_accel_buffer_cm),sizeof(realw)* size_mpi_buffer ),8004);
+ print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_b_recv_accel_buffer_cm),sizeof(realw)* size_mpi_buffer ),8004);
+ }
}
}
- }
#endif
+ }
// inner core mesh
mp->num_interfaces_inner_core = *num_interfaces_inner_core;
@@ -1444,41 +1445,41 @@ void FC_FUNC_ (prepare_mpi_buffers_device,
}
#endif
}
- }
#ifdef USE_OPENCL
- if (run_opencl) {
- // asynchronous MPI buffer
- if (GPU_ASYNC_COPY) {
- ALLOC_PINNED_BUFFER_OCL(send_accel_buffer_ic, sizeof(realw)* size_mpi_buffer);
- ALLOC_PINNED_BUFFER_OCL(recv_accel_buffer_ic, sizeof(realw)* size_mpi_buffer);
+ if (run_opencl) {
+ // asynchronous MPI buffer
+ if (GPU_ASYNC_COPY) {
+ ALLOC_PINNED_BUFFER_OCL(send_accel_buffer_ic, sizeof(realw)* size_mpi_buffer);
+ ALLOC_PINNED_BUFFER_OCL(recv_accel_buffer_ic, sizeof(realw)* size_mpi_buffer);
- if (mp->simulation_type == 3) {
- ALLOC_PINNED_BUFFER_OCL(b_send_accel_buffer_ic, sizeof(realw)* size_mpi_buffer);
- ALLOC_PINNED_BUFFER_OCL(b_recv_accel_buffer_ic, sizeof(realw)* size_mpi_buffer);
+ if (mp->simulation_type == 3) {
+ ALLOC_PINNED_BUFFER_OCL(b_send_accel_buffer_ic, sizeof(realw)* size_mpi_buffer);
+ ALLOC_PINNED_BUFFER_OCL(b_recv_accel_buffer_ic, sizeof(realw)* size_mpi_buffer);
+ }
}
}
- }
#endif
#ifdef USE_CUDA
- if (run_cuda) {
- // asynchronous MPI buffer
- if( GPU_ASYNC_COPY ){
- // note: Allocate pinned MPI buffers.
- // MPI buffers use pinned memory allocated by cudaMallocHost, which
- // enables the use of asynchronous memory copies from host <-> device
- // send buffer
- print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_send_accel_buffer_ic),sizeof(realw)*size_mpi_buffer ),8004);
- // receive buffer
- print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_recv_accel_buffer_ic),sizeof(realw)*size_mpi_buffer ),8004);
- // adjoint
- if( mp->simulation_type == 3){
- print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_b_send_accel_buffer_ic),sizeof(realw)*size_mpi_buffer ),8004);
- print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_b_recv_accel_buffer_ic),sizeof(realw)*size_mpi_buffer ),8004);
+ if (run_cuda) {
+ // asynchronous MPI buffer
+ if( GPU_ASYNC_COPY ){
+ // note: Allocate pinned MPI buffers.
+ // MPI buffers use pinned memory allocated by cudaMallocHost, which
+ // enables the use of asynchronous memory copies from host <-> device
+ // send buffer
+ print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_send_accel_buffer_ic),sizeof(realw)*size_mpi_buffer ),8004);
+ // receive buffer
+ print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_recv_accel_buffer_ic),sizeof(realw)*size_mpi_buffer ),8004);
+ // adjoint
+ if( mp->simulation_type == 3){
+ print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_b_send_accel_buffer_ic),sizeof(realw)*size_mpi_buffer ),8004);
+ print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_b_recv_accel_buffer_ic),sizeof(realw)*size_mpi_buffer ),8004);
+ }
}
}
- }
#endif
+ }
// outer core mesh
// note: uses only scalar wavefield arrays
mp->num_interfaces_outer_core = *num_interfaces_outer_core;
More information about the CIG-COMMITS mailing list