[cig-commits] r19659 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src: cuda shared specfem3D
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Tue Feb 21 20:38:33 PST 2012
Author: danielpeter
Date: 2012-02-21 20:38:32 -0800 (Tue, 21 Feb 2012)
New Revision: 19659
Modified:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/noise_tomography.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_forward_arrays.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_forward_arrays.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90
Log:
updates the time loop and seismogram writing
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -156,7 +156,7 @@
// copies scalar buffer onto GPU
cudaMemcpy(mp->d_send_accel_buffer_outer_core, buffer_recv_scalar,
- (mp->max_nibool_interfaces_outer_core)*(mp->num_interfaces_outer_core)*sizeof(realw),
+ (mp->max_nibool_interfaces_outer_core)*(mp->num_interfaces_outer_core)*sizeof(realw),
cudaMemcpyHostToDevice);
// assembles on GPU
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -74,7 +74,7 @@
int* FORWARD_OR_ADJOINT){
TRACE("transfer_boun_accel_from_device");
int blocksize,size_padded,num_blocks_x,num_blocks_y;
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
// crust/mantle region
@@ -113,14 +113,14 @@
cudaMemcpy(send_accel_buffer,mp->d_send_accel_buffer_crust_mantle,
3*mp->max_nibool_interfaces_crust_mantle*mp->num_interfaces_crust_mantle*sizeof(realw),
cudaMemcpyDeviceToHost);
-
+
}
}
-
+
// inner core region
if( *IREGION == IREGION_INNER_CORE ){
if( mp->num_interfaces_inner_core > 0 ){
-
+
blocksize = BLOCKSIZE_TRANSFER;
size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_inner_core)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
@@ -128,10 +128,10 @@
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
- }
+ }
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
if(*FORWARD_OR_ADJOINT == 1) {
prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel_inner_core,
mp->d_send_accel_buffer_inner_core,
@@ -148,12 +148,12 @@
mp->d_nibool_interfaces_inner_core,
mp->d_ibool_interfaces_inner_core);
}
-
+
// copies buffer to CPU
cudaMemcpy(send_accel_buffer,mp->d_send_accel_buffer_inner_core,
3*mp->max_nibool_interfaces_inner_core*mp->num_interfaces_inner_core*sizeof(realw),
cudaMemcpyDeviceToHost);
-
+
}
}
@@ -172,7 +172,7 @@
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
int iinterface=0;
-
+
for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
if(id < d_nibool_interfaces_ext_mesh[iinterface]) {
atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)],
@@ -222,7 +222,7 @@
dim3 threads(blocksize,1,1);
if(*FORWARD_OR_ADJOINT == 1) { //assemble forward accel
- assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel_crust_mantle,
+ assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel_crust_mantle,
mp->d_send_accel_buffer_crust_mantle,
mp->num_interfaces_crust_mantle,
mp->max_nibool_interfaces_crust_mantle,
@@ -230,7 +230,7 @@
mp->d_ibool_interfaces_crust_mantle);
}
else if(*FORWARD_OR_ADJOINT == 3) { //assemble adjoint accel
- assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
+ assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
mp->d_send_accel_buffer_crust_mantle,
mp->num_interfaces_crust_mantle,
mp->max_nibool_interfaces_crust_mantle,
@@ -247,7 +247,7 @@
cudaMemcpy(mp->d_send_accel_buffer_inner_core, buffer_recv_vector,
3*(mp->max_nibool_interfaces_inner_core)*(mp->num_interfaces_inner_core)*sizeof(realw),
cudaMemcpyHostToDevice);
-
+
// assembles values
blocksize = BLOCKSIZE_TRANSFER;
size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_inner_core)/((double)blocksize)))*blocksize;
@@ -257,12 +257,12 @@
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
if(*FORWARD_OR_ADJOINT == 1) { //assemble forward accel
- assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel_inner_core,
+ assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel_inner_core,
mp->d_send_accel_buffer_inner_core,
mp->num_interfaces_inner_core,
mp->max_nibool_interfaces_inner_core,
@@ -270,7 +270,7 @@
mp->d_ibool_interfaces_inner_core);
}
else if(*FORWARD_OR_ADJOINT == 3) { //assemble adjoint accel
- assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel_inner_core,
+ assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel_inner_core,
mp->d_send_accel_buffer_inner_core,
mp->num_interfaces_inner_core,
mp->max_nibool_interfaces_inner_core,
@@ -279,8 +279,8 @@
}
}
}
-
-
+
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("transfer_asmbl_accel_to_device");
#endif
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -409,7 +409,7 @@
// Check functions
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(check_max_norm_displ_gpu,
CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {
@@ -426,9 +426,9 @@
}
printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(check_max_norm_vector,
CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {
@@ -451,9 +451,9 @@
}
printf("%d:maxnorm of vector %d [%d] = %e\n",procid,*announceID,maxloc,maxnorm);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(check_max_norm_displ,
CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {
@@ -467,9 +467,9 @@
}
printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(check_max_norm_b_displ_gpu,
CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {
@@ -494,9 +494,9 @@
printf("%d: maxnorm of backward displ = %e\n",*announceID,maxnorm);
printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm_accel);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(check_max_norm_b_accel_gpu,
CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {
@@ -514,9 +514,9 @@
}
printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(check_max_norm_b_veloc_gpu,
CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {
@@ -534,9 +534,9 @@
}
printf("%d: maxnorm of backward veloc = %e\n",*announceID,maxnorm);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(check_max_norm_b_displ,
CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {
@@ -550,9 +550,9 @@
}
printf("%d:maxnorm of backward displ = %e\n",*announceID,maxnorm);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(check_max_norm_b_accel,
CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {
@@ -566,9 +566,9 @@
}
printf("%d:maxnorm of backward accel = %e\n",*announceID,maxnorm);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(check_error_vectors,
CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {
@@ -607,8 +607,8 @@
}
}
+*/
-
/* ----------------------------------------------------------------------------------------------- */
// Auxiliary functions
@@ -617,7 +617,7 @@
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(get_max_accel,
GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {
@@ -642,14 +642,14 @@
printf("%d/%d: max=%e\n",it,procid,maxval);
free(accel_cpy);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
// ACOUSTIC simulations
/* ----------------------------------------------------------------------------------------------- */
-__global__ void get_maximum_kernel(realw* array, int size, realw* d_max){
+__global__ void get_maximum_scalar_kernel(realw* array, int size, realw* d_max){
/* simplest version: uses only 1 thread
realw max;
@@ -696,12 +696,12 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(get_norm_acoustic_from_device,
- GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
+void FC_FUNC_(check_norm_acoustic_from_device,
+ CHECK_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
long* Mesh_pointer_f,
int* SIMULATION_TYPE) {
-TRACE("get_norm_acoustic_from_device");
+TRACE("check_norm_acoustic_from_device");
//double start_time = get_time();
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
@@ -745,27 +745,21 @@
realw* h_max;
int blocksize = 256;
- int num_blocks_x = (int) ceil(mp->NGLOB_AB/blocksize);
- //printf("num_blocks_x %i \n",num_blocks_x);
+ // outer core
+ int size = mp->NGLOB_OUTER_CORE;
+ int num_blocks_x = (int) ceil(size/blocksize);
h_max = (realw*) calloc(num_blocks_x,sizeof(realw));
cudaMalloc((void**)&d_max,num_blocks_x*sizeof(realw));
-
dim3 grid(num_blocks_x,1);
dim3 threads(blocksize,1,1);
if(*SIMULATION_TYPE == 1 ){
- get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
- mp->NGLOB_AB,
- d_max);
+ get_maximum_scalar_kernel<<<grid,threads>>>(mp->d_displ_outer_core,size,d_max);
+ }else if(*SIMULATION_TYPE == 3 ){
+ get_maximum_scalar_kernel<<<grid,threads>>>(mp->d_b_displ_outer_core,size,d_max);
}
- if(*SIMULATION_TYPE == 3 ){
- get_maximum_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
- mp->NGLOB_AB,
- d_max);
- }
-
print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),222);
// determines max for all blocks
@@ -819,7 +813,7 @@
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
- exit_on_cuda_error("after get_norm_acoustic_from_device");
+ exit_on_cuda_error("after check_norm_acoustic_from_device");
#endif
}
@@ -865,45 +859,71 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(get_norm_elastic_from_device,
- GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
- long* Mesh_pointer_f,
- int* SIMULATION_TYPE) {
+void FC_FUNC_(check_norm_elastic_from_device,
+ CHECK_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
+ long* Mesh_pointer_f,
+ int* SIMULATION_TYPE) {
- TRACE("get_norm_elastic_from_device");
+ TRACE("check_norm_elastic_from_device");
//double start_time = get_time();
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- realw max;
+
+ realw max,max_crust_mantle,max_inner_core;
realw *d_max;
+ int num_blocks_x,size;
- max = 0;
// launch simple reduction kernel
realw* h_max;
int blocksize = 256;
- int num_blocks_x = (int) ceil(mp->NGLOB_AB/blocksize);
- //printf("num_blocks_x %i \n",num_blocks_x);
+ // crust_mantle
+ max = 0;
+ size = mp->NGLOB_CRUST_MANTLE;
+ num_blocks_x = (int) ceil(size/blocksize);
h_max = (realw*) calloc(num_blocks_x,sizeof(realw));
cudaMalloc((void**)&d_max,num_blocks_x*sizeof(realw));
- dim3 grid(num_blocks_x,1);
- dim3 threads(blocksize,1,1);
-
+ dim3 grid1(num_blocks_x,1);
+ dim3 threads1(blocksize,1,1);
if(*SIMULATION_TYPE == 1 ){
- get_maximum_vector_kernel<<<grid,threads>>>(mp->d_displ,
- mp->NGLOB_AB,
- d_max);
+ get_maximum_vector_kernel<<<grid1,threads1>>>(mp->d_displ_crust_mantle,size,d_max);
+ }else if(*SIMULATION_TYPE == 3 ){
+ get_maximum_vector_kernel<<<grid1,threads1>>>(mp->d_b_displ_crust_mantle,size,d_max);
}
- if(*SIMULATION_TYPE == 3 ){
- get_maximum_vector_kernel<<<grid,threads>>>(mp->d_b_displ,
- mp->NGLOB_AB,
- d_max);
+ // copies to CPU
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),222);
+
+ // determines max for all blocks
+ max = h_max[0];
+ for(int i=1;i<num_blocks_x;i++) {
+ if( max < h_max[i]) max = h_max[i];
}
+ max_crust_mantle = max;
+ cudaFree(d_max);
+ free(h_max);
+
+ // inner_core
+ max = 0;
+ size = mp->NGLOB_INNER_CORE;
+
+ num_blocks_x = (int) ceil(size/blocksize);
+ h_max = (realw*) calloc(num_blocks_x,sizeof(realw));
+ cudaMalloc((void**)&d_max,num_blocks_x*sizeof(realw));
+
+ dim3 grid2(num_blocks_x,1);
+ dim3 threads2(blocksize,1,1);
+ if(*SIMULATION_TYPE == 1 ){
+ get_maximum_vector_kernel<<<grid2,threads2>>>(mp->d_displ_inner_core,size,d_max);
+ }else if(*SIMULATION_TYPE == 3 ){
+ get_maximum_vector_kernel<<<grid2,threads2>>>(mp->d_b_displ_inner_core,size,d_max);
+ }
+
+ // copies to CPU
print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),222);
// determines max for all blocks
@@ -911,18 +931,124 @@
for(int i=1;i<num_blocks_x;i++) {
if( max < h_max[i]) max = h_max[i];
}
+ max_inner_core = max;
cudaFree(d_max);
free(h_max);
// return result
+ max = MAX(max_inner_core,max_crust_mantle);
*norm = max;
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
- exit_on_cuda_error("after get_norm_elastic_from_device");
+ exit_on_cuda_error("after check_norm_elastic_from_device");
#endif
}
+/* ----------------------------------------------------------------------------------------------- */
+extern "C"
+void FC_FUNC_(check_norm_strain_from_device,
+ CHECK_NORM_STRAIN_FROM_DEVICE)(realw* norm_strain,
+ realw* norm_strain2,
+ long* Mesh_pointer_f) {
+
+ TRACE("check_norm_strain_from_device");
+ //double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ realw max,max_eps;
+ realw *d_max;
+ int num_blocks_x,size;
+
+
+ // launch simple reduction kernel
+ realw* h_max;
+ int blocksize = 256;
+
+ // crust_mantle strain arrays
+ size = NGLL3*(mp->NSPEC_CRUST_MANTLE);
+
+ num_blocks_x = (int) ceil(size/blocksize);
+ h_max = (realw*) calloc(num_blocks_x,sizeof(realw));
+ cudaMalloc((void**)&d_max,num_blocks_x*sizeof(realw));
+
+ dim3 grid(num_blocks_x,1);
+ dim3 threads(blocksize,1,1);
+
+ // determines max for: eps_trace_over_3_crust_mantle
+ get_maximum_scalar_kernel<<<grid,threads>>>(mp->d_eps_trace_over_3_crust_mantle,size,d_max);
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),221);
+ max = h_max[0];
+ for(int i=1;i<num_blocks_x;i++) {
+ if( max < h_max[i]) max = h_max[i];
+ }
+ // strain trace maximum
+ *norm_strain = max;
+
+ // initializes
+ max_eps = 0.0f;
+
+ // determines max for: epsilondev_xx_crust_mantle
+ get_maximum_scalar_kernel<<<grid,threads>>>(mp->d_epsilondev_xx_crust_mantle,size,d_max);
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),222);
+ max = h_max[0];
+ for(int i=1;i<num_blocks_x;i++) {
+ if( max < h_max[i]) max = h_max[i];
+ }
+ max_eps = MAX(max_eps,max);
+
+ // determines max for: epsilondev_yy_crust_mantle
+ get_maximum_scalar_kernel<<<grid,threads>>>(mp->d_epsilondev_yy_crust_mantle,size,d_max);
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),223);
+ max = h_max[0];
+ for(int i=1;i<num_blocks_x;i++) {
+ if( max < h_max[i]) max = h_max[i];
+ }
+ max_eps = MAX(max_eps,max);
+
+ // determines max for: epsilondev_xy_crust_mantle
+ get_maximum_scalar_kernel<<<grid,threads>>>(mp->d_epsilondev_xy_crust_mantle,size,d_max);
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),224);
+ max = h_max[0];
+ for(int i=1;i<num_blocks_x;i++) {
+ if( max < h_max[i]) max = h_max[i];
+ }
+ max_eps = MAX(max_eps,max);
+
+ // determines max for: epsilondev_xz_crust_mantle
+ get_maximum_scalar_kernel<<<grid,threads>>>(mp->d_epsilondev_xz_crust_mantle,size,d_max);
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),225);
+ max = h_max[0];
+ for(int i=1;i<num_blocks_x;i++) {
+ if( max < h_max[i]) max = h_max[i];
+ }
+ max_eps = MAX(max_eps,max);
+
+ // determines max for: epsilondev_yz_crust_mantle
+ get_maximum_scalar_kernel<<<grid,threads>>>(mp->d_epsilondev_yz_crust_mantle,size,d_max);
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),226);
+ max = h_max[0];
+ for(int i=1;i<num_blocks_x;i++) {
+ if( max < h_max[i]) max = h_max[i];
+ }
+ max_eps = MAX(max_eps,max);
+
+ // strain maximum
+ *norm_strain2 = max_eps;
+
+ // frees arrays
+ cudaFree(d_max);
+ free(h_max);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ exit_on_cuda_error("after check_norm_strain_from_device");
+#endif
+}
+
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -71,7 +71,7 @@
stf = (realw) stf_pre_compute[isource];
iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
- // note: for global version, sourcearrays has dimensions
+ // note: for global version, sourcearrays has dimensions
// sourcearrays(NDIM,NGLLX,NGLLY,NGLLZ,NSOURCES)
atomicAdd(&accel[iglob*3],
sourcearrays[INDEX5(3,5,5,5, 0,i,j,k,isource)]*stf);
@@ -161,7 +161,7 @@
// copies source time function buffer values to GPU
print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
NSOURCES*sizeof(double),cudaMemcpyHostToDevice),19);
-
+
compute_add_sources_kernel<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
mp->d_ibool_crust_mantle,
mp->d_sourcearrays,
@@ -193,7 +193,7 @@
int ispec,iglob;
int irec,i,j,k;
-
+
int irec_local = blockIdx.x + gridDim.x*blockIdx.y;
if(irec_local < nadj_rec_local) { // when nrec > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -60,33 +60,33 @@
/* note:
constant arrays when used in other compute_forces_***_cuda.cu routines stay zero,
constant declaration and cudaMemcpyToSymbol would have to be in the same file...
-
+
extern keyword doesn't work for __constant__ declarations.
-
+
also:
cudaMemcpyToSymbol("deviceCaseParams", caseParams, sizeof(CaseParams));
..
and compile with -arch=sm_20
-
+
see also: http://stackoverflow.com/questions/4008031/how-to-use-cuda-constant-memory-in-a-programmer-pleasant-way
doesn't seem to work.
-
+
we could keep arrays separated for acoustic and elastic routines...
-
+
workaround:
-
+
for now, we store pointers with cudaGetSymbolAddress() function calls.
we pass those pointers in all other compute_forces_..() routines
-
+
in this file, we can use the above constant array declarations without need of the pointers.
-
+
*/
// constant arrays
void setConst_hprime_xx(realw* array,Mesh* mp)
{
-
+
cudaError_t err = cudaMemcpyToSymbol(d_hprime_xx, array, NGLL2*sizeof(realw));
if (err != cudaSuccess)
{
@@ -94,7 +94,7 @@
fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprime_xx),"d_hprime_xx");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprime_xx: %s\n", cudaGetErrorString(err));
@@ -104,7 +104,7 @@
void setConst_hprime_yy(realw* array,Mesh* mp)
{
-
+
cudaError_t err = cudaMemcpyToSymbol(d_hprime_yy, array, NGLL2*sizeof(realw));
if (err != cudaSuccess)
{
@@ -112,7 +112,7 @@
fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprime_yy),"d_hprime_yy");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprime_yy: %s\n", cudaGetErrorString(err));
@@ -122,7 +122,7 @@
void setConst_hprime_zz(realw* array,Mesh* mp)
{
-
+
cudaError_t err = cudaMemcpyToSymbol(d_hprime_zz, array, NGLL2*sizeof(realw));
if (err != cudaSuccess)
{
@@ -130,7 +130,7 @@
fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprime_zz),"d_hprime_zz");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprime_zz: %s\n", cudaGetErrorString(err));
@@ -147,7 +147,7 @@
fprintf(stderr, "Error in setConst_hprimewgll_xx: %s\n", cudaGetErrorString(err));
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_xx),"d_hprimewgll_xx");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprimewgll_xx: %s\n", cudaGetErrorString(err));
@@ -163,7 +163,7 @@
fprintf(stderr, "Error in setConst_hprimewgll_yy: %s\n", cudaGetErrorString(err));
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_yy),"d_hprimewgll_yy");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprimewgll_yy: %s\n", cudaGetErrorString(err));
@@ -179,7 +179,7 @@
fprintf(stderr, "Error in setConst_hprimewgll_zz: %s\n", cudaGetErrorString(err));
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_zz),"d_hprimewgll_zz");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprimewgll_zz: %s\n", cudaGetErrorString(err));
@@ -201,7 +201,7 @@
fprintf(stderr, "Error with d_wgllwgll_xy: %s\n", cudaGetErrorString(err));
exit(1);
}
-
+
}
void setConst_wgllwgll_xz(realw* array,Mesh* mp)
@@ -218,7 +218,7 @@
fprintf(stderr, "Error with d_wgllwgll_xz: %s\n", cudaGetErrorString(err));
exit(1);
}
-
+
}
void setConst_wgllwgll_yz(realw* array,Mesh* mp)
@@ -235,7 +235,7 @@
fprintf(stderr, "Error with d_wgllwgll_yz: %s\n", cudaGetErrorString(err));
exit(1);
}
-
+
}
void setConst_wgll_cube(realw* array,Mesh* mp)
@@ -252,7 +252,7 @@
fprintf(stderr, "Error with d_wgll_cube: %s\n", cudaGetErrorString(err));
exit(1);
}
-
+
}
@@ -413,7 +413,7 @@
reald cos_theta,sin_theta,cos_phi,sin_phi;
reald minus_g,minus_dg;
reald rho;
- reald gxl,gyl,gzl;
+ reald gxl,gyl,gzl;
reald minus_g_over_radius,minus_dg_plus_g_over_radius;
reald cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq;
reald Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl;
@@ -424,50 +424,50 @@
const reald R_EARTH_KM = 6371.0f;
// uncomment line below for PREM with oceans
//const reald R_EARTH_KM = 6368.0f;
-
+
// compute non-symmetric terms for gravity
// use mesh coordinates to get theta and phi
// x y z contain r theta phi
int iglob = d_ibool[working_element*NGLL3 + tx]-1;
-
+
radius = d_xstore[iglob];
theta = d_ystore[iglob];
phi = d_zstore[iglob];
-
+
cos_theta = cos(theta);
sin_theta = sin(theta);
cos_phi = cos(phi);
sin_phi = sin(phi);
-
+
// for efficiency replace with lookup table every 100 m in radial direction
// note: radius in crust mantle should never be zero,
// and arrays in C start from 0, thus we need to subtract -1
int int_radius = rint(radius * R_EARTH_KM * 10.0f ) - 1;
-
+
// get g, rho and dg/dr=dg
// spherical components of the gravitational acceleration
// for efficiency replace with lookup table every 100 m in radial direction
minus_g = d_minus_gravity_table[int_radius];
minus_dg = d_minus_deriv_gravity_table[int_radius];
rho = d_density_table[int_radius];
-
+
// Cartesian components of the gravitational acceleration
gxl = minus_g*sin_theta*cos_phi;
gyl = minus_g*sin_theta*sin_phi;
gzl = minus_g*cos_theta;
-
+
// Cartesian components of gradient of gravitational acceleration
// obtained from spherical components
-
+
minus_g_over_radius = minus_g / radius;
minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius;
-
+
cos_theta_sq = cos_theta*cos_theta;
sin_theta_sq = sin_theta*sin_theta;
cos_phi_sq = cos_phi*cos_phi;
sin_phi_sq = sin_phi*sin_phi;
-
+
Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq;
Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq;
Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq;
@@ -479,21 +479,21 @@
sx_l = rho * s_dummyx_loc[tx];
sy_l = rho * s_dummyy_loc[tx];
sz_l = rho * s_dummyz_loc[tx];
-
+
// compute G tensor from s . g and add to sigma (not symmetric)
*sigma_xx = *sigma_xx + sy_l*gyl + sz_l*gzl;
*sigma_yy = *sigma_yy + sx_l*gxl + sz_l*gzl;
*sigma_zz = *sigma_zz + sx_l*gxl + sy_l*gyl;
-
+
*sigma_xy = *sigma_xy - sx_l * gyl;
*sigma_yx = *sigma_yx - sy_l * gxl;
-
+
*sigma_xz = *sigma_xz - sx_l * gzl;
*sigma_zx = *sigma_zx - sz_l * gxl;
-
+
*sigma_yz = *sigma_yz - sy_l * gzl;
*sigma_zy = *sigma_zy - sz_l * gyl;
-
+
// precompute vector
factor = jacobianl * wgll_cube[tx];
*rho_s_H1 = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl);
@@ -635,7 +635,7 @@
realw* d_ystore, realw* d_zstore,
reald* sigma_xx,reald* sigma_yy,reald* sigma_zz,
reald* sigma_xy,reald* sigma_xz,reald* sigma_yz){
-
+
reald kappavl,muvl,kappahl,muhl;
reald rhovpvsq,rhovphsq,rhovsvsq,rhovshsq,eta_aniso;
reald costheta,sintheta,cosphi,sinphi;
@@ -649,8 +649,8 @@
reald c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,c33,c34,c35,c36,c44,c45,c46,c55,c56,c66;
// cosine and sine function in CUDA only supported for float
- reald theta,phi;
-
+ reald theta,phi;
+
// use Kappa and mu from transversely isotropic model
kappavl = d_kappavstore[offset];
muvl = d_muvstore[offset];
@@ -676,7 +676,7 @@
// use mesh coordinates to get theta and phi
//ystore and zstore contain theta and phi
theta = d_ystore[iglob];
- phi = d_zstore[iglob];
+ phi = d_zstore[iglob];
if( sizeof( theta ) == sizeof( float ) ){
// float operations
@@ -685,9 +685,9 @@
// or: sincosf(theta, &sintheta, &costheta);
// or with loss of accuracy: __sincosf(theta, &sintheta, &costheta);
// or compile with: -use_fast_math
-
+
costheta = cosf(theta);
- sintheta = sinf(theta);
+ sintheta = sinf(theta);
cosphi = cosf(phi);
sinphi = sinf(phi);
@@ -697,14 +697,14 @@
costwophi = cosf(2.0f * phi);
sintwophi = sinf(2.0f * phi);
cosfourtheta = cosf(4.0f * theta);
- cosfourphi = cosf(4.0f * phi);
+ cosfourphi = cosf(4.0f * phi);
}else{
// double operations
costheta = cos(theta);
- sintheta = sin(theta);
+ sintheta = sin(theta);
cosphi = cos(phi);
- sinphi = sin(phi);
+ sinphi = sin(phi);
costwotheta = cos(2.0f * theta);
sintwotheta = sin(2.0f * theta);
@@ -712,9 +712,9 @@
sintwophi = sin(2.0f * phi);
cosfourtheta = cos(4.0f * theta);
- cosfourphi = cos(4.0f * phi);
+ cosfourphi = cos(4.0f * phi);
}
-
+
costhetasq = costheta * costheta;
sinthetasq = sintheta * sintheta;
cosphisq = cosphi * cosphi;
@@ -1023,7 +1023,7 @@
// synchronize all the threads (one thread for each of the NGLL grid points of the
// current spectral element) because we need the whole element to be ready in order
// to be able to compute the matrix products along cut planes of the 3D element below
- __syncthreads();
+ __syncthreads();
#ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
@@ -1175,7 +1175,7 @@
// use unrelaxed parameters if attenuation
one_minus_sum_beta_use = one_minus_sum_beta[tx+working_element*NGLL3]; // (i,j,k,ispec)
minus_sum_beta = one_minus_sum_beta_use - 1.0f;
- }
+ }
// computes stresses
if(ANISOTROPY){
@@ -1204,7 +1204,7 @@
duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl,
&sigma_xx,&sigma_yy,&sigma_zz,
&sigma_xy,&sigma_xz,&sigma_yz);
- }else{
+ }else{
// transverse isotropy
compute_element_cm_tiso(offset,
d_kappavstore,d_muvstore,
@@ -1222,8 +1222,8 @@
&sigma_xy,&sigma_xz,&sigma_yz);
}
} // ! end of test whether isotropic or anisotropic element
-
+
if(ATTENUATION && (! USE_ATTENUATION_MIMIC ) ){
// subtracts memory variables if attenuation
compute_element_cm_att_stress(tx,working_element,
@@ -1249,7 +1249,7 @@
wgll_cube,jacobianl,
s_dummyx_loc,s_dummyy_loc,s_dummyz_loc,
&sigma_xx,&sigma_yy,&sigma_zz,&sigma_xy,&sigma_yx,
- &sigma_xz,&sigma_zx,&sigma_yz,&sigma_zy,
+ &sigma_xz,&sigma_zx,&sigma_yz,&sigma_zy,
&rho_s_H1,&rho_s_H2,&rho_s_H3);
}
@@ -1822,5 +1822,5 @@
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
exit_on_cuda_error("compute_forces_crust_mantle_cuda");
-#endif
+#endif
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -184,101 +184,101 @@
reald cos_theta,sin_theta,cos_phi,sin_phi;
reald minus_g,minus_dg;
reald rho;
- reald gxl,gyl,gzl;
+ reald gxl,gyl,gzl;
reald minus_g_over_radius,minus_dg_plus_g_over_radius;
reald cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq;
reald Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl;
reald sx_l,sy_l,sz_l;
reald factor;
-
+
// R_EARTH_KM is the radius of the bottom of the oceans
const reald R_EARTH = 6371000.0f; // in m
const reald R_EARTH_KM = 6371.0f; // in km
// uncomment line below for PREM with oceans
//const reald R_EARTH = 6368000.0f;
//const reald R_EARTH_KM = 6368.0f;
-
+
// compute non-symmetric terms for gravity
-
+
// use mesh coordinates to get theta and phi
// x y z contain r theta phi
int iglob = d_ibool[working_element*NGLL3 + tx]-1;
-
- radius = d_xstore[iglob];
+
+ radius = d_xstore[iglob];
// make sure radius is never zero even for points at center of cube
// because we later divide by radius
if(radius < 100.f / R_EARTH){ radius = 100.f / R_EARTH; }
-
+
theta = d_ystore[iglob];
phi = d_zstore[iglob];
-
+
cos_theta = cos(theta);
sin_theta = sin(theta);
cos_phi = cos(phi);
sin_phi = sin(phi);
-
+
// for efficiency replace with lookup table every 100 m in radial direction
// note: radius in crust mantle should never be zero,
// and arrays in C start from 0, thus we need to subtract -1
int int_radius = rint(radius * R_EARTH_KM * 10.0f ) - 1;
//make sure we never use below zero for point exactly at the center of the Earth
if( int_radius < 0 ){int_radius = 0;}
-
+
// get g, rho and dg/dr=dg
// spherical components of the gravitational acceleration
// for efficiency replace with lookup table every 100 m in radial direction
minus_g = d_minus_gravity_table[int_radius];
minus_dg = d_minus_deriv_gravity_table[int_radius];
rho = d_density_table[int_radius];
-
+
// Cartesian components of the gravitational acceleration
gxl = minus_g*sin_theta*cos_phi;
gyl = minus_g*sin_theta*sin_phi;
gzl = minus_g*cos_theta;
-
+
// Cartesian components of gradient of gravitational acceleration
// obtained from spherical components
-
+
minus_g_over_radius = minus_g / radius;
minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius;
-
+
cos_theta_sq = cos_theta*cos_theta;
sin_theta_sq = sin_theta*sin_theta;
cos_phi_sq = cos_phi*cos_phi;
sin_phi_sq = sin_phi*sin_phi;
-
+
Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq;
Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq;
Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq;
Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq;
Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta;
Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta;
-
+
// get displacement and multiply by density to compute G tensor
sx_l = rho * s_dummyx_loc[tx];
sy_l = rho * s_dummyy_loc[tx];
sz_l = rho * s_dummyz_loc[tx];
-
+
// compute G tensor from s . g and add to sigma (not symmetric)
*sigma_xx = *sigma_xx + sy_l*gyl + sz_l*gzl;
*sigma_yy = *sigma_yy + sx_l*gxl + sz_l*gzl;
*sigma_zz = *sigma_zz + sx_l*gxl + sy_l*gyl;
-
+
*sigma_xy = *sigma_xy - sx_l * gyl;
*sigma_yx = *sigma_yx - sy_l * gxl;
-
+
*sigma_xz = *sigma_xz - sx_l * gzl;
*sigma_zx = *sigma_zx - sz_l * gxl;
-
+
*sigma_yz = *sigma_yz - sy_l * gzl;
*sigma_zy = *sigma_zy - sz_l * gyl;
-
+
// precompute vector
factor = jacobianl * wgll_cube[tx];
*rho_s_H1 = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl);
*rho_s_H2 = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl);
*rho_s_H3 = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl);
-
+
return;
}
@@ -299,7 +299,7 @@
int num_phase_ispec,
int d_iphase,
int use_mesh_coloring_gpu,
- realw* d_displ,
+ realw* d_displ,
realw* d_accel,
realw* d_xix, realw* d_xiy, realw* d_xiz,
realw* d_etax, realw* d_etay, realw* d_etaz,
@@ -671,7 +671,7 @@
wgll_cube,jacobianl,
s_dummyx_loc,s_dummyy_loc,s_dummyz_loc,
&sigma_xx,&sigma_yy,&sigma_zz,&sigma_xy,&sigma_yx,
- &sigma_xz,&sigma_zx,&sigma_yz,&sigma_zy,
+ &sigma_xz,&sigma_zx,&sigma_yz,&sigma_zy,
&rho_s_H1,&rho_s_H2,&rho_s_H3);
}
@@ -1212,6 +1212,6 @@
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
exit_on_cuda_error("compute_forces_inner_core_cuda");
-#endif
+#endif
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -663,11 +663,11 @@
);
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
exit_on_cuda_error("compute_forces_outer_core_cuda");
-#endif
+#endif
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -473,120 +473,35 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_kernels_hess_el_cudakernel(int* ispec_is_elastic,
- int* ibool,
- realw* accel,
- realw* b_accel,
- realw* hess_kl,
- realw deltat,
- int NSPEC_AB) {
+__global__ void compute_kernels_hess_cudakernel(int* ibool,
+ realw* accel,
+ realw* b_accel,
+ realw* hess_kl,
+ realw deltat,
+ int NSPEC_AB) {
int ispec = blockIdx.x + blockIdx.y*gridDim.x;
// handles case when there is 1 extra block (due to rectangular grid)
if(ispec < NSPEC_AB) {
- // elastic elements only
- if( ispec_is_elastic[ispec] ) {
+ int ijk = threadIdx.x;
+ int ijk_ispec = ijk + NGLL3*ispec;
+ int iglob = ibool[ijk_ispec] - 1 ;
- int ijk = threadIdx.x;
- int ijk_ispec = ijk + NGLL3*ispec;
- int iglob = ibool[ijk_ispec] - 1 ;
-
- // approximate hessian
- hess_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_accel[3*iglob]+
- accel[3*iglob+1]*b_accel[3*iglob+1]+
- accel[3*iglob+2]*b_accel[3*iglob+2]);
- }
+ // approximate hessian
+ hess_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_accel[3*iglob]+
+ accel[3*iglob+1]*b_accel[3*iglob+1]+
+ accel[3*iglob+2]*b_accel[3*iglob+2]);
}
}
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_kernels_hess_ac_cudakernel(int* ispec_is_acoustic,
- int* ibool,
- realw* potential_dot_dot_acoustic,
- realw* b_potential_dot_dot_acoustic,
- realw* rhostore,
- realw* hprime_xx,
- realw* hprime_yy,
- realw* hprime_zz,
- realw* d_xix,
- realw* d_xiy,
- realw* d_xiz,
- realw* d_etax,
- realw* d_etay,
- realw* d_etaz,
- realw* d_gammax,
- realw* d_gammay,
- realw* d_gammaz,
- realw* hess_kl,
- realw deltat,
- int NSPEC_AB,
- int gravity) {
-
- int ispec = blockIdx.x + blockIdx.y*gridDim.x;
-
- // handles case when there is 1 extra block (due to rectangular grid)
- if(ispec < NSPEC_AB) {
-
- // acoustic elements only
- if( ispec_is_acoustic[ispec] ){
-
- // local and global indices
- int ijk = threadIdx.x;
- int ijk_ispec = ijk + NGLL3*ispec;
- int iglob = ibool[ijk_ispec] - 1 ;
-
- int ijk_ispec_padded = ijk + NGLL3_PADDED*ispec;
-
- realw accel_elm[3];
- realw b_accel_elm[3];
- realw rhol;
-
- // shared memory between all threads within this block
- __shared__ realw scalar_field_accel[NGLL3];
- __shared__ realw scalar_field_b_accel[NGLL3];
-
- // copy field values
- scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob];
- scalar_field_b_accel[ijk] = b_potential_dot_dot_acoustic[iglob];
- __syncthreads();
-
- // gets material parameter
- rhol = rhostore[ijk_ispec_padded];
-
- // acceleration vector
- compute_gradient_kernel(ijk,ispec,
- scalar_field_accel,accel_elm,
- hprime_xx,hprime_yy,hprime_zz,
- d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
- rhol,gravity);
-
- // acceleration vector from backward field
- compute_gradient_kernel(ijk,ispec,
- scalar_field_b_accel,b_accel_elm,
- hprime_xx,hprime_yy,hprime_zz,
- d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
- rhol,gravity);
- // approximates hessian
- hess_kl[ijk_ispec] += deltat * (accel_elm[0]*b_accel_elm[0] +
- accel_elm[1]*b_accel_elm[1] +
- accel_elm[2]*b_accel_elm[2]);
-
- } // ispec_is_acoustic
-
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
extern "C"
void FC_FUNC_(compute_kernels_hess_cuda,
COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
- realw* deltat_f,
- int* ELASTIC_SIMULATION,
- int* ACOUSTIC_SIMULATION) {
+ realw* deltat_f) {
TRACE("compute_kernels_hess_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
@@ -594,7 +509,7 @@
int blocksize = NGLL3; // NGLLX*NGLLY*NGLLZ
realw deltat = *deltat_f;
- int num_blocks_x = mp->NSPEC_AB;
+ int num_blocks_x = mp->NSPEC_CRUST_MANTLE;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
@@ -604,41 +519,13 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- if( *ELASTIC_SIMULATION ) {
- compute_kernels_hess_el_cudakernel<<<grid,threads>>>(mp->d_ispec_is_elastic,
- mp->d_ibool,
- mp->d_accel,
- mp->d_b_accel,
- mp->d_hess_el_kl,
- deltat,
- mp->NSPEC_AB);
- }
+ compute_kernels_hess_cudakernel<<<grid,threads>>>(mp->d_ibool_crust_mantle,
+ mp->d_accel_crust_mantle,
+ mp->d_b_accel_crust_mantle,
+ mp->d_hess_kl_crust_mantle,
+ deltat,
+ mp->NSPEC_CRUST_MANTLE);
- if( *ACOUSTIC_SIMULATION ) {
- compute_kernels_hess_ac_cudakernel<<<grid,threads>>>(mp->d_ispec_is_acoustic,
- mp->d_ibool,
- mp->d_potential_dot_dot_acoustic,
- mp->d_b_potential_dot_dot_acoustic,
- mp->d_rhostore,
- mp->d_hprime_xx,
- mp->d_hprime_yy,
- mp->d_hprime_zz,
- mp->d_xix,
- mp->d_xiy,
- mp->d_xiz,
- mp->d_etax,
- mp->d_etay,
- mp->d_etaz,
- mp->d_gammax,
- mp->d_gammay,
- mp->d_gammaz,
- mp->d_hess_ac_kl,
- deltat,
- mp->NSPEC_AB,
- mp->gravity);
- }
-
-
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("compute_kernels_hess_cuda");
#endif
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -46,12 +46,12 @@
int* abs_boundary_ispec,
int* nkmin_xi, int* nkmin_eta,
int* njmin, int* njmax,
- int* nimin, int* nimax,
+ int* nimin, int* nimax,
realw* abs_boundary_jacobian2D,
realw* wgllwgll,
int* ibool,
realw* vpstore,
- int SIMULATION_TYPE,
+ int SIMULATION_TYPE,
int SAVE_FORWARD,
realw* b_potential_dot_dot_acoustic,
realw* b_absorb_potential) {
@@ -77,73 +77,73 @@
case 4:
// xmin
if( nkmin_xi[INDEX2(2,0,iface)] == 0 || njmin[INDEX2(2,0,iface)] == 0 ) return;
-
+
i = 0; // index -1
k = (igll/NGLLX);
j = (igll-k*NGLLX);
-
+
if( k < nkmin_xi[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
if( j < njmin[INDEX2(2,0,iface)]-1 || j > njmax[INDEX2(2,0,iface)]-1 ) return;
-
+
fac1 = wgllwgll[k*NGLLX+j];
break;
-
+
case 5:
// xmax
if( nkmin_xi[INDEX2(2,1,iface)] == 0 || njmin[INDEX2(2,1,iface)] == 0 ) return;
-
+
i = NGLLX-1;
k = (igll/NGLLX);
j = (igll-k*NGLLX);
-
+
if( k < nkmin_xi[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
if( j < njmin[INDEX2(2,1,iface)]-1 || j > njmax[INDEX2(2,1,iface)]-1 ) return;
-
+
fac1 = wgllwgll[k*NGLLX+j];
break;
-
+
case 6:
// ymin
if( nkmin_eta[INDEX2(2,0,iface)] == 0 || nimin[INDEX2(2,0,iface)] == 0 ) return;
-
+
j = 0;
- k = (igll/NGLLX);
+ k = (igll/NGLLX);
i = (igll-k*NGLLX);
-
+
if( k < nkmin_eta[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
if( i < nimin[INDEX2(2,0,iface)]-1 || i > nimax[INDEX2(2,0,iface)]-1 ) return;
-
+
fac1 = wgllwgll[k*NGLLX+i];
break;
-
+
case 7:
// ymax
if( nkmin_eta[INDEX2(2,1,iface)] == 0 || nimin[INDEX2(2,1,iface)] == 0 ) return;
-
+
j = NGLLX-1;
- k = (igll/NGLLX);
+ k = (igll/NGLLX);
i = (igll-k*NGLLX);
-
+
if( k < nkmin_eta[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
if( i < nimin[INDEX2(2,1,iface)]-1 || i > nimax[INDEX2(2,1,iface)]-1 ) return;
-
+
fac1 = wgllwgll[k*NGLLX+i];
break;
case 8:
- // zmin
+ // zmin
k = 0;
- j = (igll/NGLLX);
+ j = (igll/NGLLX);
i = (igll-j*NGLLX);
-
+
if( j < 0 || j > NGLLX-1 ) return;
if( i < 0 || i > NGLLX-1 ) return;
-
+
fac1 = wgllwgll[j*NGLLX+i];
break;
-
+
}
-
+
iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
// determines bulk sound speed
@@ -183,9 +183,9 @@
realw* d_abs_boundary_jacobian2D;
realw* d_wgllwgll;
realw* d_b_absorb_potential;
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
// absorbing boundary type
int interface_type = *itype;
switch( interface_type ){
@@ -197,7 +197,7 @@
d_b_absorb_potential = mp->d_absorb_xmin_outer_core;
d_wgllwgll = mp->d_wgllwgll_yz;
break;
-
+
case 5:
// xmax
num_abs_boundary_faces = mp->nspec2D_xmax_outer_core;
@@ -206,22 +206,22 @@
d_b_absorb_potential = mp->d_absorb_xmax_outer_core;
d_wgllwgll = mp->d_wgllwgll_yz;
break;
-
+
case 6:
// ymin
num_abs_boundary_faces = mp->nspec2D_ymin_outer_core;
d_abs_boundary_ispec = mp->d_ibelm_ymin_outer_core;
d_abs_boundary_jacobian2D = mp->d_jacobian2D_ymin_outer_core;
- d_b_absorb_potential = mp->d_absorb_ymin_outer_core;
+ d_b_absorb_potential = mp->d_absorb_ymin_outer_core;
d_wgllwgll = mp->d_wgllwgll_xz;
break;
-
+
case 7:
// ymax
num_abs_boundary_faces = mp->nspec2D_ymax_outer_core;
d_abs_boundary_ispec = mp->d_ibelm_ymax_outer_core;
d_abs_boundary_jacobian2D = mp->d_jacobian2D_ymax_outer_core;
- d_b_absorb_potential = mp->d_absorb_ymax_outer_core;
+ d_b_absorb_potential = mp->d_absorb_ymax_outer_core;
d_wgllwgll = mp->d_wgllwgll_xz;
break;
@@ -230,18 +230,18 @@
num_abs_boundary_faces = mp->nspec2D_zmin_outer_core;
d_abs_boundary_ispec = mp->d_ibelm_zmin_outer_core;
d_abs_boundary_jacobian2D = mp->d_jacobian2D_zmin_outer_core;
- d_b_absorb_potential = mp->d_absorb_zmin_outer_core;
+ d_b_absorb_potential = mp->d_absorb_zmin_outer_core;
d_wgllwgll = mp->d_wgllwgll_xy;
break;
-
+
default:
exit_on_cuda_error("compute_stacey_acoustic_cuda: unknown interface type");
break;
}
- // checks if anything to do
+ // checks if anything to do
if( num_abs_boundary_faces == 0 ) return;
-
+
// way 1: Elapsed time: 4.385948e-03
// > NGLLSQUARE==NGLL2==25, but we handle this inside kernel
// int blocksize = 32;
@@ -272,11 +272,11 @@
num_abs_boundary_faces,
d_abs_boundary_ispec,
mp->d_nkmin_xi_outer_core,
- mp->d_nkmin_eta_outer_core,
+ mp->d_nkmin_eta_outer_core,
mp->d_njmin_outer_core,
mp->d_njmax_outer_core,
mp->d_nimin_outer_core,
- mp->d_nimax_outer_core,
+ mp->d_nimax_outer_core,
d_abs_boundary_jacobian2D,
d_wgllwgll,
mp->d_ibool_outer_core,
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -73,71 +73,71 @@
// and don't compute points outside NGLLSQUARE==NGLL2==25
//if(igll < NGLL2 && iface < num_abs_boundary_faces) {
- // way 2: only check face, no further check needed since blocksize = 25
+ // way 2: only check face, no further check needed since blocksize = 25
if( iface < num_abs_boundary_faces){
// "-1" from index values to convert from Fortran-> C indexing
ispec = abs_boundary_ispec[iface]-1;
-
+
// determines indices i,j,k depending on absorbing boundary type
switch( interface_type ){
case 0:
// xmin
if( nkmin_xi[INDEX2(2,0,iface)] == 0 || njmin[INDEX2(2,0,iface)] == 0 ) return;
-
+
i = 0; // index -1
k = (igll/NGLLX);
j = (igll-k*NGLLX);
-
+
if( k < nkmin_xi[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
if( j < njmin[INDEX2(2,0,iface)]-1 || j > NGLLX-1 ) return;
-
+
fac1 = wgllwgll[k*NGLLX+j];
break;
-
+
case 1:
// xmax
if( nkmin_xi[INDEX2(2,1,iface)] == 0 || njmin[INDEX2(2,1,iface)] == 0 ) return;
-
+
i = NGLLX-1;
k = (igll/NGLLX);
j = (igll-k*NGLLX);
-
+
if( k < nkmin_xi[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
if( j < njmin[INDEX2(2,1,iface)]-1 || j > njmax[INDEX2(2,1,iface)]-1 ) return;
fac1 = wgllwgll[k*NGLLX+j];
break;
-
+
case 2:
// ymin
if( nkmin_eta[INDEX2(2,0,iface)] == 0 || nimin[INDEX2(2,0,iface)] == 0 ) return;
-
+
j = 0;
- k = (igll/NGLLX);
+ k = (igll/NGLLX);
i = (igll-k*NGLLX);
-
+
if( k < nkmin_eta[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
if( i < nimin[INDEX2(2,0,iface)]-1 || i > nimax[INDEX2(2,0,iface)]-1 ) return;
-
+
fac1 = wgllwgll[k*NGLLX+i];
break;
case 3:
// ymax
if( nkmin_eta[INDEX2(2,1,iface)] == 0 || nimin[INDEX2(2,1,iface)] == 0 ) return;
-
+
j = NGLLX-1;
- k = (igll/NGLLX);
+ k = (igll/NGLLX);
i = (igll-k*NGLLX);
-
+
if( k < nkmin_eta[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
if( i < nimin[INDEX2(2,1,iface)]-1 || i > nimax[INDEX2(2,1,iface)]-1 ) return;
-
+
fac1 = wgllwgll[k*NGLLX+i];
break;
}
-
+
iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
// gets associated velocity
@@ -176,7 +176,7 @@
b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)] = ty*jacobianw;
b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)] = tz*jacobianw;
} // SIMULATION_TYPE
-
+
} // num_abs_boundary_faces
}
@@ -197,7 +197,7 @@
realw* d_abs_boundary_jacobian2D;
realw* d_wgllwgll;
realw* d_b_absorb_field;
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
// absorbing boundary type
@@ -212,7 +212,7 @@
d_b_absorb_field = mp->d_absorb_xmin_crust_mantle;
d_wgllwgll = mp->d_wgllwgll_yz;
break;
-
+
case 1:
// xmax
num_abs_boundary_faces = mp->nspec2D_xmax_crust_mantle;
@@ -222,33 +222,33 @@
d_b_absorb_field = mp->d_absorb_xmax_crust_mantle;
d_wgllwgll = mp->d_wgllwgll_yz;
break;
-
+
case 2:
// ymin
num_abs_boundary_faces = mp->nspec2D_ymin_crust_mantle;
d_abs_boundary_ispec = mp->d_ibelm_ymin_crust_mantle;
d_abs_boundary_normal = mp->d_normal_ymin_crust_mantle;
d_abs_boundary_jacobian2D = mp->d_jacobian2D_ymin_crust_mantle;
- d_b_absorb_field = mp->d_absorb_ymin_crust_mantle;
+ d_b_absorb_field = mp->d_absorb_ymin_crust_mantle;
d_wgllwgll = mp->d_wgllwgll_xz;
break;
-
+
case 3:
// ymax
num_abs_boundary_faces = mp->nspec2D_ymax_crust_mantle;
d_abs_boundary_ispec = mp->d_ibelm_ymax_crust_mantle;
d_abs_boundary_normal = mp->d_normal_ymax_crust_mantle;
d_abs_boundary_jacobian2D = mp->d_jacobian2D_ymax_crust_mantle;
- d_b_absorb_field = mp->d_absorb_ymax_crust_mantle;
+ d_b_absorb_field = mp->d_absorb_ymax_crust_mantle;
d_wgllwgll = mp->d_wgllwgll_xz;
break;
-
+
default:
exit_on_cuda_error("compute_stacey_elastic_cuda: unknown interface type");
break;
}
-
- // checks if anything to do
+
+ // checks if anything to do
if( num_abs_boundary_faces == 0 ) return;
// way 1
@@ -258,7 +258,7 @@
// way 2: seems sligthly faster
// > NGLLSQUARE==NGLL2==25, no further check inside kernel
int blocksize = NGLL2;
-
+
int num_blocks_x = num_abs_boundary_faces;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
@@ -270,7 +270,7 @@
// adjoint simulations: needs absorbing boundary buffer
if(mp->simulation_type == 3 && num_abs_boundary_faces > 0) {
- // copies array to GPU
+ // copies array to GPU
print_CUDA_error_if_any(cudaMemcpy(d_b_absorb_field,absorb_field,
NDIM*NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyHostToDevice),7700);
}
@@ -279,14 +279,14 @@
compute_stacey_elastic_kernel<<<grid,threads>>>(mp->d_veloc_crust_mantle,
mp->d_accel_crust_mantle,
interface_type,
- num_abs_boundary_faces,
+ num_abs_boundary_faces,
d_abs_boundary_ispec,
mp->d_nkmin_xi_crust_mantle,
- mp->d_nkmin_eta_crust_mantle,
+ mp->d_nkmin_eta_crust_mantle,
mp->d_njmin_crust_mantle,
mp->d_njmax_crust_mantle,
mp->d_nimin_crust_mantle,
- mp->d_nimax_crust_mantle,
+ mp->d_nimax_crust_mantle,
d_abs_boundary_normal,
d_abs_boundary_jacobian2D,
d_wgllwgll,
@@ -301,7 +301,7 @@
// adjoint simulations: stores absorbed wavefield part
if(mp->simulation_type == 1 && mp->save_forward && num_abs_boundary_faces > 0 ) {
- // copies array to CPU
+ // copies array to CPU
print_CUDA_error_if_any(cudaMemcpy(absorb_field,d_b_absorb_field,
NDIM*NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyDeviceToHost),7701);
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -28,16 +28,18 @@
#include <stdio.h>
#include <cuda.h>
-#include <cublas.h>
+//#include <cublas.h>
+
#include "config.h"
#include "mesh_constants_cuda.h"
-#define CUBLAS_ERROR(s,n) if (s != CUBLAS_STATUS_SUCCESS) { \
-fprintf (stderr, "CUBLAS Memory Write Error @ %d\n",n); \
-exit(EXIT_FAILURE); }
+//#define CUBLAS_ERROR(s,n) if (s != CUBLAS_STATUS_SUCCESS) { \
+//fprintf (stderr, "CUBLAS Memory Write Error @ %d\n",n); \
+//exit(EXIT_FAILURE); }
+
/* ----------------------------------------------------------------------------------------------- */
// elastic wavefield
@@ -66,32 +68,30 @@
/* ----------------------------------------------------------------------------------------------- */
+// KERNEL 1
+// inner core
+
+/* ----------------------------------------------------------------------------------------------- */
+
extern "C"
-void FC_FUNC_(it_update_displacement_cuda,
- IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f,
- int* size_F,
- realw* deltat_F,
- realw* deltatsqover2_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F) {
+void FC_FUNC_(it_update_displacement_ic_cuda,
+ IT_UPDATE_DISPLACMENT_IC_CUDA)(long* Mesh_pointer_f,
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ realw* b_deltat_F,
+ realw* b_deltatsqover2_F,
+ realw* b_deltatover2_F) {
-TRACE("it_update_displacement_cuda");
+TRACE("it_update_displacement_ic_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
- //int i,device;
+ int size = NDIM * mp->NGLOB_INNER_CORE;
- int size = *size_F;
realw deltat = *deltat_F;
realw deltatsqover2 = *deltatsqover2_F;
realw deltatover2 = *deltatover2_F;
- realw b_deltat = *b_deltat_F;
- realw b_deltatsqover2 = *b_deltatsqover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
- //cublasStatus status;
int blocksize = BLOCKSIZE_KERNEL1;
int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
@@ -106,39 +106,93 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
+ //launch kernel
+ UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
+ mp->d_veloc_inner_core,
+ mp->d_accel_inner_core,
+ size,deltat,deltatsqover2,deltatover2);
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-// exit_on_cuda_error("Before UpdateDispVeloc_kernel");
-//#endif
+ // kernel for backward fields
+ if(mp->simulation_type == 3) {
+ realw b_deltat = *b_deltat_F;
+ realw b_deltatsqover2 = *b_deltatsqover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+ UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_b_displ_inner_core,
+ mp->d_b_veloc_inner_core,
+ mp->d_b_accel_inner_core,
+ size,b_deltat,b_deltatsqover2,b_deltatover2);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("it_update_displacement_ic_cuda");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// KERNEL 1
+// crust/mantle
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(it_update_displacement_cm_cuda,
+ IT_UPDATE_DISPLACMENT_CM_CUDA)(long* Mesh_pointer_f,
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ realw* b_deltat_F,
+ realw* b_deltatsqover2_F,
+ realw* b_deltatover2_F) {
+
+ TRACE("it_update_displacement_cm_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ int size = NDIM * mp->NGLOB_CRUST_MANTLE;
+
+ realw deltat = *deltat_F;
+ realw deltatsqover2 = *deltatsqover2_F;
+ realw deltatover2 = *deltatover2_F;
+
+ int blocksize = BLOCKSIZE_KERNEL1;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
//launch kernel
- UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ,mp->d_veloc,mp->d_accel,
+ UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
+ mp->d_veloc_crust_mantle,
+ mp->d_accel_crust_mantle,
size,deltat,deltatsqover2,deltatover2);
- //cudaThreadSynchronize();
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-// //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
-// // sync and check to catch errors from previous async operations
-// exit_on_cuda_error("UpdateDispVeloc_kernel");
-//#endif
-
// kernel for backward fields
- if(*SIMULATION_TYPE == 3) {
+ if(mp->simulation_type == 3) {
+ realw b_deltat = *b_deltat_F;
+ realw b_deltatsqover2 = *b_deltatsqover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
- UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_b_displ,mp->d_b_veloc,mp->d_b_accel,
+ UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_b_displ_crust_mantle,
+ mp->d_b_veloc_crust_mantle,
+ mp->d_b_accel_crust_mantle,
size,b_deltat,b_deltatsqover2,b_deltatover2);
-
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-// //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
-// exit_on_cuda_error("after SIM_TYPE==3 UpdateDispVeloc_kernel");
-//#endif
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("it_update_displacement_cuda");
+ exit_on_cuda_error("it_update_displacement_cm_cuda");
#endif
}
+
/* ----------------------------------------------------------------------------------------------- */
// acoustic wavefield
@@ -171,29 +225,30 @@
/* ----------------------------------------------------------------------------------------------- */
+// KERNEL 1
+// outer core
+
+/* ----------------------------------------------------------------------------------------------- */
+
extern "C"
-void FC_FUNC_(it_update_displacement_ac_cuda,
- it_update_displacement_ac_cuda)(long* Mesh_pointer_f,
- int* size_F,
+void FC_FUNC_(it_update_displacement_oc_cuda,
+ IT_UPDATE_DISPLACEMENT_OC_cuda)(long* Mesh_pointer_f,
realw* deltat_F,
realw* deltatsqover2_F,
realw* deltatover2_F,
- int* SIMULATION_TYPE,
realw* b_deltat_F,
realw* b_deltatsqover2_F,
realw* b_deltatover2_F) {
-TRACE("it_update_displacement_ac_cuda");
+
+ TRACE("it_update_displacement_oc_cuda");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
- //int i,device;
- int size = *size_F;
+ int size = mp->NGLOB_OUTER_CORE;
+
realw deltat = *deltat_F;
realw deltatsqover2 = *deltatsqover2_F;
realw deltatover2 = *deltatover2_F;
- realw b_deltat = *b_deltat_F;
- realw b_deltatsqover2 = *b_deltatsqover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
- //cublasStatus status;
int blocksize = BLOCKSIZE_KERNEL1;
int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
@@ -209,22 +264,24 @@
dim3 threads(blocksize,1,1);
//launch kernel
- UpdatePotential_kernel<<<grid,threads>>>(mp->d_potential_acoustic,
- mp->d_potential_dot_acoustic,
- mp->d_potential_dot_dot_acoustic,
+ UpdatePotential_kernel<<<grid,threads>>>(mp->d_displ_outer_core,
+ mp->d_veloc_outer_core,
+ mp->d_accel_outer_core,
size,deltat,deltatsqover2,deltatover2);
- if(*SIMULATION_TYPE == 3) {
- UpdatePotential_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic,
- mp->d_b_potential_dot_acoustic,
- mp->d_b_potential_dot_dot_acoustic,
+ if(mp->simulation_type == 3) {
+ realw b_deltat = *b_deltat_F;
+ realw b_deltatsqover2 = *b_deltatsqover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+
+ UpdatePotential_kernel<<<grid,threads>>>(mp->d_b_displ_outer_core,
+ mp->d_b_veloc_outer_core,
+ mp->d_b_accel_outer_core,
size,b_deltat,b_deltatsqover2,b_deltatover2);
}
- //cudaThreadSynchronize();
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- exit_on_cuda_error("it_update_displacement_ac_cuda");
+ exit_on_cuda_error("it_update_displacement_oc_cuda");
#endif
}
@@ -301,7 +358,7 @@
TRACE("kernel_3_a_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
-
+
int SIMULATION_TYPE = *SIMULATION_TYPE_f;
realw deltatover2 = *deltatover2_F;
realw b_deltatover2 = *b_deltatover2_F;
@@ -323,26 +380,26 @@
// check whether we can update accel and veloc, or only accel at this point
if( *OCEANS == 0 ){
// updates both, accel and veloc
- kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_crust_mantle,
- mp->d_accel_crust_mantle,
- mp->NGLOB_CRUST_MANTLE,
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_crust_mantle,
+ mp->d_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
deltatover2, mp->d_rmass_crust_mantle);
if(SIMULATION_TYPE == 3) {
- kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc_crust_mantle,
- mp->d_b_accel_crust_mantle,
- mp->NGLOB_CRUST_MANTLE,
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc_crust_mantle,
+ mp->d_b_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
b_deltatover2,mp->d_rmass_crust_mantle);
}
}else{
// updates only accel
- kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel_crust_mantle,
- mp->NGLOB_CRUST_MANTLE,
+ kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
mp->d_rmass_crust_mantle);
if(SIMULATION_TYPE == 3) {
- kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_b_accel_crust_mantle,
- mp->NGLOB_CRUST_MANTLE,
+ kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_b_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
mp->d_rmass_crust_mantle);
}
}
@@ -372,7 +429,7 @@
realw b_deltatover2 = *b_deltatover2_F;
int blocksize = BLOCKSIZE_KERNEL3;
-
+
// crust/mantle region
// in case of ocean loads, we still have to update the velocity for crust/mantle region
if( *OCEANS ){
@@ -383,24 +440,24 @@
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
+ dim3 grid1(num_blocks_x,num_blocks_y);
+ dim3 threads1(blocksize,1,1);
+
// updates only veloc at this point
- kernel_3_veloc_cuda_device<<< grid, threads>>>(mp->d_veloc_crust_mantle,
+ kernel_3_veloc_cuda_device<<< grid1, threads1>>>(mp->d_veloc_crust_mantle,
mp->d_accel_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
deltatover2);
if(SIMULATION_TYPE == 3) {
- kernel_3_veloc_cuda_device<<< grid, threads>>>(mp->d_b_veloc_crust_mantle,
+ kernel_3_veloc_cuda_device<<< grid1, threads1>>>(mp->d_b_veloc_crust_mantle,
mp->d_b_accel_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
b_deltatover2);
}
}
-
- // inner core
+
+ // inner core
size_padded = ((int)ceil(((double)mp->NGLOB_INNER_CORE)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
@@ -410,19 +467,19 @@
}
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
// updates both, accel and veloc
- kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_inner_core,
- mp->d_accel_inner_core,
- mp->NGLOB_INNER_CORE,
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_inner_core,
+ mp->d_accel_inner_core,
+ mp->NGLOB_INNER_CORE,
deltatover2, mp->d_rmass_inner_core);
-
+
if(SIMULATION_TYPE == 3) {
- kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc_inner_core,
- mp->d_b_accel_inner_core,
- mp->NGLOB_INNER_CORE,
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc_inner_core,
+ mp->d_b_accel_inner_core,
+ mp->NGLOB_INNER_CORE,
b_deltatover2,mp->d_rmass_inner_core);
- }
+ }
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -451,9 +508,9 @@
if(id < size) {
// multiplies pressure with the inverse of the mass matrix
accel[id] = accel[id]*rmass[id];
-
+
// Newmark time scheme: corrector term
- veloc[id] = veloc[id] + deltatover2*accel[id];
+ veloc[id] = veloc[id] + deltatover2*accel[id];
}
}
@@ -474,7 +531,7 @@
int SIMULATION_TYPE = *SIMULATION_TYPE_f;
realw deltatover2 = *deltatover2_F;
realw b_deltatover2 = *b_deltatover2_F;
-
+
int blocksize = BLOCKSIZE_KERNEL3;
int size_padded = ((int)ceil(((double)mp->NGLOB_OUTER_CORE)/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-02-22 04:38:32 UTC (rev 19659)
@@ -249,6 +249,9 @@
realw* d_eps_trace_over_3_crust_mantle;
realw* d_b_eps_trace_over_3_crust_mantle;
+ // kernels
+ realw* d_hess_kl_crust_mantle;
+
// inner / outer elements
int* d_phase_ispec_inner_crust_mantle;
int num_phase_ispec_crust_mantle;
@@ -402,7 +405,7 @@
// simulation type: 1 = forward, 2 = adjoint, 3 = kernel
int simulation_type;
-
+
// mesh coloring flag
int use_mesh_coloring_gpu;
@@ -417,7 +420,8 @@
int rotation;
int anisotropic_inner_core;
int save_boundary_mesh;
-
+ int approximate_hess_kl;
+
// ------------------------------------------------------------------ //
// gravity
// ------------------------------------------------------------------ //
@@ -490,60 +494,60 @@
int* d_nibool_interfaces_outer_core;
int* d_ibool_interfaces_outer_core;
realw* d_send_accel_buffer_outer_core;
-
+
// ------------------------------------------------------------------ //
// absorbing boundaries
// ------------------------------------------------------------------ //
-
+
int nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle;
- int nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle;
+ int nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle;
int* d_nimin_crust_mantle, *d_nimax_crust_mantle;
int* d_njmin_crust_mantle, *d_njmax_crust_mantle;
int* d_nkmin_xi_crust_mantle, *d_nkmin_eta_crust_mantle;
int* d_ibelm_xmin_crust_mantle, *d_ibelm_xmax_crust_mantle;
- int* d_ibelm_ymin_crust_mantle, *d_ibelm_ymax_crust_mantle;
-
+ int* d_ibelm_ymin_crust_mantle, *d_ibelm_ymax_crust_mantle;
+
realw* d_normal_xmin_crust_mantle, *d_normal_xmax_crust_mantle;
realw* d_normal_ymin_crust_mantle, *d_normal_ymax_crust_mantle;
-
+
realw* d_jacobian2D_xmin_crust_mantle, *d_jacobian2D_xmax_crust_mantle;
realw* d_jacobian2D_ymin_crust_mantle, *d_jacobian2D_ymax_crust_mantle;
-
+
realw* d_absorb_xmin_crust_mantle, *d_absorb_xmax_crust_mantle;
realw* d_absorb_ymin_crust_mantle, *d_absorb_ymax_crust_mantle;
-
+
realw* d_rho_vp_crust_mantle;
realw* d_rho_vs_crust_mantle;
int nspec2D_xmin_outer_core,nspec2D_xmax_outer_core;
- int nspec2D_ymin_outer_core,nspec2D_ymax_outer_core;
+ int nspec2D_ymin_outer_core,nspec2D_ymax_outer_core;
int nspec2D_zmin_outer_core;
-
+
int* d_nimin_outer_core, *d_nimax_outer_core;
int* d_njmin_outer_core, *d_njmax_outer_core;
int* d_nkmin_xi_outer_core, *d_nkmin_eta_outer_core;
-
+
int* d_ibelm_xmin_outer_core, *d_ibelm_xmax_outer_core;
- int* d_ibelm_ymin_outer_core, *d_ibelm_ymax_outer_core;
+ int* d_ibelm_ymin_outer_core, *d_ibelm_ymax_outer_core;
int* d_ibelm_zmin_outer_core;
-
+
realw* d_jacobian2D_xmin_outer_core, *d_jacobian2D_xmax_outer_core;
realw* d_jacobian2D_ymin_outer_core, *d_jacobian2D_ymax_outer_core;
realw* d_jacobian2D_zmin_outer_core;
-
+
realw* d_absorb_xmin_outer_core, *d_absorb_xmax_outer_core;
realw* d_absorb_ymin_outer_core, *d_absorb_ymax_outer_core;
realw* d_absorb_zmin_outer_core;
-
+
realw* d_vp_outer_core;
-
+
// ------------------------------------------------------------------ //
// noise tomography
// ------------------------------------------------------------------ //
int noise_tomography;
-
+
int nspec_top;
int* d_ibelm_top_crust_mantle;
@@ -558,7 +562,7 @@
// noise sensitivity kernel
realw* d_Sigma_kl;
-
+
// ------------------------------------------------------------------ //
//daniel: TODO - former code...
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -193,10 +193,10 @@
int tx = threadIdx.x;
int ispec = ispec_selected_rec[irec_master_noise]-1;
int iglob = ibool[tx + NGLL3*ispec]-1;
-
+
atomicAdd(&accel[iglob*3 ],noise_sourcearray[ 3*tx + 3*NGLL3*it]);
atomicAdd(&accel[iglob*3+1],noise_sourcearray[1+3*tx + 3*NGLL3*it]);
- atomicAdd(&accel[iglob*3+2],noise_sourcearray[2+3*tx + 3*NGLL3*it]);
+ atomicAdd(&accel[iglob*3+2],noise_sourcearray[2+3*tx + 3*NGLL3*it]);
}
/* ----------------------------------------------------------------------------------------------- */
@@ -207,18 +207,18 @@
int* it_f,
int* irec_master_noise_f,
int* islice_selected_rec) {
-
+
TRACE("noise_add_source_master_rec_cu");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
int it = *it_f - 1; // -1 for Fortran -> C indexing differences
int irec_master_noise = *irec_master_noise_f-1;
-
+
dim3 grid(1,1,1);
dim3 threads(NGLL3,1,1);
- // adds noise source at master location
+ // adds noise source at master location
if(mp->myrank == islice_selected_rec[irec_master_noise]) {
noise_add_source_master_rec_cuda_kernel<<<grid,threads>>>(mp->d_ibool_crust_mantle,
mp->d_ispec_selected_rec,
@@ -227,10 +227,10 @@
mp->d_noise_sourcearray,
it);
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("noise_add_source_master_rec_cuda_kernel");
-#endif
+#endif
}
/* ----------------------------------------------------------------------------------------------- */
@@ -239,7 +239,7 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void noise_add_surface_movie_cuda_kernel(realw* accel,
+__global__ void noise_add_surface_movie_cuda_kernel(realw* accel,
int* ibool,
int* ibelm_top,
int nspec_top,
@@ -257,8 +257,8 @@
// when nspec_top > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
if(iface < nspec_top) {
- int ispec = ibelm_top[iface]-1;
-
+ int ispec = ibelm_top[iface]-1;
+
int k = NGLLX - 1;
int j = (igll/NGLLX);
int i = (igll-j*NGLLX);
@@ -276,7 +276,7 @@
// weighted jacobian
realw jacobianw = wgllwgll[k*NGLLX+i]*jacobian2D[igll+NGLL2*iface];
-
+
// note: check error from cuda-memcheck and ddt seems "incorrect", because we
// are passing a __constant__ variable pointer around like it was
// made using cudaMalloc, which *may* be "incorrect", but produces
@@ -298,7 +298,7 @@
void FC_FUNC_(noise_add_surface_movie_cuda,
NOISE_ADD_SURFACE_MOVIE_CUDA)(long* Mesh_pointer_f,
realw* h_noise_surface_movie) {
-
+
TRACE("noise_add_surface_movie_cuda");
@@ -318,7 +318,7 @@
NDIM*NGLL2*(mp->nspec_top)*sizeof(realw),cudaMemcpyHostToDevice);
switch(mp->noise_tomography) {
- case 2:
+ case 2:
// adds surface source to forward field
noise_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_accel_crust_mantle,
mp->d_ibool_crust_mantle,
@@ -332,8 +332,8 @@
mp->d_jacobian2D_top_crust_mantle,
mp->d_wgllwgll_xy);
break;
-
- case 3:
+
+ case 3:
// adds surface source to adjoint (backward) field
noise_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
mp->d_ibool_crust_mantle,
@@ -343,7 +343,7 @@
mp->d_normal_x_noise,
mp->d_normal_y_noise,
mp->d_normal_z_noise,
- mp->d_mask_noise,
+ mp->d_mask_noise,
mp->d_jacobian2D_top_crust_mantle,
mp->d_wgllwgll_xy);
break;
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -183,7 +183,7 @@
extern "C"
void FC_FUNC_(prepare_constants_device,
- PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
+ PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
int* myrank_f,
int* h_NGLLX,
realw* h_hprime_xx,realw* h_hprime_yy,realw* h_hprime_zz,
@@ -210,7 +210,8 @@
int* ANISOTROPIC_3D_MANTLE_f,
int* ANISOTROPIC_INNER_CORE_f,
int* SAVE_BOUNDARY_MESH_f,
- int* USE_MESH_COLORING_GPU_f) {
+ int* USE_MESH_COLORING_GPU_f,
+ int* APPROXIMATE_HESS_KL_f) {
TRACE("prepare_constants_device");
@@ -246,7 +247,7 @@
// simulation type
mp->simulation_type = *SIMULATION_TYPE;
mp->noise_tomography = *NOISE_TOMOGRAPHY;
-
+
// simulation flags initialization
mp->save_forward = *SAVE_FORWARD_f;
mp->absorbing_conditions = *ABSORBING_CONDITIONS_f;
@@ -258,7 +259,9 @@
mp->anisotropic_3D_mantle = *ANISOTROPIC_3D_MANTLE_f;
mp->anisotropic_inner_core = *ANISOTROPIC_INNER_CORE_f;
mp->save_boundary_mesh = *SAVE_BOUNDARY_MESH_f;
+ mp->approximate_hess_kl = *APPROXIMATE_HESS_KL_f;
+ // mpi process rank
mp->myrank = *myrank_f;
// mesh coloring flag
@@ -305,28 +308,36 @@
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_number_receiver_global),mp->nrec_local*sizeof(int)),1);
print_CUDA_error_if_any(cudaMemcpy(mp->d_number_receiver_global,h_number_receiver_global,
mp->nrec_local*sizeof(int),cudaMemcpyHostToDevice),1512);
+
+ // for seismograms
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),
+ 3*NGLL3*(mp->nrec_local)*sizeof(realw)),4015);
+
+ mp->h_station_seismo_field = (realw*) malloc( 3*NGLL3*(mp->nrec_local)*sizeof(realw) );
+ if( mp->h_station_seismo_field == NULL) exit_on_error("h_station_seismo_field not allocated \n");
+
}
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_selected_rec),(*nrec)*sizeof(int)),1513);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_rec,h_ispec_selected_rec,
(*nrec)*sizeof(int),cudaMemcpyHostToDevice),1514);
-
+
// receiver adjoint source arrays only used for noise and adjoint simulations
// adjoint source arrays
mp->nadj_rec_local = *nadj_rec_local;
if( mp->nadj_rec_local > 0 ){
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_adj_sourcearrays,
(mp->nadj_rec_local)*3*NGLL3*sizeof(realw)),6003);
-
+
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_pre_computed_irec,
(mp->nadj_rec_local)*sizeof(int)),6004);
-
+
// prepares local irec array:
// the irec_local variable needs to be precomputed (as
// h_pre_comp..), because normally it is in the loop updating accel,
// and due to how it's incremented, it cannot be parallelized
int* h_pre_computed_irec = (int*) malloc( (mp->nadj_rec_local)*sizeof(int) );
if( h_pre_computed_irec == NULL ) exit_on_error("h_pre_computed_irec not allocated\n");
-
+
int irec_local = 0;
for(int irec = 0; irec < *nrec; irec++) {
if(mp->myrank == h_islice_selected_rec[irec]) {
@@ -339,12 +350,12 @@
print_CUDA_error_if_any(cudaMemcpy(mp->d_pre_computed_irec,h_pre_computed_irec,
(mp->nadj_rec_local)*sizeof(int),cudaMemcpyHostToDevice),6010);
free(h_pre_computed_irec);
-
+
// temporary array to prepare extracted source array values
mp->h_adj_sourcearrays_slice = (realw*) malloc( (mp->nadj_rec_local)*3*NGLL3*sizeof(realw) );
- if( mp->h_adj_sourcearrays_slice == NULL ) exit_on_error("h_adj_sourcearrays_slice not allocated\n");
+ if( mp->h_adj_sourcearrays_slice == NULL ) exit_on_error("h_adj_sourcearrays_slice not allocated\n");
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("prepare_constants_device");
#endif
@@ -390,7 +401,7 @@
NGLL3*(*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw)),9002);
print_CUDA_error_if_any(cudaMemcpy(mp->d_B_array_rotation, B_array_rotation,
NGLL3*(*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw),cudaMemcpyHostToDevice),9003);
-
+
// backward/reconstructed fields
if( mp->simulation_type == 3 ){
mp->d_b_two_omega_earth = *b_two_omega_earth;
@@ -659,7 +670,7 @@
) {
TRACE("prepare_fields_strain_device");
- int R_size,R_size2;
+ int R_size;
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
@@ -668,8 +679,6 @@
// crust_mantle
R_size = NGLL3*mp->NSPEC_CRUST_MANTLE;
- R_size2 = NGLL3*mp->NSPEC_CRUST_MANTLE;
-
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xx_crust_mantle,
R_size*sizeof(realw)),4432);
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_yy_crust_mantle,
@@ -693,9 +702,9 @@
R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_eps_trace_over_3_crust_mantle,
- R_size2*sizeof(realw)),4401);
+ R_size*sizeof(realw)),4401);
print_CUDA_error_if_any(cudaMemcpy(mp->d_eps_trace_over_3_crust_mantle,eps_trace_over_3_crust_mantle,
- R_size2*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4402);
// backward/reconstructed fields
if( mp->simulation_type == 3 ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xx_crust_mantle,
@@ -722,15 +731,13 @@
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_eps_trace_over_3_crust_mantle,
- R_size2*sizeof(realw)),4401);
+ R_size*sizeof(realw)),4401);
print_CUDA_error_if_any(cudaMemcpy(mp->d_b_eps_trace_over_3_crust_mantle,b_eps_trace_over_3_crust_mantle,
- R_size2*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4402);
}
// inner_core
R_size = NGLL3*mp->NSPEC_INNER_CORE;
- R_size2 = NGLL3*mp->NSPEC_INNER_CORE;
-
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xx_inner_core,
R_size*sizeof(realw)),4432);
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_yy_inner_core,
@@ -755,9 +762,9 @@
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_eps_trace_over_3_inner_core,
- R_size2*sizeof(realw)),4401);
+ R_size*sizeof(realw)),4401);
print_CUDA_error_if_any(cudaMemcpy(mp->d_eps_trace_over_3_inner_core,eps_trace_over_3_inner_core,
- R_size2*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4402);
// backward/reconstructed fields
if( mp->simulation_type == 3 ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xx_inner_core,
@@ -784,9 +791,9 @@
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_eps_trace_over_3_inner_core,
- R_size2*sizeof(realw)),4401);
+ R_size*sizeof(realw)),4401);
print_CUDA_error_if_any(cudaMemcpy(mp->d_b_eps_trace_over_3_inner_core,b_eps_trace_over_3_inner_core,
- R_size2*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4402);
}
}
@@ -804,11 +811,11 @@
int* NSPEC2DMAX_XMIN_XMAX_CM,int* NSPEC2DMAX_YMIN_YMAX_CM,
int* nimin_crust_mantle,int* nimax_crust_mantle,
int* njmin_crust_mantle,int* njmax_crust_mantle,
- int* nkmin_xi_crust_mantle,int* nkmin_eta_crust_mantle,
+ int* nkmin_xi_crust_mantle,int* nkmin_eta_crust_mantle,
int* ibelm_xmin_crust_mantle,int* ibelm_xmax_crust_mantle,
- int* ibelm_ymin_crust_mantle,int* ibelm_ymax_crust_mantle,
+ int* ibelm_ymin_crust_mantle,int* ibelm_ymax_crust_mantle,
realw* normal_xmin_crust_mantle,realw* normal_xmax_crust_mantle,
- realw* normal_ymin_crust_mantle,realw* normal_ymax_crust_mantle,
+ realw* normal_ymin_crust_mantle,realw* normal_ymax_crust_mantle,
realw* jacobian2D_xmin_crust_mantle, realw* jacobian2D_xmax_crust_mantle,
realw* jacobian2D_ymin_crust_mantle, realw* jacobian2D_ymax_crust_mantle,
realw* rho_vp_crust_mantle,
@@ -819,31 +826,31 @@
int* NSPEC2DMAX_XMIN_XMAX_OC,int* NSPEC2DMAX_YMIN_YMAX_OC,
int* nimin_outer_core,int* nimax_outer_core,
int* njmin_outer_core,int* njmax_outer_core,
- int* nkmin_xi_outer_core,int* nkmin_eta_outer_core,
+ int* nkmin_xi_outer_core,int* nkmin_eta_outer_core,
int* ibelm_xmin_outer_core,int* ibelm_xmax_outer_core,
int* ibelm_ymin_outer_core,int* ibelm_ymax_outer_core,
int* ibelm_bottom_outer_core,
realw* jacobian2D_xmin_outer_core, realw* jacobian2D_xmax_outer_core,
realw* jacobian2D_ymin_outer_core, realw* jacobian2D_ymax_outer_core,
- realw* jacobian2D_bottom_outer_core,
+ realw* jacobian2D_bottom_outer_core,
realw* vp_outer_core
) {
-
+
TRACE("prepare_fields_absorb_device");
int size;
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-
+
// checks flag
if( ! mp->absorbing_conditions ){ exit_on_cuda_error("prepare_fields_absorb_device absorbing_conditions not properly initialized"); }
-
+
// crust_mantle
mp->nspec2D_xmin_crust_mantle = *nspec2D_xmin_crust_mantle;
mp->nspec2D_xmax_crust_mantle = *nspec2D_xmax_crust_mantle;
mp->nspec2D_ymin_crust_mantle = *nspec2D_ymin_crust_mantle;
mp->nspec2D_ymax_crust_mantle = *nspec2D_ymax_crust_mantle;
- // vp & vs
+ // vp & vs
size = NGLL3*(mp->NSPEC_CRUST_MANTLE);
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rho_vp_crust_mantle,
size*sizeof(realw)),2201);
@@ -865,12 +872,12 @@
2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_eta_crust_mantle,nkmin_eta_crust_mantle,
2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmin_crust_mantle,
2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_njmin_crust_mantle,njmin_crust_mantle,
2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmax_crust_mantle,
2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_njmax_crust_mantle,njmax_crust_mantle,
@@ -885,15 +892,15 @@
2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_nimax_crust_mantle,nimax_crust_mantle,
2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
-
- // xmin
+
+
+ // xmin
if( mp->nspec2D_xmin_crust_mantle > 0 ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_xmin_crust_mantle,
(mp->nspec2D_xmin_crust_mantle)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmin_crust_mantle,ibelm_xmin_crust_mantle,
(mp->nspec2D_xmin_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_xmin_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_xmin_crust_mantle,normal_xmin_crust_mantle,
@@ -903,12 +910,12 @@
NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmin_crust_mantle,jacobian2D_xmin_crust_mantle,
NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
// boundary buffer
- if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmin_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw)),1202);
- }
+ }
}
// xmax
@@ -917,22 +924,22 @@
(mp->nspec2D_xmax_crust_mantle)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmax_crust_mantle,ibelm_xmax_crust_mantle,
(mp->nspec2D_xmax_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_xmax_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_xmax_crust_mantle,normal_xmax_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmax_crust_mantle,
NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmax_crust_mantle,jacobian2D_xmax_crust_mantle,
NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
// boundary buffer
- if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmax_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw)),1202);
- }
+ }
}
// ymin
@@ -941,22 +948,22 @@
(mp->nspec2D_ymin_crust_mantle)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymin_crust_mantle,ibelm_ymin_crust_mantle,
(mp->nspec2D_ymin_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_ymin_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_ymin_crust_mantle,normal_ymin_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymin_crust_mantle,
NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymin_crust_mantle,jacobian2D_ymin_crust_mantle,
NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
// boundary buffer
- if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymin_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw)),1202);
- }
+ }
}
// ymax
@@ -965,87 +972,87 @@
(mp->nspec2D_ymax_crust_mantle)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymax_crust_mantle,ibelm_ymax_crust_mantle,
(mp->nspec2D_ymax_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_ymax_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_ymax_crust_mantle,normal_ymax_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymax_crust_mantle,
NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymax_crust_mantle,jacobian2D_ymax_crust_mantle,
NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
// boundary buffer
- if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymax_crust_mantle,
NDIM*NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw)),1202);
- }
+ }
}
-
+
// outer_core
mp->nspec2D_xmin_outer_core = *nspec2D_xmin_outer_core;
mp->nspec2D_xmax_outer_core = *nspec2D_xmax_outer_core;
mp->nspec2D_ymin_outer_core = *nspec2D_ymin_outer_core;
mp->nspec2D_ymax_outer_core = *nspec2D_ymax_outer_core;
mp->nspec2D_zmin_outer_core = *nspec2D_zmin_outer_core;
-
- // vp
+
+ // vp
size = NGLL3*(mp->NSPEC_OUTER_CORE);
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_vp_outer_core,
size*sizeof(realw)),2201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_vp_outer_core,vp_outer_core,
size*sizeof(realw),cudaMemcpyHostToDevice),2202);
-
+
// ijk index arrays
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nkmin_xi_outer_core,
2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_xi_outer_core,nkmin_xi_outer_core,
2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nkmin_eta_outer_core,
2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_eta_outer_core,nkmin_eta_outer_core,
2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmin_outer_core,
2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_njmin_outer_core,njmin_outer_core,
2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmax_outer_core,
2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_njmax_outer_core,njmax_outer_core,
2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nimin_outer_core,
2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_nimin_outer_core,nimin_outer_core,
2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nimax_outer_core,
2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_nimax_outer_core,nimax_outer_core,
2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- // xmin
+
+ // xmin
if( mp->nspec2D_xmin_outer_core > 0 ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_xmin_outer_core,
(mp->nspec2D_xmin_outer_core)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmin_outer_core,ibelm_xmin_outer_core,
(mp->nspec2D_xmin_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmin_outer_core,
NGLL2*(mp->nspec2D_xmin_outer_core)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmin_outer_core,jacobian2D_xmin_outer_core,
NGLL2*(mp->nspec2D_xmin_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
// boundary buffer
- if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmin_outer_core,
NGLL2*(mp->nspec2D_xmin_outer_core)*sizeof(realw)),1202);
- }
+ }
}
// xmax
@@ -1054,55 +1061,55 @@
(mp->nspec2D_xmax_outer_core)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmax_outer_core,ibelm_xmax_outer_core,
(mp->nspec2D_xmax_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmax_outer_core,
NGLL2*(mp->nspec2D_xmax_outer_core)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmax_outer_core,jacobian2D_xmax_outer_core,
NGLL2*(mp->nspec2D_xmax_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
// boundary buffer
- if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmax_outer_core,
NGLL2*(mp->nspec2D_xmax_outer_core)*sizeof(realw)),1202);
- }
+ }
}
-
+
// ymin
if( mp->nspec2D_ymin_outer_core > 0 ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_ymin_outer_core,
(mp->nspec2D_ymin_outer_core)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymin_outer_core,ibelm_ymin_outer_core,
(mp->nspec2D_ymin_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymin_outer_core,
NGLL2*(mp->nspec2D_ymin_outer_core)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymin_outer_core,jacobian2D_ymin_outer_core,
NGLL2*(mp->nspec2D_ymin_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
// boundary buffer
- if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymin_outer_core,
NGLL2*(mp->nspec2D_ymin_outer_core)*sizeof(realw)),1202);
- }
+ }
}
-
+
// ymax
if( mp->nspec2D_ymax_outer_core > 0 ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_ymax_outer_core,
(mp->nspec2D_ymax_outer_core)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymax_outer_core,ibelm_ymax_outer_core,
(mp->nspec2D_ymax_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymax_outer_core,
NGLL2*(mp->nspec2D_ymax_outer_core)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymax_outer_core,jacobian2D_ymax_outer_core,
NGLL2*(mp->nspec2D_ymax_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
// boundary buffer
- if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymax_outer_core,
NGLL2*(mp->nspec2D_ymax_outer_core)*sizeof(realw)),1202);
- }
+ }
}
// zmin
@@ -1111,21 +1118,21 @@
(mp->nspec2D_zmin_outer_core)*sizeof(int)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_zmin_outer_core,ibelm_bottom_outer_core,
(mp->nspec2D_zmin_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
+
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_zmin_outer_core,
NGLL2*(mp->nspec2D_zmin_outer_core)*sizeof(realw)),1201);
print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_zmin_outer_core,jacobian2D_bottom_outer_core,
NGLL2*(mp->nspec2D_zmin_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+
// boundary buffer
- if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_zmin_outer_core,
NGLL2*(mp->nspec2D_zmin_outer_core)*sizeof(realw)),1202);
- }
+ }
}
-
-}
+}
+
/* ----------------------------------------------------------------------------------------------- */
// MPI interfaces
@@ -1134,7 +1141,7 @@
extern "C"
void FC_FUNC_(prepare_mpi_buffers_device,
- PREPARE_MPI_BUFFERS_DEVICE)(long* Mesh_pointer_f,
+ PREPARE_MPI_BUFFERS_DEVICE)(long* Mesh_pointer_f,
int* num_interfaces_crust_mantle,
int* max_nibool_interfaces_crust_mantle,
int* nibool_interfaces_crust_mantle,
@@ -1146,13 +1153,13 @@
int* num_interfaces_outer_core,
int* max_nibool_interfaces_outer_core,
int* nibool_interfaces_outer_core,
- int* ibool_interfaces_outer_core
+ int* ibool_interfaces_outer_core
){
TRACE("prepare_mpi_buffers_device");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-
+
// prepares interprocess-edge exchange information
// crust/mantle mesh
@@ -1172,8 +1179,8 @@
cudaMemcpyHostToDevice),1204);
// allocates mpi buffer for exchange with cpu
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer_crust_mantle),
- 3*(mp->max_nibool_interfaces_crust_mantle)*(mp->num_interfaces_crust_mantle)*sizeof(realw)),4004);
- }
+ 3*(mp->max_nibool_interfaces_crust_mantle)*(mp->num_interfaces_crust_mantle)*sizeof(realw)),4004);
+ }
// inner core mesh
mp->num_interfaces_inner_core = *num_interfaces_inner_core;
@@ -1192,8 +1199,8 @@
cudaMemcpyHostToDevice),1204);
// allocates mpi buffer for exchange with cpu
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer_inner_core),
- 3*(mp->max_nibool_interfaces_inner_core)*(mp->num_interfaces_inner_core)*sizeof(realw)),4004);
- }
+ 3*(mp->max_nibool_interfaces_inner_core)*(mp->num_interfaces_inner_core)*sizeof(realw)),4004);
+ }
// outer core mesh
// note: uses only scalar wavefield arrays
@@ -1213,10 +1220,10 @@
cudaMemcpyHostToDevice),1204);
// allocates mpi buffer for exchange with cpu
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer_outer_core),
- (mp->max_nibool_interfaces_outer_core)*(mp->num_interfaces_outer_core)*sizeof(realw)),4004);
- }
-
+ (mp->max_nibool_interfaces_outer_core)*(mp->num_interfaces_outer_core)*sizeof(realw)),4004);
+ }
+
}
/* ----------------------------------------------------------------------------------------------- */
@@ -1314,7 +1321,7 @@
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_tiso_crust_mantle, (mp->NSPEC_CRUST_MANTLE)*sizeof(int)),1025);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_tiso_crust_mantle, h_ispec_is_tiso,
(mp->NSPEC_CRUST_MANTLE)*sizeof(int),cudaMemcpyHostToDevice),1025);
-
+
// kappavstore, kappahstore
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappavstore_crust_mantle, size_padded*sizeof(realw)),1010);
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappahstore_crust_mantle, size_padded*sizeof(realw)),1010);
@@ -1336,7 +1343,7 @@
}
}else{
// anisotropic 3D mantle
-
+
// allocates memory on GPU
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c11store_crust_mantle),
size_padded*sizeof(realw)),4700);
@@ -1445,7 +1452,7 @@
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_zstore_crust_mantle),sizeof(realw)*size_glob),2005);
print_CUDA_error_if_any(cudaMemcpy(mp->d_zstore_crust_mantle,h_zstore,
sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
-
+
// xstore only needed when gravity is on
if( mp->gravity ){
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_xstore_crust_mantle),sizeof(realw)*size_glob),2005);
@@ -1465,6 +1472,7 @@
// wavefield
int size = NDIM * mp->NGLOB_CRUST_MANTLE;
+
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_crust_mantle),sizeof(realw)*size),4001);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc_crust_mantle),sizeof(realw)*size),4002);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel_crust_mantle),sizeof(realw)*size),4003);
@@ -1481,6 +1489,20 @@
sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ // kernels
+ if( mp->simulation_type == 3 ){
+ size = NGLL3*(mp->NSPEC_CRUST_MANTLE);
+
+ // preconditioner
+ if( mp->approximate_hess_kl ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hess_kl_crust_mantle),
+ size*sizeof(realw)),3030);
+ // initializes with zeros
+ print_CUDA_error_if_any(cudaMemset(mp->d_hess_kl_crust_mantle,0,
+ size*sizeof(realw)),3031);
+ }
+ }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("prepare_crust_mantle_device");
#endif
@@ -1794,7 +1816,7 @@
// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolleft_xi_inner_core,npoin2D_xi*sizeof(int)),280);
// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolright_eta_inner_core,npoin2D_eta*sizeof(int)),290);
// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolleft_eta_inner_core,npoin2D_eta*sizeof(int)),300);
-//
+//
// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolright_xi_inner_core,iboolright_xi,
// npoin2D_xi*sizeof(int),cudaMemcpyHostToDevice),91);
// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolleft_xi_inner_core,iboolleft_xi,
@@ -1803,16 +1825,16 @@
// npoin2D_eta*sizeof(int),cudaMemcpyHostToDevice),93);
// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolleft_eta_inner_core,iboolleft_eta,
// npoin2D_eta*sizeof(int),cudaMemcpyHostToDevice),94);
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("prepare_inner_core_device");
#endif
-}
-
+}
+
/* ----------------------------------------------------------------------------------------------- */
// for ACOUSTIC simulations
@@ -2607,10 +2629,10 @@
// alloc storage for the surface buffer to be copied
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie,
- NDIM*NGLL2*(mp->nspec_top)*sizeof(realw)),7005);
+ NDIM*NGLL2*(mp->nspec_top)*sizeof(realw)),7005);
}
-
+
// prepares noise source array
if( mp->noise_tomography == 1 ){
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
@@ -2637,12 +2659,12 @@
nface_size*sizeof(realw)),7303);
print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise,
nface_size*sizeof(realw),cudaMemcpyHostToDevice),7308);
-
+
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise,
nface_size*sizeof(realw)),7304);
print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise,
nface_size*sizeof(realw),cudaMemcpyHostToDevice),7309);
-
+
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_jacobian2D_top_crust_mantle,
nface_size*sizeof(realw)),7305);
print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_top_crust_mantle, jacobian2D_top_crust_mantle,
@@ -2700,7 +2722,7 @@
cudaFree(mp->d_kappavstore_crust_mantle);
cudaFree(mp->d_kappahstore_crust_mantle);
cudaFree(mp->d_muhstore_crust_mantle);
- cudaFree(mp->d_eta_anisostore_crust_mantle);
+ cudaFree(mp->d_eta_anisostore_crust_mantle);
cudaFree(mp->d_ispec_is_tiso_crust_mantle);
}else{
cudaFree(mp->d_c11store_crust_mantle);
@@ -2830,7 +2852,11 @@
cudaFree(mp->d_ispec_selected_source);
// receivers
- if( mp->nrec_local > 0 ) cudaFree(mp->d_number_receiver_global);
+ if( mp->nrec_local > 0 ) {
+ cudaFree(mp->d_number_receiver_global);
+ cudaFree(mp->d_station_seismo_field);
+ free(mp->h_station_seismo_field);
+ }
cudaFree(mp->d_ispec_selected_rec);
// rotation arrays
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-02-22 04:38:32 UTC (rev 19659)
@@ -110,17 +110,22 @@
void FC_FUNC_(get_max_accel,
GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {}
-void FC_FUNC_(get_norm_acoustic_from_device,
- GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
+void FC_FUNC_(check_norm_acoustic_from_device,
+ CHECK_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
long* Mesh_pointer_f,
int* SIMULATION_TYPE) {}
-void FC_FUNC_(get_norm_elastic_from_device,
- GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
- long* Mesh_pointer_f,
- int* SIMULATION_TYPE) {}
+void FC_FUNC_(check_norm_elastic_from_device,
+ CHECK_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
+ long* Mesh_pointer_f,
+ int* SIMULATION_TYPE) {}
+void FC_FUNC_(check_norm_strain_from_device,
+ CHECK_NORM_STRAIN_FROM_DEVICE)(realw* norm_strain,
+ realw* norm_strain2,
+ long* Mesh_pointer_f) {}
+
//
// src/cuda/compute_add_sources_elastic_cuda.cu
//
@@ -216,9 +221,7 @@
void FC_FUNC_(compute_kernels_hess_cuda,
COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
- realw* deltat_f,
- int* ELASTIC_SIMULATION,
- int* ACOUSTIC_SIMULATION) {}
+ realw* deltat_f) {}
//
@@ -245,24 +248,29 @@
// src/cuda/it_update_displacement_cuda.cu
//
-void FC_FUNC_(it_update_displacement_cuda,
- IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f,
- int* size_F,
- realw* deltat_F,
- realw* deltatsqover2_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F) {}
+void FC_FUNC_(it_update_displacement_ic_cuda,
+ IT_UPDATE_DISPLACMENT_IC_CUDA)(long* Mesh_pointer_f,
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ realw* b_deltat_F,
+ realw* b_deltatsqover2_F,
+ realw* b_deltatover2_F) {}
-void FC_FUNC_(it_update_displacement_ac_cuda,
- it_update_displacement_ac_cuda)(long* Mesh_pointer_f,
- int* size_F,
+void FC_FUNC_(it_update_displacement_cm_cuda,
+ IT_UPDATE_DISPLACMENT_CM_CUDA)(long* Mesh_pointer_f,
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ realw* b_deltat_F,
+ realw* b_deltatsqover2_F,
+ realw* b_deltatover2_F) {}
+
+void FC_FUNC_(it_update_displacement_oc_cuda,
+ IT_UPDATE_DISPLACEMENT_OC_cuda)(long* Mesh_pointer_f,
realw* deltat_F,
realw* deltatsqover2_F,
realw* deltatover2_F,
- int* SIMULATION_TYPE,
realw* b_deltat_F,
realw* b_deltatsqover2_F,
realw* b_deltatover2_F) {}
@@ -355,7 +363,8 @@
int* ANISOTROPIC_3D_MANTLE_f,
int* ANISOTROPIC_INNER_CORE_f,
int* SAVE_BOUNDARY_MESH_f,
- int* USE_MESH_COLORING_GPU_f) {}
+ int* USE_MESH_COLORING_GPU_f,
+ int* APPROXIMATE_HESS_KL_f) {}
void FC_FUNC_(prepare_fields_rotation_device,
PREPARE_FIELDS_ROTATION_DEVICE)(long* Mesh_pointer_f,
@@ -713,12 +722,82 @@
void FC_FUNC_(transfer_accel_cm_to_device,
TRNASFER_ACCEL_CM_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+void FC_FUNC_(transfer_displ_cm_from_device,
+ TRANSFER_DISPL_CM_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_displ_cm_from_device,
+ TRANSFER_B_DISPL_CM_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_displ_ic_from_device,
+ TRANSFER_DISPL_IC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_displ_ic_from_device,
+ TRANSFER_B_DISPL_IC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_veloc_cm_from_device,
+ TRANSFER_DISPL_CM_FROM_DEVICE)(int* size, realw* veloc, long* Mesh_pointer_f) {}
+
void FC_FUNC_(transfer_accel_cm_from_device,
TRANSFER_ACCEL_CM_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_accel_cm_from_device,
TRNASFER_B_ACCEL_CM_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f) {}
+void FC_FUNC_(transfer_accel_ic_from_device,
+ TRANSFER_ACCEL_IC_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_strain_cm_from_device,
+ TRANSFER_STRAIN_CM_FROM_DEVICE)(long* Mesh_pointer,
+ realw* eps_trace_over_3,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz) {}
+
+void FC_FUNC_(transfer_b_strain_cm_to_device,
+ TRANSFER_B_STRAIN_CM_TO_DEVICE)(long* Mesh_pointer,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz) {}
+
+void FC_FUNC_(transfer_strain_ic_from_device,
+ TRANSFER_STRAIN_IC_FROM_DEVICE)(long* Mesh_pointer,
+ realw* eps_trace_over_3,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz) {}
+
+void FC_FUNC_(transfer_b_strain_ic_to_device,
+ TRANSFER_B_STRAIN_IC_TO_DEVICE)(long* Mesh_pointer,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz) {}
+
+void FC_FUNC_(transfer_rotation_from_device,
+ TRANSFER_ROTATION_FROM_DEVICE)(long* Mesh_pointer,
+ realw* A_array_rotation,
+ realw* B_array_rotation) {}
+
+void FC_FUNC_(transfer_b_rotation_to_device,
+ TRANSFER_B_ROTATION_TO_DEVICE)(long* Mesh_pointer,
+ realw* A_array_rotation,
+ realw* B_array_rotation) {}
+
+void FC_FUNC_(transfer_b_att_cm_to_device,
+ TRANSFER_B_ATT_CM_TO_DEVICE)(long* Mesh_pointer,
+ realw* R_xx,
+ realw* R_yy,
+ realw* R_xy,
+ realw* R_xz,
+ realw* R_yz) {}
+
void FC_FUNC_(transfer_sigma_from_device,
TRANSFER_SIGMA_FROM_DEVICE)(int* size, realw* sigma_kl,long* Mesh_pointer_f) {}
@@ -834,10 +913,10 @@
realw* h_kappa_ac_kl,
int* NSPEC_AB) {}
-void FC_FUNC_(transfer_kernels_hess_el_tohost,
- TRANSFER_KERNELS_HESS_EL_TOHOST)(long* Mesh_pointer,
+void FC_FUNC_(transfer_kernels_hess_cm_tohost,
+ TRANSFER_KERNELS_HESS_CM_TOHOST)(long* Mesh_pointer,
realw* h_hess_kl,
- int* NSPEC_AB) {}
+ int* NSPEC) {}
void FC_FUNC_(transfer_kernels_hess_ac_tohost,
TRANSFER_KERNELS_HESS_AC_TOHOST)(long* Mesh_pointer,
@@ -849,12 +928,20 @@
// src/cuda/write_seismograms_cuda.cu
//
-void FC_FUNC_(transfer_station_el_from_device,
- TRANSFER_STATION_EL_FROM_DEVICE)(realw* displ,realw* veloc,realw* accel,
- realw* b_displ, realw* b_veloc, realw* b_accel,
- long* Mesh_pointer_f,int* number_receiver_global,
- int* ispec_selected_rec,int* ispec_selected_source,
- int* ibool,int* SIMULATION_TYPEf) {}
+void FC_FUNC_(write_seismograms_transfer_cuda,
+ WRITE_SEISMOGRAMS_TRANSFER_CUDA)(realw* displ,
+ realw* b_displ,
+ realw* eps_trace_over_3,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz,
+ long* Mesh_pointer_f,
+ int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ispec_selected_source,
+ int* ibool) {}
void FC_FUNC_(transfer_station_ac_from_device,
TRANSFER_STATION_AC_FROM_DEVICE)(
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -168,6 +168,8 @@
}
+/* ----------------------------------------------------------------------------------------------- */
+
// inner_core
extern "C"
void FC_FUNC_(transfer_fields_ic_from_device,
@@ -183,6 +185,8 @@
}
+/* ----------------------------------------------------------------------------------------------- */
+
// outer_core
extern "C"
void FC_FUNC_(transfer_fields_oc_from_device,
@@ -218,36 +222,40 @@
}
+/* ----------------------------------------------------------------------------------------------- */
+
// inner_core
extern "C"
void FC_FUNC_(transfer_b_fields_ic_from_device,
TRANSFER_B_FIELDS_IC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
long* Mesh_pointer_f) {
-
+
TRACE("transfer_fields_b_ic_from_device_");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(b_displ,mp->d_b_displ_inner_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
print_CUDA_error_if_any(cudaMemcpy(b_veloc,mp->d_b_veloc_inner_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40007);
print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel_inner_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40008);
-
+
}
+/* ----------------------------------------------------------------------------------------------- */
+
// outer_core
extern "C"
void FC_FUNC_(transfer_b_fields_oc_from_device,
TRANSFER_B_FIELDS_OC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
long* Mesh_pointer_f) {
-
+
TRACE("transfer_b_fields_oc_from_device_");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(b_displ,mp->d_b_displ_outer_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
print_CUDA_error_if_any(cudaMemcpy(b_veloc,mp->d_b_veloc_outer_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40007);
print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel_outer_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40008);
-
+
}
@@ -265,13 +273,79 @@
}
+
/* ----------------------------------------------------------------------------------------------- */
extern "C"
+void FC_FUNC_(transfer_displ_cm_from_device,
+ TRANSFER_DISPL_CM_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {
+
+ TRACE("transfer_displ_cm_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_displ_cm_from_device,
+ TRANSFER_B_DISPL_CM_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {
+
+ TRACE("transfer_b_displ_cm_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_b_displ_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_displ_ic_from_device,
+ TRANSFER_DISPL_IC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {
+
+ TRACE("transfer_displ_ic_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ_inner_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_displ_ic_from_device,
+ TRANSFER_B_DISPL_IC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {
+
+ TRACE("transfer_b_displ_ic_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_b_displ_inner_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_veloc_cm_from_device,
+ TRANSFER_DISPL_CM_FROM_DEVICE)(int* size, realw* veloc, long* Mesh_pointer_f) {
+
+ TRACE("transfer_veloc_cm_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(veloc,mp->d_veloc_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40007);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
void FC_FUNC_(transfer_accel_cm_from_device,
TRANSFER_ACCEL_CM_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {
-TRACE("transfer_accel_cm_from_device");
+ TRACE("transfer_accel_cm_from_device");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
@@ -293,20 +367,248 @@
}
+/* ----------------------------------------------------------------------------------------------- */
+extern "C"
+void FC_FUNC_(transfer_accel_ic_from_device,
+ TRANSFER_ACCEL_IC_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {
+ TRACE("transfer_accel_ic_from_device");
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel_inner_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40026);
+}
+/* ----------------------------------------------------------------------------------------------- */
+// strain fields
+/* ----------------------------------------------------------------------------------------------- */
+// crust/mantle
+extern "C"
+void FC_FUNC_(transfer_strain_cm_from_device,
+ TRANSFER_STRAIN_CM_FROM_DEVICE)(long* Mesh_pointer,
+ realw* eps_trace_over_3,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz) {
+ TRACE("transfer_strain_cm_from_device");
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+ int size = NGLL3*mp->NSPEC_CRUST_MANTLE;
+ cudaMemcpy(eps_trace_over_3,mp->d_eps_trace_over_3_crust_mantle,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xx,mp->d_epsilondev_xx_crust_mantle,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_yy,mp->d_epsilondev_yy_crust_mantle,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xy,mp->d_epsilondev_xy_crust_mantle,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xz,mp->d_epsilondev_xz_crust_mantle,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_yz,mp->d_epsilondev_yz_crust_mantle,size*sizeof(realw),cudaMemcpyDeviceToHost);
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_strain_cm_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// crust/mantle
+
+extern "C"
+void FC_FUNC_(transfer_b_strain_cm_to_device,
+ TRANSFER_B_STRAIN_CM_TO_DEVICE)(long* Mesh_pointer,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz) {
+ TRACE("transfer_b_strain_cm_to_device");
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+ int size = NGLL3*mp->NSPEC_CRUST_MANTLE;
+
+ cudaMemcpy(mp->d_b_epsilondev_xx_crust_mantle,epsilondev_xx,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_yy_crust_mantle,epsilondev_yy,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_xy_crust_mantle,epsilondev_xy,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_xz_crust_mantle,epsilondev_xz,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_yz_crust_mantle,epsilondev_yz,size*sizeof(realw),cudaMemcpyHostToDevice);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_b_strain_cm_to_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// inner core
+
+extern "C"
+void FC_FUNC_(transfer_strain_ic_from_device,
+ TRANSFER_STRAIN_IC_FROM_DEVICE)(long* Mesh_pointer,
+ realw* eps_trace_over_3,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz) {
+ TRACE("transfer_strain_ic_from_device");
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+ int size = NGLL3*mp->NSPEC_INNER_CORE;
+
+ cudaMemcpy(eps_trace_over_3,mp->d_eps_trace_over_3_inner_core,size*sizeof(realw),cudaMemcpyDeviceToHost);
+
+ cudaMemcpy(epsilondev_xx,mp->d_epsilondev_xx_inner_core,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_yy,mp->d_epsilondev_yy_inner_core,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xy,mp->d_epsilondev_xy_inner_core,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xz,mp->d_epsilondev_xz_inner_core,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_yz,mp->d_epsilondev_yz_inner_core,size*sizeof(realw),cudaMemcpyDeviceToHost);
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_strain_ic_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// inner_core
+
+extern "C"
+void FC_FUNC_(transfer_b_strain_ic_to_device,
+ TRANSFER_B_STRAIN_IC_TO_DEVICE)(long* Mesh_pointer,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz) {
+ TRACE("transfer_b_strain_cm_to_device");
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+ int size = NGLL3*mp->NSPEC_INNER_CORE;
+
+ cudaMemcpy(mp->d_b_epsilondev_xx_inner_core,epsilondev_xx,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_yy_inner_core,epsilondev_yy,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_xy_inner_core,epsilondev_xy,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_xz_inner_core,epsilondev_xz,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_yz_inner_core,epsilondev_yz,size*sizeof(realw),cudaMemcpyHostToDevice);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_b_strain_ic_to_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// rotation arrays
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for outer core
+
+extern "C"
+void FC_FUNC_(transfer_rotation_from_device,
+ TRANSFER_ROTATION_FROM_DEVICE)(long* Mesh_pointer,
+ realw* A_array_rotation,
+ realw* B_array_rotation) {
+ TRACE("transfer_rotation_from_device");
+
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+ int size = NGLL3*mp->NSPEC_OUTER_CORE;
+
+ cudaMemcpy(A_array_rotation,mp->d_A_array_rotation,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(B_array_rotation,mp->d_B_array_rotation,size*sizeof(realw),cudaMemcpyDeviceToHost);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_rotation_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for outer core
+
+extern "C"
+void FC_FUNC_(transfer_b_rotation_to_device,
+ TRANSFER_B_ROTATION_TO_DEVICE)(long* Mesh_pointer,
+ realw* A_array_rotation,
+ realw* B_array_rotation) {
+ TRACE("transfer_b_rotation_to_device");
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+ int size = NGLL3*mp->NSPEC_OUTER_CORE;
+
+ cudaMemcpy(mp->d_b_A_array_rotation,A_array_rotation,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_B_array_rotation,B_array_rotation,size*sizeof(realw),cudaMemcpyHostToDevice);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_b_rotation_to_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// attenuation fields
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/*
+// feature not used so far ...
+// crust_mantle
+extern "C"
+void FC_FUNC_(transfer_b_att_cm_to_device,
+ TRANSFER_B_ATT_CM_TO_DEVICE)(long* Mesh_pointer,
+ realw* R_xx,
+ realw* R_yy,
+ realw* R_xy,
+ realw* R_xz,
+ realw* R_yz) {
+ TRACE("transfer_b_att_cm_to_device");
+
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+ if( ! mp->use_attenuation_mimic){ exit_on_cuda_error("not supported attenuation feature yet");}
+
+ // not used so far...
+ // see notes about USE_ATTENUATION_MIMIC
+ int size = N_SLS*NGLL3*mp->NSPEC_CRUST_MANTLE;
+
+ cudaMemcpy(mp->d_b_R_xx_crust_mantle,R_xx,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_R_yy_crust_mantle,R_yy,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_R_xy_crust_mantle,R_xy,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_R_xz_crust_mantle,R_xz,size*sizeof(realw),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_R_yz_crust_mantle,R_yz,size*sizeof(realw),cudaMemcpyHostToDevice);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_b_att_cm_to_device");
+#endif
+}
+*/
+
+
+
+
+
+
+
+
+
//daniel: TODO old code routines...
@@ -326,7 +628,7 @@
}
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_b_displ_from_device,
TRANSFER_B_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {
@@ -338,9 +640,9 @@
print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40056);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_displ_from_device,
TRANSFER_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {
@@ -352,7 +654,7 @@
print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40066);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
/*
extern "C"
@@ -426,7 +728,7 @@
*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
// attenuation fields
extern "C"
@@ -461,9 +763,9 @@
exit_on_cuda_error("after transfer_b_fields_att_to_device");
#endif
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
// attenuation fields
extern "C"
@@ -498,8 +800,8 @@
exit_on_cuda_error("after transfer_fields_att_from_device");
#endif
}
+*/
-
/* ----------------------------------------------------------------------------------------------- */
extern "C"
@@ -548,7 +850,7 @@
// for ACOUSTIC simulations
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_fields_ac_to_device,
TRANSFER_FIELDS_AC_TO_DEVICE)(
@@ -572,9 +874,9 @@
exit_on_cuda_error("after transfer_fields_ac_to_device");
#endif
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_b_fields_ac_to_device,
TRANSFER_B_FIELDS_AC_TO_DEVICE)(
@@ -598,10 +900,10 @@
exit_on_cuda_error("after transfer_b_fields_ac_to_device");
#endif
}
+*/
-
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
int* size,
@@ -624,9 +926,9 @@
exit_on_cuda_error("after transfer_fields_ac_from_device");
#endif
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_b_fields_ac_from_device,
TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
@@ -650,9 +952,9 @@
exit_on_cuda_error("after transfer_b_fields_ac_from_device");
#endif
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_dot_dot_from_device,
TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, realw* potential_dot_dot_acoustic,long* Mesh_pointer_f) {
@@ -665,9 +967,9 @@
sizeof(realw)*(*size),cudaMemcpyDeviceToHost),50041);
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_b_dot_dot_from_device,
TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, realw* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {
@@ -680,8 +982,8 @@
sizeof(realw)*(*size),cudaMemcpyDeviceToHost),50042);
}
+*/
-
/* ----------------------------------------------------------------------------------------------- */
extern "C"
@@ -711,20 +1013,20 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_kernels_hess_el_tohost,
- TRANSFER_KERNELS_HESS_EL_TOHOST)(long* Mesh_pointer,
+void FC_FUNC_(transfer_kernels_hess_cm_tohost,
+ TRANSFER_KERNELS_HESS_CM_TOHOST)(long* Mesh_pointer,
realw* h_hess_kl,
- int* NSPEC_AB) {
-TRACE("transfer_kernels_hess_el_tohost");
+ int* NSPEC) {
+TRACE("transfer_kernels_hess_cm_tohost");
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- print_CUDA_error_if_any(cudaMemcpy(h_hess_kl,mp->d_hess_el_kl,NGLL3*(*NSPEC_AB)*sizeof(realw),
+ print_CUDA_error_if_any(cudaMemcpy(h_hess_kl,mp->d_hess_kl_crust_mantle,NGLL3*(*NSPEC)*sizeof(realw),
cudaMemcpyDeviceToHost),70201);
}
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_kernels_hess_ac_tohost,
TRANSFER_KERNELS_HESS_AC_TOHOST)(long* Mesh_pointer,
@@ -737,5 +1039,5 @@
print_CUDA_error_if_any(cudaMemcpy(h_hess_ac_kl,mp->d_hess_ac_kl,NGLL3*(*NSPEC_AB)*sizeof(realw),
cudaMemcpyDeviceToHost),70202);
}
+*/
-
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu 2012-02-22 04:38:32 UTC (rev 19659)
@@ -43,14 +43,18 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void transfer_stations_fields_from_device_kernel(int* number_receiver_global,
+__global__ void write_seismograms_transfer_from_device_kernel(int* number_receiver_global,
int* ispec_selected_rec,
int* ibool,
realw* station_seismo_field,
realw* desired_field,
int nrec_local) {
+
+// vector fields
+
int blockID = blockIdx.x + blockIdx.y*gridDim.x;
- if(blockID<nrec_local) {
+
+ if(blockID < nrec_local) {
int irec = number_receiver_global[blockID]-1;
int ispec = ispec_selected_rec[irec]-1;
int iglob = ibool[threadIdx.x + NGLL3*ispec]-1;
@@ -61,16 +65,37 @@
}
}
+/* ----------------------------------------------------------------------------------------------- */
+__global__ void write_seismograms_transfer_scalar_from_device_kernel(int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ibool,
+ realw* station_seismo_field,
+ realw* desired_field,
+ int nrec_local) {
+
+// scalar fields
+
+ int blockID = blockIdx.x + blockIdx.y*gridDim.x;
+
+ if(blockID < nrec_local) {
+ int irec = number_receiver_global[blockID]-1;
+ int ispec = ispec_selected_rec[irec]-1;
+ int iglob = ibool[threadIdx.x + NGLL3*ispec]-1;
+
+ station_seismo_field[NGLL3*blockID + threadIdx.x] = desired_field[iglob];
+ }
+}
+
/* ----------------------------------------------------------------------------------------------- */
-void transfer_field_from_device(Mesh* mp, realw* d_field,realw* h_field,
+void write_seismograms_transfer_from_device(Mesh* mp, realw* d_field,realw* h_field,
int* number_receiver_global,
int* d_ispec_selected,
int* h_ispec_selected,
int* ibool) {
-TRACE("transfer_field_from_device");
+TRACE("write_seismograms_transfer_from_device");
// checks if anything to do
if( mp->nrec_local == 0 ) return;
@@ -82,80 +107,170 @@
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
-
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
// prepare field transfer array on device
- transfer_stations_fields_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
- d_ispec_selected,
- mp->d_ibool,
- mp->d_station_seismo_field,
- d_field,
- mp->nrec_local);
+ write_seismograms_transfer_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
+ d_ispec_selected,
+ mp->d_ibool_crust_mantle,
+ mp->d_station_seismo_field,
+ d_field,
+ mp->nrec_local);
+ // copies array to CPU
cudaMemcpy(mp->h_station_seismo_field,mp->d_station_seismo_field,
- (3*NGLL3)*(mp->nrec_local)*sizeof(realw),cudaMemcpyDeviceToHost);
+ 3*NGLL3*(mp->nrec_local)*sizeof(realw),cudaMemcpyDeviceToHost);
int irec_local;
- for(irec_local=0;irec_local<mp->nrec_local;irec_local++) {
+ for(irec_local = 0 ; irec_local < mp->nrec_local; irec_local++) {
int irec = number_receiver_global[irec_local] - 1;
int ispec = h_ispec_selected[irec] - 1;
- for(int i=0;i<NGLL3;i++) {
+ for(int i = 0; i < NGLL3; i++) {
int iglob = ibool[i+NGLL3*ispec] - 1;
h_field[0+3*iglob] = mp->h_station_seismo_field[0+3*i+irec_local*NGLL3*3];
h_field[1+3*iglob] = mp->h_station_seismo_field[1+3*i+irec_local*NGLL3*3];
h_field[2+3*iglob] = mp->h_station_seismo_field[2+3*i+irec_local*NGLL3*3];
}
+ }
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("write_seismograms_transfer_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void write_seismograms_transfer_scalar_from_device(Mesh* mp,
+ realw* d_field,realw* h_field,
+ int* number_receiver_global,
+ int* d_ispec_selected,
+ int* h_ispec_selected,
+ int* ibool) {
+
+ TRACE("write_seismograms_transfer_scalar_from_device");
+
+ // checks if anything to do
+ if( mp->nrec_local == 0 ) return;
+
+ int blocksize = NGLL3;
+ int num_blocks_x = mp->nrec_local;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
}
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // prepare field transfer array on device
+ write_seismograms_transfer_scalar_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
+ d_ispec_selected,
+ mp->d_ibool_crust_mantle,
+ mp->d_station_seismo_field,
+ d_field,
+ mp->nrec_local);
+
+ // copies array to CPU
+ cudaMemcpy(mp->h_station_seismo_field,mp->d_station_seismo_field,
+ 1*NGLL3*(mp->nrec_local)*sizeof(realw),cudaMemcpyDeviceToHost);
+
+ int irec_local;
+ for(irec_local = 0 ; irec_local < mp->nrec_local; irec_local++) {
+ int irec = number_receiver_global[irec_local] - 1;
+ int ispec = h_ispec_selected[irec] - 1;
+
+ for(int i = 0; i < NGLL3; i++) {
+ int iglob = ibool[i+NGLL3*ispec] - 1;
+ h_field[iglob] = mp->h_station_seismo_field[i+irec_local*NGLL3];
+ }
+ }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("transfer_field_from_device");
+ exit_on_cuda_error("write_seismograms_transfer_scalar_from_device");
#endif
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_station_el_from_device,
- TRANSFER_STATION_EL_FROM_DEVICE)(realw* displ,realw* veloc,realw* accel,
- realw* b_displ, realw* b_veloc, realw* b_accel,
- long* Mesh_pointer_f,int* number_receiver_global,
- int* ispec_selected_rec,int* ispec_selected_source,
- int* ibool,int* SIMULATION_TYPEf) {
-TRACE("transfer_station_el_from_device");
+void FC_FUNC_(write_seismograms_transfer_cuda,
+ WRITE_SEISMOGRAMS_TRANSFER_CUDA)(realw* displ,
+ realw* b_displ,
+ realw* eps_trace_over_3,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz,
+ long* Mesh_pointer_f,
+ int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ispec_selected_source,
+ int* ibool) {
+TRACE("write_seismograms_transfer_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
// checks if anything to do
if( mp->nrec_local == 0 ) return;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
+ switch( mp->simulation_type ){
+ case 1:
+ write_seismograms_transfer_from_device(mp,mp->d_displ_crust_mantle,
+ displ,
+ number_receiver_global,
+ mp->d_ispec_selected_rec,
+ ispec_selected_rec, ibool);
+ break;
- if(SIMULATION_TYPE == 1) {
- transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ case 2:
+ write_seismograms_transfer_from_device(mp,mp->d_displ_crust_mantle,
+ displ,
+ number_receiver_global,
+ mp->d_ispec_selected_source,
+ ispec_selected_source, ibool);
+
+ write_seismograms_transfer_scalar_from_device(mp,mp->d_eps_trace_over_3_crust_mantle,
+ eps_trace_over_3,
+ number_receiver_global,
+ mp->d_ispec_selected_source,
+ ispec_selected_source, ibool);
+ write_seismograms_transfer_scalar_from_device(mp,mp->d_epsilondev_xx_crust_mantle,
+ epsilondev_xx,
+ number_receiver_global,
+ mp->d_ispec_selected_source,
+ ispec_selected_source, ibool);
+ write_seismograms_transfer_scalar_from_device(mp,mp->d_epsilondev_yy_crust_mantle,
+ epsilondev_yy,
+ number_receiver_global,
+ mp->d_ispec_selected_source,
+ ispec_selected_source, ibool);
+ write_seismograms_transfer_scalar_from_device(mp,mp->d_epsilondev_xy_crust_mantle,
+ epsilondev_xy,
+ number_receiver_global,
+ mp->d_ispec_selected_source,
+ ispec_selected_source, ibool);
+ write_seismograms_transfer_scalar_from_device(mp,mp->d_epsilondev_xz_crust_mantle,
+ epsilondev_xz,
+ number_receiver_global,
+ mp->d_ispec_selected_source,
+ ispec_selected_source, ibool);
+ write_seismograms_transfer_scalar_from_device(mp,mp->d_epsilondev_yz_crust_mantle,
+ epsilondev_yz,
+ number_receiver_global,
+ mp->d_ispec_selected_source,
+ ispec_selected_source, ibool);
+ break;
+
+ case 3:
+ write_seismograms_transfer_from_device(mp,mp->d_b_displ_crust_mantle,b_displ,
+ number_receiver_global,
+ mp->d_ispec_selected_rec,
+ ispec_selected_rec, ibool);
+ break;
}
- else if(SIMULATION_TYPE == 2) {
- transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
- transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
- transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
- }
- else if(SIMULATION_TYPE == 3) {
- transfer_field_from_device(mp,mp->d_b_displ,b_displ, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_from_device(mp,mp->d_b_veloc,b_veloc, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_from_device(mp,mp->d_b_accel,b_accel, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- }
}
@@ -164,7 +279,7 @@
// ACOUSTIC simulations
/* ----------------------------------------------------------------------------------------------- */
-
+/*
__global__ void transfer_stations_fields_acoustic_from_device_kernel(int* number_receiver_global,
int* ispec_selected_rec,
int* ibool,
@@ -182,9 +297,9 @@
station_seismo_potential[nodeID] = desired_potential[iglob];
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
void transfer_field_acoustic_from_device(Mesh* mp,
realw* d_potential,
realw* h_potential,
@@ -245,9 +360,9 @@
exit_on_cuda_error("transfer_field_acoustic_from_device");
#endif
}
-
+*/
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_station_ac_from_device,
TRANSFER_STATION_AC_FROM_DEVICE)(
@@ -313,4 +428,4 @@
exit_on_cuda_error("transfer_station_ac_from_device");
#endif
}
-
+*/
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -120,9 +120,9 @@
! gets callers rank
call MPI_COMM_RANK(MPI_COMM_WORLD,rank,ier)
-
- ! synchronizes MPI processes
+
+ ! synchronizes MPI processes
call MPI_BARRIER(MPI_COMM_WORLD,ier)
if( ier /= 0 ) call exit_mpi(rank,'error synchronize MPI processes')
-
+
end subroutine sync_all
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -736,7 +736,7 @@
use constants
use specfem_par,only: Mesh_pointer
-
+
implicit none
integer :: NPROC
@@ -799,7 +799,7 @@
use constants
use specfem_par,only: Mesh_pointer
-
+
implicit none
integer :: NPROC
@@ -813,7 +813,7 @@
integer :: FORWARD_OR_ADJOINT
! local parameters
- integer :: iinterface
+ integer :: iinterface
! assemble only if more than one partition
if(NPROC > 1) then
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -956,10 +956,10 @@
integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
-
+
integer :: IREGION
integer :: FORWARD_OR_ADJOINT
-
+
! local parameters
integer iinterface
@@ -1068,7 +1068,7 @@
! waits for data to receive and assembles
use constants
use specfem_par,only: Mesh_pointer
-
+
implicit none
integer :: NPROC
@@ -1083,7 +1083,7 @@
integer :: FORWARD_OR_ADJOINT
! local parameters
-
+
integer iinterface
! here we have to assemble all the contributions between partitions using MPI
@@ -1098,7 +1098,7 @@
! adding contributions of neighbours
call transfer_asmbl_accel_to_device(Mesh_pointer, &
- buffer_recv_vector_ext_mesh, &
+ buffer_recv_vector_ext_mesh, &
IREGION,FORWARD_OR_ADJOINT)
! This step is done via previous function transfer_and_assemble...
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -31,14 +31,15 @@
epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
SIMULATION_TYPE,OUTPUT_FILES,time_start,DT,t0,NSTEP, &
- myrank) !COMPUTE_AND_STORE_STRAIN,myrank)
+ myrank)
+ use constants
+ use specfem_par,only: GPU_MODE,Mesh_pointer
+
implicit none
include 'mpif.h'
- include "constants.h"
include "precision.h"
- include "OUTPUT_FILES/values_from_mesher.h"
! time step
integer it,NSTEP,myrank
@@ -71,7 +72,7 @@
! local parameters
! maximum of the norm of the displacement and of the potential in the fluid
real(kind=CUSTOM_REAL) Usolidnorm,Usolidnorm_all,Ufluidnorm,Ufluidnorm_all
- real(kind=CUSTOM_REAL) Strain_norm,Strain_norm_all,strain2_norm,strain2_norm_all
+ real(kind=CUSTOM_REAL) Strain_norm,Strain_norm_all,Strain2_norm,Strain2_norm_all
real(kind=CUSTOM_REAL) b_Usolidnorm,b_Usolidnorm_all,b_Ufluidnorm,b_Ufluidnorm_all
! names of the data files for all the processors in MPI
character(len=150) outputname
@@ -98,13 +99,25 @@
! compute maximum of norm of displacement in each slice
- Usolidnorm = max( &
- maxval(sqrt(displ_crust_mantle(1,:)**2 + &
- displ_crust_mantle(2,:)**2 + displ_crust_mantle(3,:)**2)), &
- maxval(sqrt(displ_inner_core(1,:)**2 + displ_inner_core(2,:)**2 + displ_inner_core(3,:)**2)))
+ if( .not. GPU_MODE) then
+ ! on CPU
+ Usolidnorm = max( &
+ maxval(sqrt(displ_crust_mantle(1,:)**2 + &
+ displ_crust_mantle(2,:)**2 + &
+ displ_crust_mantle(3,:)**2)), &
+ maxval(sqrt(displ_inner_core(1,:)**2 + &
+ displ_inner_core(2,:)**2 + &
+ displ_inner_core(3,:)**2)))
- Ufluidnorm = maxval(abs(displ_outer_core))
+ Ufluidnorm = maxval(abs(displ_outer_core))
+ else
+ ! on GPU
+ ! way 2: just get maximum of fields from GPU
+ call check_norm_elastic_from_device(Usolidnorm,Mesh_pointer,1)
+ call check_norm_acoustic_from_device(Ufluidnorm,Mesh_pointer,1)
+ endif
+
! compute the maximum of the maxima for all the slices using an MPI reduction
call MPI_REDUCE(Usolidnorm,Usolidnorm_all,1,CUSTOM_MPI_TYPE,MPI_MAX,0, &
MPI_COMM_WORLD,ier)
@@ -112,14 +125,21 @@
MPI_COMM_WORLD,ier)
if (SIMULATION_TYPE == 3) then
- b_Usolidnorm = max( &
+ if( .not. GPU_MODE) then
+ ! on CPU
+ b_Usolidnorm = max( &
maxval(sqrt(b_displ_crust_mantle(1,:)**2 + &
b_displ_crust_mantle(2,:)**2 + b_displ_crust_mantle(3,:)**2)), &
maxval(sqrt(b_displ_inner_core(1,:)**2 &
+ b_displ_inner_core(2,:)**2 &
+ b_displ_inner_core(3,:)**2)))
- b_Ufluidnorm = maxval(abs(b_displ_outer_core))
+ b_Ufluidnorm = maxval(abs(b_displ_outer_core))
+ else
+ ! on GPU
+ call check_norm_elastic_from_device(b_Usolidnorm,Mesh_pointer,3)
+ call check_norm_acoustic_from_device(b_Ufluidnorm,Mesh_pointer,3)
+ endif
! compute the maximum of the maxima for all the slices using an MPI reduction
call MPI_REDUCE(b_Usolidnorm,b_Usolidnorm_all,1,CUSTOM_MPI_TYPE,MPI_MAX,0, &
@@ -129,10 +149,19 @@
endif
if (COMPUTE_AND_STORE_STRAIN) then
- Strain_norm = maxval(abs(eps_trace_over_3_crust_mantle))
- strain2_norm= max( maxval(abs(epsilondev_xx_crust_mantle)),maxval(abs(epsilondev_yy_crust_mantle)), &
- maxval(abs(epsilondev_xy_crust_mantle)),maxval(abs(epsilondev_xz_crust_mantle)), &
- maxval(abs(epsilondev_yz_crust_mantle)) )
+ if( .not. GPU_MODE) then
+ ! on CPU
+ Strain_norm = maxval(abs(eps_trace_over_3_crust_mantle))
+ Strain2_norm= max( maxval(abs(epsilondev_xx_crust_mantle)), &
+ maxval(abs(epsilondev_yy_crust_mantle)), &
+ maxval(abs(epsilondev_xy_crust_mantle)), &
+ maxval(abs(epsilondev_xz_crust_mantle)), &
+ maxval(abs(epsilondev_yz_crust_mantle)) )
+ else
+ ! on GPU
+ call check_norm_strain_from_device(Strain_norm,Strain2_norm,Mesh_pointer)
+ endif
+
call MPI_REDUCE(Strain_norm,Strain_norm_all,1,CUSTOM_MPI_TYPE,MPI_MAX,0, &
MPI_COMM_WORLD,ier)
call MPI_REDUCE(Strain2_norm,Strain2_norm_all,1,CUSTOM_MPI_TYPE,MPI_MAX,0, &
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -25,10 +25,10 @@
!
!=====================================================================
- subroutine compute_add_sources()
+ subroutine compute_add_sources()
use specfem_par
- use specfem_par_crustmantle,only: accel_crust_mantle,ibool_crust_mantle
+ use specfem_par_crustmantle,only: accel_crust_mantle,ibool_crust_mantle
implicit none
! local parameters
@@ -106,8 +106,6 @@
else
! on GPU
- call load_GPU_elastic()
-
! prepares buffer with source time function values, to be copied onto GPU
if(USE_FORCE_POINT_SOURCE) then
do isource = 1,NSOURCES
@@ -117,13 +115,11 @@
else
do isource = 1,NSOURCES
stf_pre_compute(isource) = &
- comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
enddo
endif
! adds sources: only implements SIMTYPE=1 and NOISE_TOM=0
call compute_add_sources_el_cuda(Mesh_pointer,NSOURCES,stf_pre_compute)
-
- call load_CPU_elastic()
endif
@@ -136,7 +132,7 @@
subroutine compute_add_sources_adjoint()
use specfem_par
- use specfem_par_crustmantle,only: accel_crust_mantle,ibool_crust_mantle
+ use specfem_par_crustmantle,only: accel_crust_mantle,ibool_crust_mantle
implicit none
! local parameters
@@ -278,13 +274,9 @@
else
! on GPU
- call load_GPU_elastic()
-
call add_sources_el_sim_type_2_or_3(Mesh_pointer,nrec,adj_sourcearrays, &
islice_selected_rec,ispec_selected_rec, &
- iadj_vec(it))
-
- call load_CPU_elastic()
+ iadj_vec(it))
endif
end subroutine compute_add_sources_adjoint
@@ -386,8 +378,6 @@
else
! on GPU
- call load_GPU_elastic()
-
! prepares buffer with source time function values, to be copied onto GPU
if(USE_FORCE_POINT_SOURCE) then
do isource = 1,NSOURCES
@@ -397,13 +387,11 @@
else
do isource = 1,NSOURCES
stf_pre_compute(isource) = &
- comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
enddo
endif
! adds sources: only implements SIMTYPE=3 (and NOISE_TOM=0)
call compute_add_sources_el_s3_cuda(Mesh_pointer,NSOURCES,stf_pre_compute)
-
- call load_CPU_elastic()
endif
end subroutine compute_add_sources_backward
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -81,7 +81,7 @@
endif
if( .not. GPU_MODE ) then
- ! on CPU
+ ! on CPU
if( USE_DEVILLE_PRODUCTS_VAL ) then
! uses Deville et al. (2002) routine
call compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
@@ -114,12 +114,8 @@
else
! on GPU
- call load_GPU_acoustic()
-
! includes both forward and adjoint/kernel simulations
call compute_forces_outer_core_cuda(Mesh_pointer,iphase,time,b_time)
-
- call load_CPU_acoustic()
endif
@@ -135,6 +131,9 @@
!---
!--- couple with mantle at the top of the outer core
!---
+ call load_CPU_acoustic()
+ call load_CPU_elastic()
+
if(ACTUALLY_COUPLE_FLUID_CMB) &
call compute_coupling_fluid_CMB(displ_crust_mantle,b_displ_crust_mantle, &
ibool_crust_mantle,ibelm_bottom_crust_mantle, &
@@ -153,6 +152,9 @@
normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
+
+ call load_GPU_acoustic()
+
endif
@@ -160,7 +162,7 @@
! in outer core
if( iphase == 1 ) then
! sends out MPI interface data (non-blocking)
-
+
if(.NOT. GPU_MODE) then
! on CPU
call assemble_MPI_scalar_ext_mesh_s(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
@@ -172,7 +174,6 @@
request_send_scalar_outer_core,request_recv_scalar_outer_core)
else
! on GPU
- call load_GPU_acoustic()
! outer core
call assemble_MPI_scalar_send_cuda(NPROCTOT_VAL, &
buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
@@ -196,7 +197,6 @@
b_request_send_scalar_outer_core,b_request_recv_scalar_outer_core)
else
! on GPU
- call load_GPU_acoustic()
! outer core
call assemble_MPI_scalar_send_cuda(NPROCTOT_VAL, &
b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
@@ -207,10 +207,10 @@
3) ! <-- 3 == adjoint b_accel
endif ! GPU
endif ! SIMULATION_TYPE == 3
-
+
else
! make sure the last communications are finished and processed
- ! waits for send/receive requests to be completed and assembles values
+ ! waits for send/receive requests to be completed and assembles values
if(.NOT. GPU_MODE) then
! on CPU
call assemble_MPI_scalar_ext_mesh_w(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
@@ -226,7 +226,6 @@
num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
request_send_scalar_outer_core,request_recv_scalar_outer_core, &
1) ! <-- 1 == fwd accel
- call load_CPU_acoustic()
endif
! adjoint simulations
@@ -245,8 +244,7 @@
b_buffer_recv_scalar_outer_core, &
num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
b_request_send_scalar_outer_core,b_request_recv_scalar_outer_core, &
- 3) ! <-- 3 == adjoint b_accel
- call load_CPU_acoustic()
+ 3) ! <-- 3 == adjoint b_accel
endif
endif ! SIMULATION_TYPE == 3
endif ! iphase == 1
@@ -264,14 +262,12 @@
if (SIMULATION_TYPE == 3) &
call compute_forces_ac_update_veloc(b_veloc_outer_core,b_accel_outer_core, &
b_deltatover2,rmass_outer_core)
- else
+ else
! on GPU
- call load_GPU_acoustic()
call kernel_3_outer_core_cuda(Mesh_pointer, &
deltatover2,SIMULATION_TYPE,b_deltatover2)
- call load_CPU_acoustic()
endif
-
+
end subroutine compute_forces_acoustic
!=====================================================================
@@ -332,30 +328,30 @@
!=====================================================================
subroutine load_GPU_acoustic
-
+
use specfem_par
use specfem_par_outercore
implicit none
-
+
! daniel: TODO - temporary transfers to the GPU
call transfer_fields_oc_to_device(NGLOB_OUTER_CORE,displ_outer_core, &
veloc_outer_core,accel_outer_core,Mesh_pointer)
if( SIMULATION_TYPE == 3 ) then
call transfer_b_fields_oc_to_device(NGLOB_OUTER_CORE,b_displ_outer_core, &
- b_veloc_outer_core,b_accel_outer_core,Mesh_pointer)
+ b_veloc_outer_core,b_accel_outer_core,Mesh_pointer)
endif
-
- end subroutine
+ end subroutine
+
!=====================================================================
subroutine load_CPU_acoustic
-
+
use specfem_par
use specfem_par_outercore
implicit none
-
+
! daniel: TODO - temporary transfers to the CPU
call transfer_fields_oc_from_device(NGLOB_OUTER_CORE,displ_outer_core, &
veloc_outer_core,accel_outer_core,Mesh_pointer)
@@ -364,5 +360,5 @@
call transfer_b_fields_oc_from_device(NGLOB_OUTER_CORE,b_displ_outer_core, &
b_veloc_outer_core,b_accel_outer_core,Mesh_pointer)
endif
-
+
end subroutine
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -185,20 +185,15 @@
endif !SIMULATION_TYPE == 3
else
- ! on GPU
- call load_GPU_elastic()
-
+ ! on GPU
! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
! for crust/mantle
call compute_forces_crust_mantle_cuda(Mesh_pointer,iphase)
! for inner core
call compute_forces_inner_core_cuda(Mesh_pointer,iphase)
-
- call load_CPU_elastic()
endif ! GPU_MODE
-
! computes additional contributions to acceleration field
if( iphase == 1 ) then
@@ -226,7 +221,7 @@
! hence, instead of a moment tensor 'sourcearrays', a 'noise_sourcearray' for a point force is needed.
! furthermore, the CMTSOLUTION needs to be zero, i.e., no earthquakes.
! now this must be manually set in DATA/CMTSOLUTION, by USERS.
- call noise_add_source_master_rec()
+ call noise_add_source_master_rec()
case( 2 )
! second step of noise tomography, i.e., read the surface movie saved at every timestep
! use the movie to drive the ensemble forward wavefield
@@ -241,13 +236,15 @@
! use the movie to reconstruct the ensemble forward wavefield
! the ensemble adjoint wavefield is done as usual
! note instead of "NSTEP-it+1", now we us "it", since reconstruction is a reversal of reversal
- call noise_read_add_surface_movie(b_accel_crust_mantle,it)
+ call noise_read_add_surface_movie(b_accel_crust_mantle,it)
end select
! ****************************************************
! ********** add matching with fluid part **********
! ****************************************************
+ call load_CPU_elastic()
+ call load_CPU_acoustic()
! only for elements in first matching layer in the solid
@@ -277,18 +274,20 @@
RHO_BOTTOM_OC,minus_g_icb, &
SIMULATION_TYPE,NSPEC2D_TOP(IREGION_INNER_CORE))
+ call load_GPU_elastic()
+
endif ! iphase == 1
-
+
! assemble all the contributions between slices using MPI
! crust/mantle and inner core handled in the same call
! in order to reduce the number of MPI messages by 2
if( iphase == 1 ) then
- ! sends out MPI interface data
+ ! sends out MPI interface data
if(.NOT. GPU_MODE) then
- ! on CPU
+ ! on CPU
! sends accel values to corresponding MPI interface neighbors
! crust mantle
call assemble_MPI_vector_ext_mesh_s(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
@@ -307,8 +306,7 @@
my_neighbours_inner_core, &
request_send_vector_inner_core,request_recv_vector_inner_core)
else
- ! on GPU
- call load_GPU_elastic()
+ ! on GPU
! crust mantle
call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
@@ -332,7 +330,7 @@
! adjoint / kernel runs
if (SIMULATION_TYPE == 3) then
if(.NOT. GPU_MODE) then
- ! on CPU
+ ! on CPU
! sends accel values to corresponding MPI interface neighbors
! crust mantle
call assemble_MPI_vector_ext_mesh_s(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
@@ -350,9 +348,8 @@
nibool_interfaces_inner_core,ibool_interfaces_inner_core,&
my_neighbours_inner_core, &
b_request_send_vector_inner_core,b_request_recv_vector_inner_core)
- else
+ else
! on GPU
- call load_GPU_elastic()
! crust mantle
call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
b_buffer_send_vector_crust_mantle,b_buffer_recv_vector_crust_mantle, &
@@ -361,7 +358,7 @@
my_neighbours_crust_mantle, &
b_request_send_vector_crust_mantle,b_request_recv_vector_crust_mantle, &
IREGION_CRUST_MANTLE, &
- 3) ! <-- 3 == adjoint b_accel
+ 3) ! <-- 3 == adjoint b_accel
! inner core
call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
@@ -375,7 +372,7 @@
endif ! SIMULATION_TYPE == 3
else
- ! waits for send/receive requests to be completed and assembles values
+ ! waits for send/receive requests to be completed and assembles values
if(.NOT. GPU_MODE) then
! on CPU
! crust mantle
@@ -394,7 +391,7 @@
request_send_vector_inner_core,request_recv_vector_inner_core)
else
! on GPU
- ! crust mantle
+ ! crust mantle
call assemble_MPI_vector_write_cuda(NPROCTOT_VAL, &
buffer_recv_vector_crust_mantle, &
num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
@@ -407,14 +404,13 @@
num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
request_send_vector_inner_core,request_recv_vector_inner_core, &
IREGION_INNER_CORE, &
- 1)
- call load_CPU_elastic()
+ 1)
endif
-
+
! adjoint / kernel runs
if (SIMULATION_TYPE == 3) then
- ! waits for send/receive requests to be completed and assembles values
+ ! waits for send/receive requests to be completed and assembles values
if(.NOT. GPU_MODE) then
! on CPU
! crust mantle
@@ -431,7 +427,7 @@
max_nibool_interfaces_inner_core, &
nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
b_request_send_vector_inner_core,b_request_recv_vector_inner_core)
-
+
else
! on GPU
! crust mantle
@@ -440,16 +436,15 @@
num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
b_request_send_vector_crust_mantle,b_request_recv_vector_crust_mantle, &
IREGION_CRUST_MANTLE, &
- 3) ! <-- 3 == adjoint b_accel
+ 3) ! <-- 3 == adjoint b_accel
! inner core
call assemble_MPI_vector_write_cuda(NPROCTOT_VAL,&
b_buffer_recv_vector_inner_core, &
num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
b_request_send_vector_inner_core,b_request_recv_vector_inner_core, &
IREGION_INNER_CORE, &
- 3)
- call load_CPU_elastic()
- endif
+ 3)
+ endif
endif ! SIMULATION_TYPE == 3
endif ! iphase == 1
@@ -466,20 +461,22 @@
b_two_omega_earth,rmass_crust_mantle)
else
! on GPU
- call load_GPU_elastic()
call kernel_3_a_cuda(Mesh_pointer, &
deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS_VAL)
- call load_CPU_elastic()
endif
-
+
! couples ocean with crust mantle
! (updates acceleration with ocean load approximation)
- if(OCEANS_VAL) &
+ if(OCEANS_VAL) then
+ call load_CPU_elastic()
+
call compute_coupling_ocean(accel_crust_mantle,b_accel_crust_mantle, &
rmass_crust_mantle,rmass_ocean_load,normal_top_crust_mantle, &
ibool_crust_mantle,ibelm_top_crust_mantle, &
updated_dof_ocean_load, &
SIMULATION_TYPE,NSPEC2D_TOP(IREGION_CRUST_MANTLE))
+ call load_GPU_elastic()
+ endif
! Newmark time scheme:
! corrector terms for elastic parts
@@ -496,12 +493,10 @@
b_deltatover2,b_two_omega_earth,rmass_inner_core)
else
! on GPU
- call load_GPU_elastic()
call kernel_3_b_cuda(Mesh_pointer, &
deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS_VAL)
- call load_CPU_elastic()
endif
-
+
end subroutine compute_forces_elastic
@@ -694,12 +689,12 @@
!=====================================================================
subroutine load_GPU_elastic
-
+
use specfem_par
use specfem_par_crustmantle
use specfem_par_innercore
implicit none
-
+
! daniel: TODO - temporary transfers to the GPU
call transfer_fields_cm_to_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle, &
veloc_crust_mantle,accel_crust_mantle,Mesh_pointer)
@@ -712,29 +707,29 @@
call transfer_b_fields_ic_to_device(NDIM*NGLOB_INNER_CORE,b_displ_inner_core, &
b_veloc_inner_core,b_accel_inner_core,Mesh_pointer)
endif
-
- end subroutine
+ end subroutine
+
!=====================================================================
subroutine load_CPU_elastic
-
+
use specfem_par
use specfem_par_crustmantle
use specfem_par_innercore
implicit none
-
+
! daniel: TODO - temporary transfers back to the CPU
call transfer_fields_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle, &
- veloc_crust_mantle,accel_crust_mantle,Mesh_pointer)
+ veloc_crust_mantle,accel_crust_mantle,Mesh_pointer)
call transfer_fields_ic_from_device(NDIM*NGLOB_INNER_CORE,displ_inner_core, &
veloc_inner_core,accel_inner_core,Mesh_pointer)
if( SIMULATION_TYPE == 3 ) then
call transfer_b_fields_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,b_displ_crust_mantle, &
- b_veloc_crust_mantle,b_accel_crust_mantle,Mesh_pointer)
+ b_veloc_crust_mantle,b_accel_crust_mantle,Mesh_pointer)
call transfer_b_fields_ic_from_device(NDIM*NGLOB_INNER_CORE,b_displ_inner_core, &
- b_veloc_inner_core,b_accel_inner_core,Mesh_pointer)
+ b_veloc_inner_core,b_accel_inner_core,Mesh_pointer)
endif
-
+
end subroutine
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -101,7 +101,7 @@
logical :: phase_is_inner
! local parameters
-
+
real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
real(kind=CUSTOM_REAL), dimension(NGLLX, NGLLY, NGLLZ) :: factor_common_use
real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -94,6 +94,20 @@
! --- boundary kernels ------
if (SAVE_BOUNDARY_MESH) then
+
+ ! transfers wavefields onto CPU
+ if( GPU_MODE ) then
+ ! crust/mantle
+ call transfer_accel_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,accel_crust_mantle,Mesh_pointer)
+ call transfer_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,Mesh_pointer)
+ call transfer_b_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,b_displ_crust_mantle,Mesh_pointer)
+ ! inner core
+ call transfer_accel_ic_from_device(NDIM*NGLOB_CRUST_MANTLE,accel_inner_core,Mesh_pointer)
+ call transfer_displ_ic_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_inner_core,Mesh_pointer)
+ call transfer_b_displ_ic_from_device(NDIM*NGLOB_CRUST_MANTLE,b_displ_inner_core,Mesh_pointer)
+ endif
+
+ ! updates kernels on CPU
fluid_solid_boundary = .false.
iregion_code = IREGION_CRUST_MANTLE
@@ -1294,11 +1308,10 @@
accel_crust_mantle,b_accel_crust_mantle, &
deltat)
+ use constants
+ use specfem_par,only: GPU_MODE,Mesh_pointer
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
@@ -1314,26 +1327,34 @@
! local parameters
integer :: i,j,k,ispec,iglob
- ! crust_mantle
- do ispec = 1, NSPEC_CRUST_MANTLE
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool_crust_mantle(i,j,k,ispec)
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ ! crust_mantle
+ do ispec = 1, NSPEC_CRUST_MANTLE
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool_crust_mantle(i,j,k,ispec)
- ! approximates hessian
- ! term with adjoint acceleration and backward/reconstructed acceleration
- hess_kl_crust_mantle(i,j,k,ispec) = hess_kl_crust_mantle(i,j,k,ispec) &
- + deltat * (accel_crust_mantle(1,iglob) * b_accel_crust_mantle(1,iglob) &
- + accel_crust_mantle(2,iglob) * b_accel_crust_mantle(2,iglob) &
- + accel_crust_mantle(3,iglob) * b_accel_crust_mantle(3,iglob) )
+ ! approximates hessian
+ ! term with adjoint acceleration and backward/reconstructed acceleration
+ hess_kl_crust_mantle(i,j,k,ispec) = hess_kl_crust_mantle(i,j,k,ispec) &
+ + deltat * (accel_crust_mantle(1,iglob) * b_accel_crust_mantle(1,iglob) &
+ + accel_crust_mantle(2,iglob) * b_accel_crust_mantle(2,iglob) &
+ + accel_crust_mantle(3,iglob) * b_accel_crust_mantle(3,iglob) )
+ enddo
enddo
enddo
enddo
- enddo
+ else
+ ! updates kernels on GPU
+ ! computes contribution to density and bulk modulus kernel
+ call compute_kernels_hess_cuda(Mesh_pointer,deltat)
+ endif
+
end subroutine compute_kernels_hessian
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -32,7 +32,7 @@
use specfem_par,only: &
ichunk,SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it, &
wgllwgll_xz,wgllwgll_yz,wgllwgll_xy
-
+
use specfem_par,only: GPU_MODE,Mesh_pointer
use specfem_par_crustmantle, only: &
@@ -73,8 +73,6 @@
! crust & mantle
- if( GPU_MODE ) call load_GPU_elastic()
-
! xmin
! if two chunks exclude this face for one of them
if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC) then
@@ -130,19 +128,19 @@
enddo
enddo
enddo
-
+
else
! on GPU
if( nspec2D_xmin_crust_mantle > 0 ) call compute_stacey_elastic_cuda(Mesh_pointer, &
absorb_xmin_crust_mantle, &
- 0) ! <= xmin
+ 0) ! <= xmin
endif
-
+
! writes absorbing boundary values
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmin_crust_mantle > 0 ) then
call write_abs(0,absorb_xmin_crust_mantle, reclen_xmin_crust_mantle,it)
endif
-
+
endif ! NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC
! xmax
@@ -204,15 +202,15 @@
! on GPU
if( nspec2D_xmax_crust_mantle > 0 ) call compute_stacey_elastic_cuda(Mesh_pointer, &
absorb_xmax_crust_mantle, &
- 1) ! <= xmin
+ 1) ! <= xmin
endif
-
+
! writes absorbing boundary values
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmax_crust_mantle > 0 ) then
call write_abs(1,absorb_xmax_crust_mantle,reclen_xmax_crust_mantle,it)
endif
-
+
endif ! NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AB
! ymin
@@ -272,7 +270,7 @@
! on GPU
if( nspec2D_ymin_crust_mantle > 0 ) call compute_stacey_elastic_cuda(Mesh_pointer, &
absorb_ymin_crust_mantle, &
- 2) ! <= ymin
+ 2) ! <= ymin
endif
@@ -340,7 +338,7 @@
! on GPU
if( nspec2D_ymax_crust_mantle > 0 ) call compute_stacey_elastic_cuda(Mesh_pointer, &
absorb_ymax_crust_mantle, &
- 3) ! <= ymax
+ 3) ! <= ymax
endif
! writes absorbing boundary values
@@ -348,7 +346,5 @@
call write_abs(3,absorb_ymax_crust_mantle,reclen_ymax_crust_mantle,it)
endif
- if( GPU_MODE ) call load_CPU_elastic()
-
end subroutine compute_stacey_crust_mantle
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -32,7 +32,7 @@
use specfem_par,only: &
ichunk,SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it, &
- wgllwgll_xz,wgllwgll_yz,wgllwgll_xy
+ wgllwgll_xz,wgllwgll_yz,wgllwgll_xy
use specfem_par,only: GPU_MODE,Mesh_pointer
@@ -70,8 +70,7 @@
! file access (by process rank modulo 8) showed that the following,
! simple approach is still fastest. (assuming that files are accessed on a local scratch disk)
- if( GPU_MODE ) call load_GPU_acoustic()
-
+ ! outer core
! xmin
! if two chunks exclude this face for one of them
if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC) then
@@ -116,7 +115,7 @@
! on GPU
if( nspec2D_xmin_outer_core > 0 ) call compute_stacey_acoustic_cuda(Mesh_pointer, &
absorb_xmin_outer_core, &
- 4) ! <= xmin
+ 4) ! <= xmin
endif
! writes absorbing boundary values
@@ -311,11 +310,8 @@
8) ! <= zmin
endif
-
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_zmin_outer_core > 0 ) then
call write_abs(8,absorb_zmin_outer_core,reclen_zmin,it)
endif
- if( GPU_MODE) call load_CPU_acoustic()
-
end subroutine compute_stacey_outer_core
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -147,14 +147,14 @@
! boundary kernel
if (SAVE_BOUNDARY_MESH) then
call save_kernels_boundary_kl(myrank,scale_t,scale_displ, &
- moho_kl,d400_kl,d670_kl,cmb_kl,icb_kl, &
- LOCAL_PATH,HONOR_1D_SPHERICAL_MOHO)
+ moho_kl,d400_kl,d670_kl,cmb_kl,icb_kl, &
+ LOCAL_PATH,HONOR_1D_SPHERICAL_MOHO)
endif
! approximate hessian
if( APPROXIMATE_HESS_KL ) then
call save_kernels_hessian(myrank,scale_t,scale_displ, &
- hess_kl_crust_mantle,LOCAL_PATH)
+ hess_kl_crust_mantle,LOCAL_PATH)
endif
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -100,7 +100,9 @@
endif
! write the seismograms with time shift
- call write_seismograms()
+ if( nrec_local > 0 .or. ( WRITE_SEISMOGRAMS_BY_MASTER .and. myrank == 0 ) ) then
+ call write_seismograms()
+ endif
! adjoint simulations: kernels
if( SIMULATION_TYPE == 3 ) then
@@ -116,9 +118,6 @@
! modified from the subroutine 'write_movie_surface'
if ( NOISE_TOMOGRAPHY == 1 ) then
call noise_save_surface_movie()
-! displ_crust_mantle, &
-! ibelm_top_crust_mantle,ibool_crust_mantle, &
-! NSPEC2D_TOP(IREGION_CRUST_MANTLE),noise_surface_movie,it)
endif
enddo ! end of main time loop
@@ -175,7 +174,11 @@
! local parameters
integer :: i
- ! Newmark time scheme update
+ ! updates wavefields
+ if( .not. GPU_MODE) then
+ ! on CPU
+
+ ! Newmark time scheme update
#ifdef _HANDOPT_NEWMARK
! way 2:
! One common technique in computational science to help enhance pipelining is loop unrolling
@@ -186,240 +189,263 @@
! in most cases a real (CUSTOM_REAL) value will have 4 bytes,
! assuming a default cache size of about 128 bytes, we unroll here in steps of 3, thus 29 reals or 118 bytes,
! rather than with steps of 4
- ! mantle
- if(imodulo_NGLOB_CRUST_MANTLE >= 1) then
- do i = 1,imodulo_NGLOB_CRUST_MANTLE
+ ! mantle
+ if(imodulo_NGLOB_CRUST_MANTLE >= 1) then
+ do i = 1,imodulo_NGLOB_CRUST_MANTLE
+ displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+ + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
+
+ veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
+ + deltatover2*accel_crust_mantle(:,i)
+
+ accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ enddo
+ endif
+ do i = imodulo_NGLOB_CRUST_MANTLE+1,NGLOB_CRUST_MANTLE, 3 ! in steps of 3
displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+ deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
+ displ_crust_mantle(:,i+1) = displ_crust_mantle(:,i+1) &
+ + deltat*veloc_crust_mantle(:,i+1) + deltatsqover2*accel_crust_mantle(:,i+1)
+ displ_crust_mantle(:,i+2) = displ_crust_mantle(:,i+2) &
+ + deltat*veloc_crust_mantle(:,i+2) + deltatsqover2*accel_crust_mantle(:,i+2)
+
veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
+ deltatover2*accel_crust_mantle(:,i)
+ veloc_crust_mantle(:,i+1) = veloc_crust_mantle(:,i+1) &
+ + deltatover2*accel_crust_mantle(:,i+1)
+ veloc_crust_mantle(:,i+2) = veloc_crust_mantle(:,i+2) &
+ + deltatover2*accel_crust_mantle(:,i+2)
+ ! set acceleration to zero
+ ! note: we do initialize acceleration in this loop since it is read already into the cache,
+ ! otherwise it would have to be read in again for this explicitly,
+ ! which would make this step more expensive
accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ accel_crust_mantle(:,i+1) = 0._CUSTOM_REAL
+ accel_crust_mantle(:,i+2) = 0._CUSTOM_REAL
enddo
- endif
- do i = imodulo_NGLOB_CRUST_MANTLE+1,NGLOB_CRUST_MANTLE, 3 ! in steps of 3
- displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
- + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
- displ_crust_mantle(:,i+1) = displ_crust_mantle(:,i+1) &
- + deltat*veloc_crust_mantle(:,i+1) + deltatsqover2*accel_crust_mantle(:,i+1)
- displ_crust_mantle(:,i+2) = displ_crust_mantle(:,i+2) &
- + deltat*veloc_crust_mantle(:,i+2) + deltatsqover2*accel_crust_mantle(:,i+2)
+ ! outer core
+ do i=1,NGLOB_OUTER_CORE
+ displ_outer_core(i) = displ_outer_core(i) &
+ + deltat*veloc_outer_core(i) + deltatsqover2*accel_outer_core(i)
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
- + deltatover2*accel_crust_mantle(:,i)
- veloc_crust_mantle(:,i+1) = veloc_crust_mantle(:,i+1) &
- + deltatover2*accel_crust_mantle(:,i+1)
- veloc_crust_mantle(:,i+2) = veloc_crust_mantle(:,i+2) &
- + deltatover2*accel_crust_mantle(:,i+2)
+ veloc_outer_core(i) = veloc_outer_core(i) &
+ + deltatover2*accel_outer_core(i)
- ! set acceleration to zero
- ! note: we do initialize acceleration in this loop since it is read already into the cache,
- ! otherwise it would have to be read in again for this explicitly,
- ! which would make this step more expensive
- accel_crust_mantle(:,i) = 0._CUSTOM_REAL
- accel_crust_mantle(:,i+1) = 0._CUSTOM_REAL
- accel_crust_mantle(:,i+2) = 0._CUSTOM_REAL
- enddo
+ accel_outer_core(i) = 0._CUSTOM_REAL
+ enddo
- ! outer core
- do i=1,NGLOB_OUTER_CORE
- displ_outer_core(i) = displ_outer_core(i) &
- + deltat*veloc_outer_core(i) + deltatsqover2*accel_outer_core(i)
+ ! inner core
+ if(imodulo_NGLOB_INNER_CORE >= 1) then
+ do i = 1,imodulo_NGLOB_INNER_CORE
+ displ_inner_core(:,i) = displ_inner_core(:,i) &
+ + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
- veloc_outer_core(i) = veloc_outer_core(i) &
- + deltatover2*accel_outer_core(i)
+ veloc_inner_core(:,i) = veloc_inner_core(:,i) &
+ + deltatover2*accel_inner_core(:,i)
- accel_outer_core(i) = 0._CUSTOM_REAL
- enddo
-
- ! inner core
- if(imodulo_NGLOB_INNER_CORE >= 1) then
- do i = 1,imodulo_NGLOB_INNER_CORE
+ accel_inner_core(:,i) = 0._CUSTOM_REAL
+ enddo
+ endif
+ do i = imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE, 3 ! in steps of 3
displ_inner_core(:,i) = displ_inner_core(:,i) &
+ deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
+ displ_inner_core(:,i+1) = displ_inner_core(:,i+1) &
+ + deltat*veloc_inner_core(:,i+1) + deltatsqover2*accel_inner_core(:,i+1)
+ displ_inner_core(:,i+2) = displ_inner_core(:,i+2) &
+ + deltat*veloc_inner_core(:,i+2) + deltatsqover2*accel_inner_core(:,i+2)
+
veloc_inner_core(:,i) = veloc_inner_core(:,i) &
+ deltatover2*accel_inner_core(:,i)
+ veloc_inner_core(:,i+1) = veloc_inner_core(:,i+1) &
+ + deltatover2*accel_inner_core(:,i+1)
+ veloc_inner_core(:,i+2) = veloc_inner_core(:,i+2) &
+ + deltatover2*accel_inner_core(:,i+2)
accel_inner_core(:,i) = 0._CUSTOM_REAL
+ accel_inner_core(:,i+1) = 0._CUSTOM_REAL
+ accel_inner_core(:,i+2) = 0._CUSTOM_REAL
enddo
- endif
- do i = imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE, 3 ! in steps of 3
- displ_inner_core(:,i) = displ_inner_core(:,i) &
- + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
- displ_inner_core(:,i+1) = displ_inner_core(:,i+1) &
- + deltat*veloc_inner_core(:,i+1) + deltatsqover2*accel_inner_core(:,i+1)
- displ_inner_core(:,i+2) = displ_inner_core(:,i+2) &
- + deltat*veloc_inner_core(:,i+2) + deltatsqover2*accel_inner_core(:,i+2)
-
-
- veloc_inner_core(:,i) = veloc_inner_core(:,i) &
- + deltatover2*accel_inner_core(:,i)
- veloc_inner_core(:,i+1) = veloc_inner_core(:,i+1) &
- + deltatover2*accel_inner_core(:,i+1)
- veloc_inner_core(:,i+2) = veloc_inner_core(:,i+2) &
- + deltatover2*accel_inner_core(:,i+2)
-
- accel_inner_core(:,i) = 0._CUSTOM_REAL
- accel_inner_core(:,i+1) = 0._CUSTOM_REAL
- accel_inner_core(:,i+2) = 0._CUSTOM_REAL
- enddo
-
#else
! way 1:
- ! mantle
- do i=1,NGLOB_CRUST_MANTLE
- displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
- + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
- + deltatover2*accel_crust_mantle(:,i)
- accel_crust_mantle(:,i) = 0._CUSTOM_REAL
- enddo
- ! outer core
- do i=1,NGLOB_OUTER_CORE
- displ_outer_core(i) = displ_outer_core(i) &
- + deltat*veloc_outer_core(i) + deltatsqover2*accel_outer_core(i)
- veloc_outer_core(i) = veloc_outer_core(i) &
- + deltatover2*accel_outer_core(i)
- accel_outer_core(i) = 0._CUSTOM_REAL
- enddo
- ! inner core
- do i=1,NGLOB_INNER_CORE
- displ_inner_core(:,i) = displ_inner_core(:,i) &
- + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
- veloc_inner_core(:,i) = veloc_inner_core(:,i) &
- + deltatover2*accel_inner_core(:,i)
- accel_inner_core(:,i) = 0._CUSTOM_REAL
- enddo
+ ! mantle
+ do i=1,NGLOB_CRUST_MANTLE
+ displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+ + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
+ veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
+ + deltatover2*accel_crust_mantle(:,i)
+ accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ enddo
+ ! outer core
+ do i=1,NGLOB_OUTER_CORE
+ displ_outer_core(i) = displ_outer_core(i) &
+ + deltat*veloc_outer_core(i) + deltatsqover2*accel_outer_core(i)
+ veloc_outer_core(i) = veloc_outer_core(i) &
+ + deltatover2*accel_outer_core(i)
+ accel_outer_core(i) = 0._CUSTOM_REAL
+ enddo
+ ! inner core
+ do i=1,NGLOB_INNER_CORE
+ displ_inner_core(:,i) = displ_inner_core(:,i) &
+ + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
+ veloc_inner_core(:,i) = veloc_inner_core(:,i) &
+ + deltatover2*accel_inner_core(:,i)
+ accel_inner_core(:,i) = 0._CUSTOM_REAL
+ enddo
#endif
+ ! backward field
+ if (SIMULATION_TYPE == 3) then
-
-
- ! backward field
- if (SIMULATION_TYPE == 3) then
-
#ifdef _HANDOPT_NEWMARK
! way 2:
- ! mantle
- if(imodulo_NGLOB_CRUST_MANTLE >= 1) then
- do i=1,imodulo_NGLOB_CRUST_MANTLE
+ ! mantle
+ if(imodulo_NGLOB_CRUST_MANTLE >= 1) then
+ do i=1,imodulo_NGLOB_CRUST_MANTLE
+ b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
+ + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
+ b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
+ + b_deltatover2*b_accel_crust_mantle(:,i)
+ b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ enddo
+ endif
+ do i=imodulo_NGLOB_CRUST_MANTLE+1,NGLOB_CRUST_MANTLE,3
b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
+ b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
+ b_displ_crust_mantle(:,i+1) = b_displ_crust_mantle(:,i+1) &
+ + b_deltat*b_veloc_crust_mantle(:,i+1) + b_deltatsqover2*b_accel_crust_mantle(:,i+1)
+ b_displ_crust_mantle(:,i+2) = b_displ_crust_mantle(:,i+2) &
+ + b_deltat*b_veloc_crust_mantle(:,i+2) + b_deltatsqover2*b_accel_crust_mantle(:,i+2)
+
+
b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
+ b_deltatover2*b_accel_crust_mantle(:,i)
+ b_veloc_crust_mantle(:,i+1) = b_veloc_crust_mantle(:,i+1) &
+ + b_deltatover2*b_accel_crust_mantle(:,i+1)
+ b_veloc_crust_mantle(:,i+2) = b_veloc_crust_mantle(:,i+2) &
+ + b_deltatover2*b_accel_crust_mantle(:,i+2)
+
b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ b_accel_crust_mantle(:,i+1) = 0._CUSTOM_REAL
+ b_accel_crust_mantle(:,i+2) = 0._CUSTOM_REAL
enddo
- endif
- do i=imodulo_NGLOB_CRUST_MANTLE+1,NGLOB_CRUST_MANTLE,3
- b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
- + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
- b_displ_crust_mantle(:,i+1) = b_displ_crust_mantle(:,i+1) &
- + b_deltat*b_veloc_crust_mantle(:,i+1) + b_deltatsqover2*b_accel_crust_mantle(:,i+1)
- b_displ_crust_mantle(:,i+2) = b_displ_crust_mantle(:,i+2) &
- + b_deltat*b_veloc_crust_mantle(:,i+2) + b_deltatsqover2*b_accel_crust_mantle(:,i+2)
+ ! outer core
+ do i=1,NGLOB_OUTER_CORE
+ b_displ_outer_core(i) = b_displ_outer_core(i) &
+ + b_deltat*b_veloc_outer_core(i) + b_deltatsqover2*b_accel_outer_core(i)
+ b_veloc_outer_core(i) = b_veloc_outer_core(i) &
+ + b_deltatover2*b_accel_outer_core(i)
+ b_accel_outer_core(i) = 0._CUSTOM_REAL
+ enddo
- b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
- + b_deltatover2*b_accel_crust_mantle(:,i)
- b_veloc_crust_mantle(:,i+1) = b_veloc_crust_mantle(:,i+1) &
- + b_deltatover2*b_accel_crust_mantle(:,i+1)
- b_veloc_crust_mantle(:,i+2) = b_veloc_crust_mantle(:,i+2) &
- + b_deltatover2*b_accel_crust_mantle(:,i+2)
+ ! inner core
+ if(imodulo_NGLOB_INNER_CORE >= 1) then
+ do i=1,imodulo_NGLOB_INNER_CORE
+ b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
+ + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
+ b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
+ + b_deltatover2*b_accel_inner_core(:,i)
+ b_accel_inner_core(:,i) = 0._CUSTOM_REAL
+ enddo
+ endif
+ do i=imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE,3
+ b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
+ + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
+ b_displ_inner_core(:,i+1) = b_displ_inner_core(:,i+1) &
+ + b_deltat*b_veloc_inner_core(:,i+1) + b_deltatsqover2*b_accel_inner_core(:,i+1)
+ b_displ_inner_core(:,i+2) = b_displ_inner_core(:,i+2) &
+ + b_deltat*b_veloc_inner_core(:,i+2) + b_deltatsqover2*b_accel_inner_core(:,i+2)
- b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
- b_accel_crust_mantle(:,i+1) = 0._CUSTOM_REAL
- b_accel_crust_mantle(:,i+2) = 0._CUSTOM_REAL
- enddo
+ b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
+ + b_deltatover2*b_accel_inner_core(:,i)
+ b_veloc_inner_core(:,i+1) = b_veloc_inner_core(:,i+1) &
+ + b_deltatover2*b_accel_inner_core(:,i+1)
+ b_veloc_inner_core(:,i+2) = b_veloc_inner_core(:,i+2) &
+ + b_deltatover2*b_accel_inner_core(:,i+2)
- ! outer core
- do i=1,NGLOB_OUTER_CORE
- b_displ_outer_core(i) = b_displ_outer_core(i) &
- + b_deltat*b_veloc_outer_core(i) + b_deltatsqover2*b_accel_outer_core(i)
- b_veloc_outer_core(i) = b_veloc_outer_core(i) &
- + b_deltatover2*b_accel_outer_core(i)
- b_accel_outer_core(i) = 0._CUSTOM_REAL
- enddo
-
- ! inner core
- if(imodulo_NGLOB_INNER_CORE >= 1) then
- do i=1,imodulo_NGLOB_INNER_CORE
+ b_accel_inner_core(:,i) = 0._CUSTOM_REAL
+ b_accel_inner_core(:,i+1) = 0._CUSTOM_REAL
+ b_accel_inner_core(:,i+2) = 0._CUSTOM_REAL
+ enddo
+#else
+! way 1:
+ ! mantle
+ do i=1,NGLOB_CRUST_MANTLE
+ b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
+ + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
+ b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
+ + b_deltatover2*b_accel_crust_mantle(:,i)
+ b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ enddo
+ ! outer core
+ do i=1,NGLOB_OUTER_CORE
+ b_displ_outer_core(i) = b_displ_outer_core(i) &
+ + b_deltat*b_veloc_outer_core(i) + b_deltatsqover2*b_accel_outer_core(i)
+ b_veloc_outer_core(i) = b_veloc_outer_core(i) &
+ + b_deltatover2*b_accel_outer_core(i)
+ b_accel_outer_core(i) = 0._CUSTOM_REAL
+ enddo
+ ! inner core
+ do i=1,NGLOB_INNER_CORE
b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
+ b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
+ b_deltatover2*b_accel_inner_core(:,i)
b_accel_inner_core(:,i) = 0._CUSTOM_REAL
enddo
- endif
- do i=imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE,3
- b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
- + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
- b_displ_inner_core(:,i+1) = b_displ_inner_core(:,i+1) &
- + b_deltat*b_veloc_inner_core(:,i+1) + b_deltatsqover2*b_accel_inner_core(:,i+1)
- b_displ_inner_core(:,i+2) = b_displ_inner_core(:,i+2) &
- + b_deltat*b_veloc_inner_core(:,i+2) + b_deltatsqover2*b_accel_inner_core(:,i+2)
+#endif
+ endif ! SIMULATION_TYPE == 3
+ else
+ ! on GPU
+ ! Includes SIM_TYPE 1 & 3
- b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
- + b_deltatover2*b_accel_inner_core(:,i)
- b_veloc_inner_core(:,i+1) = b_veloc_inner_core(:,i+1) &
- + b_deltatover2*b_accel_inner_core(:,i+1)
- b_veloc_inner_core(:,i+2) = b_veloc_inner_core(:,i+2) &
- + b_deltatover2*b_accel_inner_core(:,i+2)
+ ! outer core region
+ call it_update_displacement_oc_cuda(Mesh_pointer, &
+ deltat, deltatsqover2, deltatover2, &
+ b_deltat, b_deltatsqover2, b_deltatover2)
+ ! inner core region
+ call it_update_displacement_ic_cuda(Mesh_pointer, &
+ deltat, deltatsqover2, deltatover2, &
+ b_deltat, b_deltatsqover2, b_deltatover2)
- b_accel_inner_core(:,i) = 0._CUSTOM_REAL
- b_accel_inner_core(:,i+1) = 0._CUSTOM_REAL
- b_accel_inner_core(:,i+2) = 0._CUSTOM_REAL
- enddo
-#else
-! way 1:
- ! mantle
- do i=1,NGLOB_CRUST_MANTLE
- b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
- + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
- b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
- + b_deltatover2*b_accel_crust_mantle(:,i)
- b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
- enddo
- ! outer core
- do i=1,NGLOB_OUTER_CORE
- b_displ_outer_core(i) = b_displ_outer_core(i) &
- + b_deltat*b_veloc_outer_core(i) + b_deltatsqover2*b_accel_outer_core(i)
- b_veloc_outer_core(i) = b_veloc_outer_core(i) &
- + b_deltatover2*b_accel_outer_core(i)
- b_accel_outer_core(i) = 0._CUSTOM_REAL
- enddo
- ! inner core
- do i=1,NGLOB_INNER_CORE
- b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
- + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
- b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
- + b_deltatover2*b_accel_inner_core(:,i)
- b_accel_inner_core(:,i) = 0._CUSTOM_REAL
- enddo
-#endif
- endif ! SIMULATION_TYPE == 3
+ ! crust/mantle region
+ call it_update_displacement_cm_cuda(Mesh_pointer, &
+ deltat, deltatsqover2, deltatover2, &
+ b_deltat, b_deltatsqover2, b_deltatover2)
+ endif
! integral of strain for adjoint movie volume
if(MOVIE_VOLUME .and. (MOVIE_VOLUME_TYPE == 2 .or. MOVIE_VOLUME_TYPE == 3) ) then
-! Iepsilondev_crust_mantle(:,:,:,:,:) = Iepsilondev_crust_mantle(:,:,:,:,:) &
-! + deltat*epsilondev_crust_mantle(:,:,:,:,:)
+ if( GPU_MODE ) then
+ ! transfers strain arrays onto CPU
+ call transfer_strain_cm_from_device(Mesh_pointer,eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle, &
+ epsilondev_xy_crust_mantle,epsilondev_xz_crust_mantle, &
+ epsilondev_yz_crust_mantle)
+ endif
+
+ ! updates integral values
Iepsilondev_crust_mantle(1,:,:,:,:) = Iepsilondev_crust_mantle(1,:,:,:,:) &
- + deltat*epsilondev_xx_crust_mantle(:,:,:,:)
+ + deltat*epsilondev_xx_crust_mantle(:,:,:,:)
Iepsilondev_crust_mantle(2,:,:,:,:) = Iepsilondev_crust_mantle(2,:,:,:,:) &
- + deltat*epsilondev_yy_crust_mantle(:,:,:,:)
+ + deltat*epsilondev_yy_crust_mantle(:,:,:,:)
Iepsilondev_crust_mantle(3,:,:,:,:) = Iepsilondev_crust_mantle(3,:,:,:,:) &
- + deltat*epsilondev_xy_crust_mantle(:,:,:,:)
+ + deltat*epsilondev_xy_crust_mantle(:,:,:,:)
Iepsilondev_crust_mantle(4,:,:,:,:) = Iepsilondev_crust_mantle(4,:,:,:,:) &
- + deltat*epsilondev_xz_crust_mantle(:,:,:,:)
+ + deltat*epsilondev_xz_crust_mantle(:,:,:,:)
Iepsilondev_crust_mantle(5,:,:,:,:) = Iepsilondev_crust_mantle(5,:,:,:,:) &
- + deltat*epsilondev_yz_crust_mantle(:,:,:,:)
+ + deltat*epsilondev_yz_crust_mantle(:,:,:,:)
Ieps_trace_over_3_crust_mantle(:,:,:,:) = Ieps_trace_over_3_crust_mantle(:,:,:,:) &
- + deltat*eps_trace_over_3_crust_mantle(:,:,:,:)
+ + deltat*eps_trace_over_3_crust_mantle(:,:,:,:)
endif
+
+
end subroutine it_update_displacement_scheme
@@ -477,8 +503,62 @@
! transfers fields on GPU back onto CPU
use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
implicit none
+ ! to store forward wave fields
+ if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+
+ call transfer_fields_cm_from_device(NDIM*NGLOB_CRUST_MANTLE, &
+ displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
+ Mesh_pointer)
+ call transfer_fields_ic_from_device(NDIM*NGLOB_INNER_CORE, &
+ displ_inner_core,veloc_inner_core,accel_inner_core, &
+ Mesh_pointer)
+ call transfer_fields_oc_from_device(NGLOB_OUTER_CORE, &
+ displ_outer_core,veloc_outer_core,accel_outer_core, &
+ Mesh_pointer)
+
+ call transfer_strain_cm_from_device(Mesh_pointer, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle, &
+ epsilondev_xy_crust_mantle,epsilondev_xz_crust_mantle, &
+ epsilondev_yz_crust_mantle)
+ call transfer_strain_ic_from_device(Mesh_pointer, &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core, &
+ epsilondev_xy_inner_core,epsilondev_xz_inner_core, &
+ epsilondev_yz_inner_core)
+
+ if (ROTATION_VAL) then
+ call transfer_rotation_from_device(Mesh_pointer,A_array_rotation,B_array_rotation)
+ endif
+
+ ! note: for kernel simulations (SIMULATION_TYPE == 3), attenuation is
+ ! only mimicking effects on phase shifts, but not on amplitudes.
+ ! flag USE_ATTENUATION_MIMIC will have to be set to true in this case.
+ !
+ ! arrays b_R_xx, ... are not used when USE_ATTENUATION_MIMIC is set,
+ ! therefore no need to transfer arrays from GPU to CPU
+ !if (ATTENUATION) then
+ !endif
+
+ else if (SIMULATION_TYPE == 3) then
+ ! to store kernels
+ !call transfer_kernels_ac_to_host(Mesh_pointer,rho_ac_kl,kappa_ac_kl,NSPEC_AB)
+ !call transfer_kernels_el_to_host(Mesh_pointer,rho_kl,mu_kl,kappa_kl,NSPEC_AB)
+
+ ! specific noise strength kernel
+ if( NOISE_TOMOGRAPHY == 3 ) then
+ !call transfer_kernels_noise_to_host(Mesh_pointer,Sigma_kl,NSPEC_AB)
+ endif
+
+ ! approximative hessian for preconditioning kernels
+ if ( APPROXIMATE_HESS_KL ) then
+ call transfer_kernels_hess_cm_tohost(Mesh_pointer,hess_kl_crust_mantle,NSPEC_CRUST_MANTLE)
+ endif
+ endif
+
! frees allocated memory on GPU
call prepare_cleanup_device(Mesh_pointer)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -703,7 +703,7 @@
! main process broadcasts the results to all the slices
call MPI_BCAST(nrec,1,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
-
+
call MPI_BCAST(islice_selected_rec,nrec,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
call MPI_BCAST(ispec_selected_rec,nrec,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
call MPI_BCAST(xi_receiver,nrec,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/noise_tomography.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/noise_tomography.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/noise_tomography.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -417,10 +417,10 @@
! furthermore, the CMTSOLUTION needs to be zero, i.e., no earthquakes.
! now this must be manually set in DATA/CMTSOLUTION, by USERS.
- use specfem_par
+ use specfem_par
use specfem_par_crustmantle
implicit none
-
+
! local parameters
integer :: i,j,k,iglob
@@ -442,11 +442,7 @@
else
! on GPU
- call load_GPU_elastic()
-
- call noise_add_source_master_rec_cu(Mesh_pointer,it,irec_master_noise,islice_selected_rec)
-
- call load_CPU_elastic()
+ call noise_add_source_master_rec_cu(Mesh_pointer,it,irec_master_noise,islice_selected_rec)
endif
end subroutine noise_add_source_master_rec
@@ -475,7 +471,7 @@
! local parameters
integer :: ispec2D,ispec,i,j,k,iglob
-
+
! get coordinates of surface mesh and surface displacement
if( .not. GPU_MODE ) then
! on CPU
@@ -493,7 +489,7 @@
! on GPU
call noise_transfer_surface_to_host(Mesh_pointer,noise_surface_movie)
endif
-
+
! save surface motion to disk
call write_abs(9,noise_surface_movie,CUSTOM_REAL*NDIM*NGLLX*NGLLY*nspec_top,it)
@@ -522,17 +518,17 @@
use specfem_par
use specfem_par_crustmantle
-
+
implicit none
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE),intent(inout) :: accel
integer,intent(in) :: it_index
-
+
! local parameters
integer :: ipoin,ispec2D,ispec,i,j,k,iglob
real(kind=CUSTOM_REAL) :: eta
-
-
+
+
! read surface movie
call read_abs(9,noise_surface_movie,CUSTOM_REAL*NDIM*NGLLX*NGLLY*nspec_top,it_index)
@@ -572,7 +568,7 @@
else
! on GPU
- call noise_add_surface_movie_cuda(Mesh_pointer,noise_surface_movie)
+ call noise_add_surface_movie_cuda(Mesh_pointer,noise_surface_movie)
endif
end subroutine noise_read_add_surface_movie
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -30,12 +30,12 @@
use specfem_par
use specfem_par_movie
implicit none
-
+
include 'mpif.h'
! get MPI starting time
time_start = MPI_WTIME()
-
+
! user output infos
call prepare_timerun_user_output()
@@ -44,7 +44,7 @@
! convert x/y/z into r/theta/phi spherical coordinates
call prepare_timerun_convert_coord()
-
+
! allocate files to save movies
! for noise tomography, store_val_x/y/z/ux/uy/uz needed for 'surface movie'
if(MOVIE_SURFACE .or. NOISE_TOMOGRAPHY /= 0 ) then
@@ -310,13 +310,13 @@
use specfem_par
use specfem_par_crustmantle
use specfem_par_innercore
- use specfem_par_outercore
+ use specfem_par_outercore
implicit none
! local parameters
integer :: i
real(kind=CUSTOM_REAL) :: rval,thetaval,phival
-
+
! change x, y, z to r, theta and phi once and for all
! IMPROVE dangerous: old name kept (xstore ystore zstore) for new values
@@ -361,7 +361,7 @@
use specfem_par
use specfem_par_movie
implicit none
-
+
! local parameters
integer :: ier
@@ -415,7 +415,7 @@
use specfem_par_crustmantle
use specfem_par_movie
implicit none
-
+
! local parameters
integer :: ier
@@ -815,7 +815,7 @@
subroutine prepare_timerun_init_wavefield()
! initializes arrays
-
+
use specfem_par
use specfem_par_crustmantle
use specfem_par_innercore
@@ -1082,7 +1082,8 @@
ANISOTROPIC_3D_MANTLE_VAL, &
ANISOTROPIC_INNER_CORE_VAL, &
SAVE_BOUNDARY_MESH, &
- USE_MESH_COLORING_GPU)
+ USE_MESH_COLORING_GPU, &
+ APPROXIMATE_HESS_KL)
call sync_all()
! prepares rotation arrays
@@ -1209,13 +1210,13 @@
jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core, &
jacobian2D_bottom_outer_core, &
vp_outer_core)
-
+
endif
call sync_all()
-
+
! prepares MPI interfaces
if(myrank == 0 ) write(IMAIN,*) " loading mpi interfaces"
-
+
call prepare_mpi_buffers_device(Mesh_pointer, &
num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_forward_arrays.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_forward_arrays.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_forward_arrays.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -73,7 +73,7 @@
if(NUMBER_OF_THIS_RUN > 1) then
write(outputname,"('dump_all_arrays',i6.6)") myrank
open(unit=55,file=trim(LOCAL_PATH)//'/'//outputname,status='old',action='read',form='unformatted')
-
+
read(55) displ_crust_mantle
read(55) veloc_crust_mantle
read(55) accel_crust_mantle
@@ -175,11 +175,18 @@
implicit none
!local parameters
+ integer :: ier
character(len=150) outputname
write(outputname,'(a,i6.6,a)') 'proc',myrank,'_save_forward_arrays.bin'
- open(unit=55,file=trim(LOCAL_PATH)//'/'//outputname,status='old',action='read',form='unformatted')
-
+ open(unit=55,file=trim(LOCAL_PATH)//'/'//outputname, &
+ status='old',action='read',form='unformatted',iostat=ier)
+ if( ier /= 0 ) then
+ print*,'error: opening proc_****_save_forward_arrays.bin'
+ print*,'path: ',trim(LOCAL_PATH)//'/'//outputname
+ call exit_mpi(myrank,'error open file save_forward_arrays.bin')
+ endif
+
read(55) b_displ_crust_mantle
read(55) b_veloc_crust_mantle
read(55) b_accel_crust_mantle
@@ -202,10 +209,41 @@
read(55) b_epsilondev_xz_inner_core
read(55) b_epsilondev_yz_inner_core
+ ! transfers fields onto GPU
+ if(GPU_MODE) then
+ call transfer_b_fields_cm_to_device(NDIM*NGLOB_CRUST_MANTLE, &
+ b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
+ Mesh_pointer)
+
+ call transfer_b_fields_ic_to_device(NDIM*NGLOB_INNER_CORE, &
+ b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
+ Mesh_pointer)
+
+ call transfer_b_fields_oc_to_device(NGLOB_OUTER_CORE, &
+ b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core, &
+ Mesh_pointer)
+
+ call transfer_b_strain_cm_to_device(Mesh_pointer, &
+ b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle, &
+ b_epsilondev_xy_crust_mantle,b_epsilondev_xz_crust_mantle, &
+ b_epsilondev_yz_crust_mantle)
+
+ call transfer_b_strain_ic_to_device(Mesh_pointer, &
+ b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core, &
+ b_epsilondev_xy_inner_core,b_epsilondev_xz_inner_core, &
+ b_epsilondev_yz_inner_core)
+ endif
+
+
if (ROTATION_VAL) then
read(55) b_A_array_rotation
read(55) b_B_array_rotation
+ ! transfers to GPU
+ if(GPU_MODE) then
+ call transfer_b_rotation_to_device(Mesh_pointer,b_A_array_rotation,b_B_array_rotation)
+ endif
endif
+
if (ATTENUATION_VAL) then
read(55) b_R_xx_crust_mantle
read(55) b_R_yy_crust_mantle
@@ -219,8 +257,17 @@
read(55) b_R_xz_inner_core
read(55) b_R_yz_inner_core
+ ! note: for kernel simulations (SIMULATION_TYPE == 3), attenuation is
+ ! only mimicking effects on phase shifts, but not on amplitudes.
+ ! flag USE_ATTENUATION_MIMIC will have to be set to true in this case.
+ !
+ ! arrays b_R_xx, ... are not used when USE_ATTENUATION_MIMIC is set,
+ ! therefore no need to transfer arrays onto GPU
+ !if(GPU_MODE) then
+ !endif
+
endif
-
+
close(55)
end subroutine read_forward_arrays
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -34,7 +34,7 @@
implicit none
include 'mpif.h'
-
+
! local parameters
integer :: ier
@@ -53,16 +53,16 @@
! reads "solver_data_1.bin" & "solver_data_2.bin" files for crust and mantle
call read_mesh_databases_CM()
-
+
! reads "solver_data_1.bin" & "solver_data_2.bin" files for outer core
call read_mesh_databases_OC()
-
+
! reads "solver_data_1.bin" & "solver_data_2.bin" files for inner core
call read_mesh_databases_IC()
! reads "boundary.bin" files to couple mantle with outer core and inner core boundaries
call read_mesh_databases_coupling()
-
+
! reads "addressing.txt" 2-D addressing for summation between slices with MPI
call read_mesh_databases_addressing()
@@ -89,7 +89,7 @@
write(IMAIN,*) 'Elapsed time for reading mesh in seconds = ',sngl(tCPU)
write(IMAIN,*)
endif
-
+
! frees temporary allocated arrays
deallocate(is_on_a_slice_edge_crust_mantle, &
is_on_a_slice_edge_outer_core, &
@@ -134,7 +134,7 @@
READ_KAPPA_MU = .true.
READ_TISO = .true.
endif
-
+
call read_arrays_solver(IREGION_CRUST_MANTLE,myrank, &
rho_vp_crust_mantle,rho_vs_crust_mantle, &
xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
@@ -181,13 +181,13 @@
! local parameters
integer :: nspec_iso,nspec_tiso,nspec_ani
logical :: READ_KAPPA_MU,READ_TISO
-
+
! dummy array that does not need to be actually read
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
logical, dimension(:),allocatable:: dummy_ispec_is_tiso
integer, dimension(:),allocatable :: dummy_idoubling_outer_core
-
+
! outer core (no anisotropy nor S velocity)
! rmass_ocean_load is not used in this routine because it is meaningless in the outer core
READ_KAPPA_MU = .false.
@@ -195,7 +195,7 @@
nspec_iso = NSPEC_OUTER_CORE
nspec_tiso = 1
nspec_ani = 1
-
+
! dummy allocation
allocate(dummy_ispec_is_tiso(NSPEC_OUTER_CORE))
allocate(dummy_idoubling_outer_core(NSPEC_OUTER_CORE))
@@ -251,7 +251,7 @@
! dummy array that does not need to be actually read
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
logical, dimension(:),allocatable:: dummy_ispec_is_tiso
-
+
! inner core (no anisotropy)
! rmass_ocean_load is not used in this routine because it is meaningless in the inner core
READ_KAPPA_MU = .true.
@@ -263,10 +263,10 @@
else
nspec_ani = 1
endif
-
+
! dummy allocation
allocate(dummy_ispec_is_tiso(NSPEC_INNER_CORE))
-
+
call read_arrays_solver(IREGION_INNER_CORE,myrank, &
dummy_array,dummy_array, &
xstore_inner_core,ystore_inner_core,zstore_inner_core, &
@@ -313,7 +313,7 @@
implicit none
include 'mpif.h'
-
+
! local parameters
integer njunk1,njunk2,njunk3
@@ -595,7 +595,7 @@
use specfem_par
use specfem_par_crustmantle
use specfem_par_innercore
- use specfem_par_outercore
+ use specfem_par_outercore
implicit none
! local parameters
@@ -606,7 +606,7 @@
! mantle and crust
if(myrank == 0) write(IMAIN,*) 'crust/mantle region:'
-
+
call read_arrays_buffers_solver(IREGION_CRUST_MANTLE,myrank,iboolleft_xi_crust_mantle, &
iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
@@ -634,7 +634,7 @@
! inner core
if(myrank == 0) write(IMAIN,*) 'inner core region:'
-
+
call read_arrays_buffers_solver(IREGION_INNER_CORE,myrank, &
iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
@@ -721,11 +721,11 @@
! note: fix_... routines below update is_on_a_slice_edge_.. arrays:
! assign flags for each element which is on a rim of the slice
- ! thus, they include elements on top and bottom not shared with other MPI partitions
+ ! thus, they include elements on top and bottom not shared with other MPI partitions
!
! we will re-set these flags when setting up inner/outer elements, but will
- ! use these arrays for now as initial guess for the search for elements which share a global point
- ! between different MPI processes
+ ! use these arrays for now as initial guess for the search for elements which share a global point
+ ! between different MPI processes
call fix_non_blocking_slices(is_on_a_slice_edge_crust_mantle,iboolright_xi_crust_mantle, &
iboolleft_xi_crust_mantle,iboolright_eta_crust_mantle,iboolleft_eta_crust_mantle, &
npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle,ibool_crust_mantle, &
@@ -748,31 +748,31 @@
idoubling_inner_core,npoin2D_cube_from_slices,ibool_central_cube, &
NSPEC2D_BOTTOM(IREGION_INNER_CORE),ichunk)
endif
-
+
! debug: saves element flags
! crust mantle
- !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_crust_mantle_proc',myrank
+ !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_crust_mantle_proc',myrank
!call write_VTK_data_elem_l(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
! xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
! ibool_crust_mantle, &
- ! is_on_a_slice_edge_crust_mantle,filename)
+ ! is_on_a_slice_edge_crust_mantle,filename)
! outer core
- !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_outer_core_proc',myrank
+ !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_outer_core_proc',myrank
!call write_VTK_data_elem_l(NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
! xstore_outer_core,ystore_outer_core,zstore_outer_core, &
! ibool_outer_core, &
! is_on_a_slice_edge_outer_core,filename)
-!daniel
+!daniel
! inner core
- write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_inner_core_proc',myrank
+ write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_inner_core_proc',myrank
call write_VTK_data_elem_l(NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
xstore_inner_core,ystore_inner_core,zstore_inner_core, &
ibool_inner_core, &
is_on_a_slice_edge_inner_core,filename)
-
+
end subroutine read_mesh_databases_MPIbuffers
-
-
+
+
!
!-------------------------------------------------------------------------------------------------
!
@@ -784,29 +784,29 @@
use specfem_par
use specfem_par_crustmantle
use specfem_par_innercore
- use specfem_par_outercore
+ use specfem_par_outercore
implicit none
include 'mpif.h'
-
- ! local parameters
+
+ ! local parameters
integer :: ier,ndim_assemble
character(len=150) :: filename
! temporary buffers for send and receive between faces of the slices and the chunks
real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC) :: &
buffer_send_faces_scalar,buffer_received_faces_scalar
-
+
! assigns initial maximum arrays
! for global slices, maximum number of neighbor is around 17 ( 8 horizontal, max of 8 on bottom )
integer, parameter :: MAX_NEIGHBOURS = 8 + NCORNERSCHUNKS_VAL
integer, dimension(MAX_NEIGHBOURS) :: my_neighbours,nibool_neighbours
integer, dimension(:,:),allocatable :: ibool_neighbours
- integer :: max_nibool
+ integer :: max_nibool
real(kind=CUSTOM_REAL),dimension(:),allocatable :: test_flag
real(kind=CUSTOM_REAL),dimension(:),allocatable :: test_flag_cc
integer :: i,j,k,ispec,iglob
-
+
! estimates initial maximum ibool array
max_nibool = npoin2D_max_all_CM_IC * NUMFACES_SHARED &
+ non_zero_nb_msgs_theor_in_cube*npoin2D_cube_from_slices
@@ -815,13 +815,13 @@
if( ier /= 0 ) call exit_mpi(myrank,'error allocating ibool_neighbours')
-! sets up MPI interfaces
-! crust mantle region
+! sets up MPI interfaces
+! crust mantle region
if( myrank == 0 ) write(IMAIN,*) 'crust mantle mpi:'
allocate(test_flag(NGLOB_CRUST_MANTLE), &
stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag')
-
+
! sets flag to rank id (+1 to avoid problems with zero rank)
test_flag(:) = myrank + 1.0
@@ -839,7 +839,7 @@
NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_XY,NCHUNKS_VAL)
-
+
! removes own myrank id (+1)
test_flag(:) = test_flag(:) - ( myrank + 1.0)
@@ -848,21 +848,21 @@
!call write_VTK_glob_points(NGLOB_CRUST_MANTLE, &
! xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
! test_flag,filename)
-
- ! determines neighbor rank for shared faces
+
+ ! determines neighbor rank for shared faces
call rmd_get_MPI_interfaces(myrank,NGLOB_CRUST_MANTLE,NSPEC_CRUST_MANTLE, &
test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
max_nibool,MAX_NEIGHBOURS, &
ibool_crust_mantle,&
is_on_a_slice_edge_crust_mantle, &
- IREGION_CRUST_MANTLE,.false.)
+ IREGION_CRUST_MANTLE,.false.)
deallocate(test_flag)
-
+
! stores MPI interfaces informations
allocate(my_neighbours_crust_mantle(num_interfaces_crust_mantle), &
- nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
+ nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_crust_mantle etc.')
@@ -877,13 +877,13 @@
! number of global ibool entries on each interface
nibool_interfaces_crust_mantle(:) = nibool_neighbours(1:num_interfaces_crust_mantle)
! global iglob point ids on each interface
- ibool_interfaces_crust_mantle(:,:) = ibool_neighbours(1:max_nibool_interfaces_crust_mantle,1:num_interfaces_crust_mantle)
+ ibool_interfaces_crust_mantle(:,:) = ibool_neighbours(1:max_nibool_interfaces_crust_mantle,1:num_interfaces_crust_mantle)
else
! dummy allocation (fortran90 should allow allocate statement with zero array size)
max_nibool_interfaces_crust_mantle = 0
allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)
endif
-
+
! debug: saves 1. MPI interface
!if( num_interfaces_crust_mantle >= 1 ) then
! write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_1_points_crust_mantle_proc',myrank
@@ -894,13 +894,13 @@
!endif
-! outer core region
+! outer core region
if( myrank == 0 ) write(IMAIN,*) 'outer core mpi:'
allocate(test_flag(NGLOB_OUTER_CORE), &
stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag outer core')
-
+
! sets flag to rank id (+1 to avoid problems with zero rank)
test_flag(:) = myrank + 1.0
@@ -919,7 +919,7 @@
NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE),NGLOB2DMAX_XY,NCHUNKS_VAL)
-
+
! removes own myrank id (+1)
test_flag(:) = test_flag(:) - ( myrank + 1.0)
@@ -929,21 +929,21 @@
! xstore_outer_core,ystore_outer_core,zstore_outer_core, &
! test_flag,filename)
-
- ! determines neighbor rank for shared faces
+
+ ! determines neighbor rank for shared faces
call rmd_get_MPI_interfaces(myrank,NGLOB_OUTER_CORE,NSPEC_OUTER_CORE, &
test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
max_nibool,MAX_NEIGHBOURS, &
ibool_outer_core,&
is_on_a_slice_edge_outer_core, &
- IREGION_OUTER_CORE,.false.)
+ IREGION_OUTER_CORE,.false.)
deallocate(test_flag)
-
+
! stores MPI interfaces informations
allocate(my_neighbours_outer_core(num_interfaces_outer_core), &
- nibool_interfaces_outer_core(num_interfaces_outer_core), &
+ nibool_interfaces_outer_core(num_interfaces_outer_core), &
stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_outer_core etc.')
@@ -958,7 +958,7 @@
! number of global ibool entries on each interface
nibool_interfaces_outer_core(:) = nibool_neighbours(1:num_interfaces_outer_core)
! global iglob point ids on each interface
- ibool_interfaces_outer_core(:,:) = ibool_neighbours(1:max_nibool_interfaces_outer_core,1:num_interfaces_outer_core)
+ ibool_interfaces_outer_core(:,:) = ibool_neighbours(1:max_nibool_interfaces_outer_core,1:num_interfaces_outer_core)
else
! dummy allocation (fortran90 should allow allocate statement with zero array size)
max_nibool_interfaces_outer_core = 0
@@ -980,16 +980,16 @@
allocate(test_flag(NGLOB_INNER_CORE), &
stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag inner core')
-
+
! sets flag to rank id (+1 to avoid problems with zero rank)
test_flag(:) = 0.0
do ispec=1,NSPEC_INNER_CORE
! suppress fictitious elements in central cube
- if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE) cycle
+ if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE) cycle
do k = 1,NGLLZ
do j = 1,NGLLY
do i = 1,NGLLX
- iglob = ibool_inner_core(i,j,k,ispec)
+ iglob = ibool_inner_core(i,j,k,ispec)
test_flag(iglob) = myrank + 1.0
enddo
enddo
@@ -1012,7 +1012,7 @@
NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE),NGLOB2DMAX_XY,NCHUNKS_VAL)
! removes own myrank id (+1)
- test_flag(:) = test_flag(:) - ( myrank + 1.0)
+ test_flag(:) = test_flag(:) - ( myrank + 1.0)
where( test_flag(:) < 0.0 ) test_flag(:) = 0.0
! debug: saves array
@@ -1022,14 +1022,14 @@
test_flag,filename)
! ! gets new interfaces for inner_core without central cube yet
-! ! determines neighbor rank for shared faces
+! ! determines neighbor rank for shared faces
! call rmd_get_MPI_interfaces(myrank,NGLOB_INNER_CORE,NSPEC_INNER_CORE, &
! test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
! num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
! max_nibool,MAX_NEIGHBOURS, &
! ibool_inner_core,&
! is_on_a_slice_edge_inner_core, &
-! IREGION_INNER_CORE,.false.,idoubling_inner_core)
+! IREGION_INNER_CORE,.false.,idoubling_inner_core)
! including central cube
if(INCLUDE_CENTRAL_CUBE) then
@@ -1038,22 +1038,22 @@
allocate(test_flag_cc(NGLOB_INNER_CORE), &
stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag_cc inner core')
-
+
! re-sets flag to rank id (+1 to avoid problems with zero rank)
test_flag_cc(:) = 0.0
do ispec=1,NSPEC_INNER_CORE
! suppress fictitious elements in central cube
- if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE) cycle
+ if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE) cycle
do k = 1,NGLLZ
do j = 1,NGLLY
do i = 1,NGLLX
- iglob = ibool_inner_core(i,j,k,ispec)
+ iglob = ibool_inner_core(i,j,k,ispec)
test_flag_cc(iglob) = myrank + 1.0
enddo
enddo
enddo
- enddo
-
+ enddo
+
! test_flag is a scalar, not a vector
ndim_assemble = 1
! use central cube buffers to assemble the inner core mass matrix with the central cube
@@ -1065,12 +1065,12 @@
ibelm_bottom_inner_core, NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
NGLOB_INNER_CORE, &
test_flag_cc,ndim_assemble)
-
+
! removes own myrank id (+1)
- test_flag_cc(:) = test_flag_cc(:) - ( myrank + 1.0)
+ test_flag_cc(:) = test_flag_cc(:) - ( myrank + 1.0)
where( test_flag_cc(:) < 0.0 ) test_flag_cc(:) = 0.0
-
+
write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_inner_core_B_proc',myrank
call write_VTK_glob_points(NGLOB_INNER_CORE, &
xstore_inner_core,ystore_inner_core,zstore_inner_core, &
@@ -1084,35 +1084,35 @@
! ibool_inner_core,&
! is_on_a_slice_edge_inner_core, &
! IREGION_INNER_CORE,.true.,idoubling_inner_core)
-
- ! adds both together
+
+ ! adds both together
test_flag(:) = test_flag(:) + test_flag_cc(:)
- deallocate(test_flag_cc)
+ deallocate(test_flag_cc)
! debug: saves array
write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_inner_core_C_proc',myrank
call write_VTK_glob_points(NGLOB_INNER_CORE, &
xstore_inner_core,ystore_inner_core,zstore_inner_core, &
test_flag,filename)
-
+
endif
! gets new interfaces for inner_core without central cube yet
- ! determines neighbor rank for shared faces
+ ! determines neighbor rank for shared faces
call rmd_get_MPI_interfaces(myrank,NGLOB_INNER_CORE,NSPEC_INNER_CORE, &
test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
max_nibool,MAX_NEIGHBOURS, &
ibool_inner_core,&
is_on_a_slice_edge_inner_core, &
- IREGION_INNER_CORE,.false.,idoubling_inner_core)
-
+ IREGION_INNER_CORE,.false.,idoubling_inner_core)
+
deallocate(test_flag)
-
+
! stores MPI interfaces informations
allocate(my_neighbours_inner_core(num_interfaces_inner_core), &
- nibool_interfaces_inner_core(num_interfaces_inner_core), &
+ nibool_interfaces_inner_core(num_interfaces_inner_core), &
stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_inner_core etc.')
@@ -1127,7 +1127,7 @@
! number of global ibool entries on each interface
nibool_interfaces_inner_core(:) = nibool_neighbours(1:num_interfaces_inner_core)
! global iglob point ids on each interface
- ibool_interfaces_inner_core(:,:) = ibool_neighbours(1:max_nibool_interfaces_inner_core,1:num_interfaces_inner_core)
+ ibool_interfaces_inner_core(:,:) = ibool_neighbours(1:max_nibool_interfaces_inner_core,1:num_interfaces_inner_core)
else
! dummy allocation (fortran90 should allow allocate statement with zero array size)
max_nibool_interfaces_inner_core = 0
@@ -1136,19 +1136,19 @@
! debug: saves 1. MPI interface
if( myrank == 0 .and. num_interfaces_inner_core >= 1 ) then
- write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_1_points_inner_core_proc',myrank
+ write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_1_points_inner_core_proc',myrank
call write_VTK_data_points(NGLOB_INNER_CORE, &
xstore_inner_core,ystore_inner_core,zstore_inner_core, &
ibool_interfaces_inner_core(1:nibool_interfaces_inner_core(1),1), &
nibool_interfaces_inner_core(1),filename)
!print*,'saved: ',trim(filename)//'.vtk'
- endif
+ endif
! synchronizes MPI processes
call sync_all()
! frees temporary array
- deallocate(ibool_neighbours)
+ deallocate(ibool_neighbours)
! allocates MPI buffers
@@ -1183,7 +1183,7 @@
stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_outer_core etc.')
endif
-
+
! inner core
allocate(buffer_send_vector_inner_core(NDIM,max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
buffer_recv_vector_inner_core(NDIM,max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
@@ -1199,7 +1199,7 @@
stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_inner_core etc.')
endif
-
+
end subroutine read_mesh_databases_MPIinter
@@ -1216,7 +1216,7 @@
IREGION,add_central_cube,idoubling)
use constants
-
+
implicit none
include 'mpif.h'
@@ -1233,14 +1233,14 @@
integer,intent(out) :: num_interfaces,max_nibool_interfaces
integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool
-
+
logical,dimension(NSPEC),intent(inout) :: is_on_a_slice_edge
-
- integer,intent(in) :: IREGION
+
+ integer,intent(in) :: IREGION
logical,intent(in) :: add_central_cube
integer,dimension(NSPEC),optional:: idoubling
-
- ! local parameters
+
+ ! local parameters
integer :: ispec,iglob,j,k
integer :: iface,iedge,icorner
integer :: ii,iinterface,icurrent,rank
@@ -1248,12 +1248,12 @@
logical :: is_done,ispec_is_outer
integer,dimension(NGLOB) :: work_test_flag
logical,dimension(NSPEC) :: work_ispec_is_outer
-
- ! initializes
+
+ ! initializes
if( add_central_cube) then
! adds points to existing inner_core interfaces
iinterface = num_interfaces
- work_ispec_is_outer(:) = is_on_a_slice_edge(:)
+ work_ispec_is_outer(:) = is_on_a_slice_edge(:)
else
! creates new interfaces
iinterface = 0
@@ -1264,7 +1264,7 @@
ibool_neighbours(:,:) = 0
work_ispec_is_outer(:) = .false.
endif
-
+
! makes working copy (converted to nearest integers)
work_test_flag(:) = nint( test_flag(:) )
@@ -1279,44 +1279,44 @@
if( idoubling(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) cycle
endif
- ! sets flag if element has global points shared with other processes
- ispec_is_outer = .false.
+ ! sets flag if element has global points shared with other processes
+ ispec_is_outer = .false.
! 1. finds neighbours which share a whole face with this process
! (faces are shared only with 1 other neighbour process)
-
+
! loops over all faces of element
do iface = 1, 6
-
+
! chooses a point inside face
select case( iface )
case( 1 )
- ! face I == 1
- iglob = ibool(1,2,2,ispec)
+ ! face I == 1
+ iglob = ibool(1,2,2,ispec)
case( 2 )
! face I == NGLLX
- iglob = ibool(NGLLX,2,2,ispec)
+ iglob = ibool(NGLLX,2,2,ispec)
case( 3 )
! face J == 1
- iglob = ibool(2,1,2,ispec)
+ iglob = ibool(2,1,2,ispec)
case( 4 )
! face J == NGLLY
- iglob = ibool(2,NGLLY,2,ispec)
+ iglob = ibool(2,NGLLY,2,ispec)
case( 5 )
! face K == 1
iglob = ibool(2,2,1,ispec)
case( 6 )
! face K == NGLLZ
iglob = ibool(2,2,NGLLZ,ispec)
- end select
+ end select
! checks assembled flag on global point
if( work_test_flag(iglob) > 0 ) then
ispec_is_outer = .true.
-
+
! rank of neighbor process
rank = work_test_flag(iglob) - 1
-
+
! checks ranks range
if( rank < 0 .or. rank >= NPROCTOT_VAL ) then
print*,'error face rank: ',myrank,'ispec=',ispec
@@ -1324,18 +1324,18 @@
print*,' face ',iface
call exit_mpi(myrank,'error face neighbor mpi rank')
endif
-
+
! checks if already stored
icurrent = 0
is_done = .false.
do ii = 1,iinterface
- if( rank == my_neighbours(ii) ) then
+ if( rank == my_neighbours(ii) ) then
icurrent = ii
is_done = .true.
exit
endif
enddo
-
+
! updates interfaces array
if( .not. is_done ) then
iinterface = iinterface + 1
@@ -1347,43 +1347,43 @@
endif
if( icurrent == 0 ) &
call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
-
+
! adds interface points and removes neighbor flag from face
! assumes NGLLX == NGLLY == NGLLZ
do k=1,NGLLX
do j=1,NGLLX
select case( iface )
case( 1 )
- ! face I == 1
- iglob = ibool(1,j,k,ispec)
+ ! face I == 1
+ iglob = ibool(1,j,k,ispec)
case( 2 )
! face I == NGLLX
- iglob = ibool(NGLLX,j,k,ispec)
+ iglob = ibool(NGLLX,j,k,ispec)
case( 3 )
! face J == 1
- iglob = ibool(j,1,k,ispec)
+ iglob = ibool(j,1,k,ispec)
case( 4 )
! face J == NGLLY
- iglob = ibool(j,NGLLY,k,ispec)
+ iglob = ibool(j,NGLLY,k,ispec)
case( 5 )
! face K == 1
iglob = ibool(j,k,1,ispec)
case( 6 )
! face K == NGLLZ
iglob = ibool(j,k,NGLLZ,ispec)
- end select
-
+ end select
+
! checks that we take each global point (on edges and corners) only once
if( work_test_flag(iglob) <= 0 ) cycle ! continues to next point
-
+
! increases number of total points on this interface
nibool_neighbours(icurrent) = nibool_neighbours(icurrent) + 1
if( nibool_neighbours(icurrent) > max_nibool) &
call exit_mpi(myrank,'interface face exceeds max_nibool range')
-
+
! stores interface iglob index
ibool_neighbours( nibool_neighbours(icurrent),icurrent ) = iglob
-
+
! re-sets flag
work_test_flag(iglob) = work_test_flag(iglob) - ( rank + 1 )
! debug
@@ -1394,7 +1394,7 @@
endif
enddo
enddo
- endif
+ endif
enddo ! iface
! 2. finds neighbours which share a single edge with this process
@@ -1402,12 +1402,12 @@
! loops over all edges of element
do iedge = 1, 12
-
+
! chooses a point inside edge but not corner
select case( iedge )
case( 1 )
! face I == 1, J == 1
- iglob = ibool(1,1,2,ispec)
+ iglob = ibool(1,1,2,ispec)
case( 2 )
! face I == 1, J == NGLLY
iglob = ibool(1,NGLLY,2,ispec)
@@ -1446,10 +1446,10 @@
! checks assembled flag on global point
if( work_test_flag(iglob) > 0 ) then
ispec_is_outer = .true.
-
+
! rank of neighbor process
rank = work_test_flag(iglob) - 1
-
+
! checks ranks range
if( rank < 0 .or. rank >= NPROCTOT_VAL ) then
print*,'error egde rank: ',myrank
@@ -1457,18 +1457,18 @@
print*,' edge ',iedge
call exit_mpi(myrank,'error edge neighbor mpi rank')
endif
-
+
! checks if already stored
icurrent = 0
is_done = .false.
do ii = 1,iinterface
- if( rank == my_neighbours(ii) ) then
+ if( rank == my_neighbours(ii) ) then
icurrent = ii
is_done = .true.
exit
endif
enddo
-
+
! updates interfaces array
if( .not. is_done ) then
iinterface = iinterface + 1
@@ -1480,14 +1480,14 @@
endif
if( icurrent == 0 ) &
call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
-
+
! adds interface points and removes neighbor flag from edge
! assumes NGLLX == NGLLY == NGLLZ
do k = 1,NGLLX
select case( iedge )
case( 1 )
! face I == 1, J == 1
- iglob = ibool(1,1,k,ispec)
+ iglob = ibool(1,1,k,ispec)
case( 2 )
! face I == 1, J == NGLLY
iglob = ibool(1,NGLLY,k,ispec)
@@ -1530,18 +1530,18 @@
nibool_neighbours(icurrent) = nibool_neighbours(icurrent) + 1
if( nibool_neighbours(icurrent) > max_nibool) &
call exit_mpi(myrank,'interface edge exceeds max_nibool range')
-
- ! stores interface iglob index
+
+ ! stores interface iglob index
ibool_neighbours( nibool_neighbours(icurrent),icurrent ) = iglob
-
+
! re-sets flag
work_test_flag(iglob) = work_test_flag(iglob) - ( rank + 1 )
! debug
if( work_test_flag(iglob) < 0 ) call exit_mpi(myrank,'error edge flag')
-
- enddo
- endif
+
+ enddo
+ endif
enddo ! iedge
@@ -1550,7 +1550,7 @@
! loops over all corners of element
do icorner = 1, 8
-
+
! chooses a corner point
select case( icorner )
case( 1 )
@@ -1584,7 +1584,7 @@
! note: there can be elements which have an edge or corner shared with
! other mpi partitions, but have the work_test_flag value already set to zero
! since the iglob point was found before.
- ! also, this check here would suffice to determine the outer flag, but we also include the
+ ! also, this check here would suffice to determine the outer flag, but we also include the
! check everywhere we encounter it too
if( test_flag(iglob) > 0.5 ) then
ispec_is_outer = .true.
@@ -1593,10 +1593,10 @@
! checks assembled flag on global point
if( work_test_flag(iglob) > 0 ) then
ispec_is_outer = .true.
-
+
! rank of neighbor process
rank = work_test_flag(iglob) - 1
-
+
! checks ranks range
if( rank < 0 .or. rank >= NPROCTOT_VAL ) then
print*,'error corner: ',myrank
@@ -1604,18 +1604,18 @@
print*,' corner ',icorner
call exit_mpi(myrank,'error corner neighbor mpi rank')
endif
-
+
! checks if already stored
icurrent = 0
is_done = .false.
do ii = 1,iinterface
- if( rank == my_neighbours(ii) ) then
+ if( rank == my_neighbours(ii) ) then
icurrent = ii
is_done = .true.
exit
endif
enddo
-
+
! updates interfaces array
if( .not. is_done ) then
iinterface = iinterface + 1
@@ -1627,41 +1627,41 @@
endif
if( icurrent == 0 ) &
call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
-
+
! adds this corner as interface point and removes neighbor flag from face
! increases number of total points on this interface
nibool_neighbours(icurrent) = nibool_neighbours(icurrent) + 1
if( nibool_neighbours(icurrent) > max_nibool) &
call exit_mpi(myrank,'interface corner exceeds max_nibool range')
-
- ! stores interface iglob index
+
+ ! stores interface iglob index
ibool_neighbours( nibool_neighbours(icurrent),icurrent ) = iglob
-
+
! re-sets flag
work_test_flag(iglob) = work_test_flag(iglob) - ( rank + 1 )
! debug
if( work_test_flag(iglob) < 0 ) call exit_mpi(myrank,'error corner flag')
-
+
endif
-
+
enddo ! icorner
-
- ! stores flags for outer elements when recognized as such
+
+ ! stores flags for outer elements when recognized as such
! (inner/outer elements separated for non-blocking mpi communications)
if( ispec_is_outer ) then
work_ispec_is_outer(ispec) = .true.
endif
-
- enddo
+ enddo
+
! number of outer elements (on MPI interfaces)
npoin = count( work_ispec_is_outer )
-
+
! debug: user output
if( myrank == 0 ) then
write(IMAIN,*) ' interfaces : ',iinterface
- write(IMAIN,*) ' my_neighbours: ',my_neighbours(1:iinterface)
+ write(IMAIN,*) ' my_neighbours: ',my_neighbours(1:iinterface)
write(IMAIN,*) ' nibool_neighbours: ',nibool_neighbours(1:iinterface)
write(IMAIN,*) ' test flag min/max: ',minval(work_test_flag),maxval(work_test_flag)
write(IMAIN,*) ' outer elements: ',npoin
@@ -1688,13 +1688,13 @@
max_nibool_interfaces = maxval( nibool_neighbours(1:num_interfaces) )
! optional: ibool usually is already sorted,
- ! this makes sure ibool_neighbours arrays are still sorted
- ! (iglob indices in increasing order; we will access acceleration fields accel(:,iglob),
+ ! this makes sure ibool_neighbours arrays are still sorted
+ ! (iglob indices in increasing order; we will access acceleration fields accel(:,iglob),
! thus it helps if iglob strides are short and accesses are close-by)
do iinterface = 1,num_interfaces
- npoin = nibool_neighbours(iinterface)
+ npoin = nibool_neighbours(iinterface)
call heap_sort( npoin, ibool_neighbours(1:npoin,iinterface) )
-
+
! debug: checks if unique set of iglob values
do j=1,npoin-1
if( ibool_neighbours(j,iinterface) == ibool_neighbours(j+1,iinterface) ) then
@@ -1722,9 +1722,9 @@
use specfem_par
use specfem_par_crustmantle
use specfem_par_innercore
- use specfem_par_outercore
+ use specfem_par_outercore
implicit none
-
+
! local parameters
real :: percentage_edge
integer :: ier,ispec,iinner,iouter
@@ -1734,16 +1734,16 @@
!
! note: arrays is_on_a_slice_edge_.. have flags set for elements which need to
! communicate with other MPI processes
-
+
! crust_mantle
nspec_outer_crust_mantle = count( is_on_a_slice_edge_crust_mantle )
nspec_inner_crust_mantle = NSPEC_CRUST_MANTLE - nspec_outer_crust_mantle
-
+
num_phase_ispec_crust_mantle = max(nspec_inner_crust_mantle,nspec_outer_crust_mantle)
-
+
allocate(phase_ispec_inner_crust_mantle(num_phase_ispec_crust_mantle,2),stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_crust_mantle')
-
+
phase_ispec_inner_crust_mantle(:,:) = 0
iinner = 0
iouter = 0
@@ -1764,10 +1764,10 @@
nspec_inner_outer_core = NSPEC_OUTER_CORE - nspec_outer_outer_core
num_phase_ispec_outer_core = max(nspec_inner_outer_core,nspec_outer_outer_core)
-
+
allocate(phase_ispec_inner_outer_core(num_phase_ispec_outer_core,2),stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_outer_core')
-
+
phase_ispec_inner_outer_core(:,:) = 0
iinner = 0
iouter = 0
@@ -1788,10 +1788,10 @@
nspec_inner_inner_core = NSPEC_INNER_CORE - nspec_outer_inner_core
num_phase_ispec_inner_core = max(nspec_inner_inner_core,nspec_outer_inner_core)
-
+
allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2),stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_inner_core')
-
+
phase_ispec_inner_inner_core(:,:) = 0
iinner = 0
iouter = 0
@@ -1806,7 +1806,7 @@
phase_ispec_inner_inner_core(iinner,2) = ispec
endif
enddo
-
+
! user output
if(myrank == 0) then
@@ -1832,19 +1832,19 @@
! debug: saves element flags
! crust mantle
- !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_crust_mantle_proc',myrank
+ !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_crust_mantle_proc',myrank
!call write_VTK_data_elem_l(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
! xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
! ibool_crust_mantle, &
- ! is_on_a_slice_edge_crust_mantle,filename)
+ ! is_on_a_slice_edge_crust_mantle,filename)
! outer core
- !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_outer_core_proc',myrank
+ !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_outer_core_proc',myrank
!call write_VTK_data_elem_l(NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
! xstore_outer_core,ystore_outer_core,zstore_outer_core, &
! ibool_outer_core, &
! is_on_a_slice_edge_outer_core,filename)
! inner core
- write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_inner_core_proc',myrank
+ write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_inner_core_proc',myrank
call write_VTK_data_elem_l(NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
xstore_inner_core,ystore_inner_core,zstore_inner_core, &
ibool_inner_core, &
@@ -1967,7 +1967,7 @@
! crust and mantle
-
+
! create name of database
call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
@@ -2093,7 +2093,7 @@
! outer core
-
+
! create name of database
call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
@@ -2261,23 +2261,23 @@
! heap sort algorithm
! sorts integer array (in increasing order, like 1 - 5 - 6 - 9 - 12 - 13 - 14 -...)
-
+
implicit none
integer,intent(in) :: N
integer,dimension(N),intent(inout) :: array
-
+
! local parameters
integer :: tmp
integer :: i
-
+
! checks if anything to do
if( N < 2 ) return
-
+
! builds heap
- do i = N/2, 1, -1
+ do i = N/2, 1, -1
call heap_sort_siftdown(N,array,i,N)
enddo
-
+
! sorts array
do i = N, 2, -1
! swaps last and first entry in this section
@@ -2286,45 +2286,45 @@
array(i) = tmp
call heap_sort_siftdown(N,array,1,i-1)
enddo
-
+
end subroutine heap_sort
!
!----
-!
+!
subroutine heap_sort_siftdown(N,array,start,bottom)
implicit none
-
+
integer,intent(in):: N
integer,dimension(N),intent(inout) :: array
integer :: start,bottom
-
+
! local parameters
integer :: i,j
integer :: tmp
-
+
i = start
- tmp = array(i)
+ tmp = array(i)
j = 2*i
do while( j <= bottom )
! chooses larger value first in this section
if( j < bottom ) then
if( array(j) <= array(j+1) ) j = j + 1
endif
-
+
! checks if section already smaller than inital value
if( array(j) < tmp ) exit
-
+
array(i) = array(j)
i = j
j = 2*i
enddo
-
- array(i) = tmp
+
+ array(i) = tmp
return
-
+
end subroutine heap_sort_siftdown
!
@@ -2381,7 +2381,7 @@
write(IOVTK,'(3e18.6)') xstore_dummy(iglob),ystore_dummy(iglob),zstore_dummy(iglob)
enddo
write(IOVTK,*) ""
-
+
close(IOVTK)
end subroutine write_VTK_data_points
@@ -2438,7 +2438,7 @@
write(IOVTK,*) glob_values(iglob)
enddo
write(IOVTK,*) ""
-
+
close(IOVTK)
end subroutine write_VTK_glob_points
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -37,7 +37,7 @@
! local parameters
integer :: ier
-
+
! get MPI starting time
time_start = MPI_WTIME()
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_forward_arrays.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_forward_arrays.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_forward_arrays.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -41,7 +41,7 @@
if(NUMBER_OF_RUNS > 1 .and. NUMBER_OF_THIS_RUN < NUMBER_OF_RUNS) then
write(outputname,"('dump_all_arrays',i6.6)") myrank
open(unit=55,file=trim(LOCAL_PATH)//'/'//outputname,status='unknown',form='unformatted',action='write')
-
+
write(55) displ_crust_mantle
write(55) veloc_crust_mantle
write(55) accel_crust_mantle
@@ -86,7 +86,7 @@
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
write(outputname,'(a,i6.6,a)') 'proc',myrank,'_save_forward_arrays.bin'
open(unit=55,file=trim(LOCAL_PATH)//'/'//outputname,status='unknown',form='unformatted',action='write')
-
+
write(55) displ_crust_mantle
write(55) veloc_crust_mantle
write(55) accel_crust_mantle
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -35,10 +35,10 @@
! reads in stations file and locates receivers
call setup_receivers()
-
+
! write source and receiver VTK files for Paraview
call setup_sources_receivers_VTKfile()
-
+
! pre-compute source arrays
call setup_sources_precompute_arrays()
@@ -46,7 +46,7 @@
call setup_receivers_precompute_intp()
! user output
- if(myrank == 0) then
+ if(myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) 'Total number of samples for seismograms = ',NSTEP
write(IMAIN,*)
@@ -62,7 +62,7 @@
subroutine setup_sources()
use specfem_par
- use specfem_par_crustmantle
+ use specfem_par_crustmantle
use specfem_par_movie
implicit none
@@ -70,7 +70,7 @@
double precision :: min_tshift_cmt_original
double precision :: sec
integer :: yr,jda,ho,mi
- integer :: isource
+ integer :: isource
character(len=256) :: filename
integer :: ier
@@ -249,7 +249,7 @@
integer :: irec,isource,nrec_tot_found
integer :: icomp,itime,nadj_files_found,nadj_files_found_tot
character(len=3),dimension(NDIM) :: comp
- character(len=256) :: filename,adj_source_file
+ character(len=256) :: filename,adj_source_file
character(len=2) :: bic
integer :: ier
@@ -307,7 +307,7 @@
! counter for adjoint receiver stations in local slice, used to allocate adjoint source arrays
nadj_rec_local = 0
-
+
! counts receivers for adjoint simulations
if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
! by Ebru
@@ -380,7 +380,13 @@
write(IMAIN,*) 'this total is okay'
endif
endif
-
+
+ ! check that the sum of the number of receivers in each slice is nrec
+ if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3 ) then
+ if(myrank == 0 .and. nrec_tot_found /= nrec) &
+ call exit_MPI(myrank,'total number of receivers is incorrect')
+ endif
+
end subroutine setup_receivers
@@ -442,7 +448,7 @@
! local parameters
integer :: ier
-
+
! allocates source arrays
if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
! source interpolated on all GLL points in source element
@@ -489,7 +495,7 @@
endif
endif
- end subroutine setup_sources_precompute_arrays
+ end subroutine setup_sources_precompute_arrays
!
!-------------------------------------------------------------------------------------------------
@@ -507,7 +513,7 @@
use specfem_par
use specfem_par_crustmantle
-
+
implicit none
! include "constants.h"
@@ -669,7 +675,7 @@
! local parameters
integer :: ier
-
+
! define local to global receiver numbering mapping
! needs to be allocate for subroutine calls (even if nrec_local == 0)
allocate(number_receiver_global(nrec_local),stat=ier)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -360,11 +360,11 @@
integer :: num_interfaces_outer_core
integer :: max_nibool_interfaces_outer_core
integer, dimension(:), allocatable :: my_neighbours_outer_core,nibool_interfaces_outer_core
- integer, dimension(:,:), allocatable :: ibool_interfaces_outer_core
+ integer, dimension(:,:), allocatable :: ibool_interfaces_outer_core
real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core
real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core
-
+
integer, dimension(:), allocatable :: request_send_scalar_outer_core,request_recv_scalar_outer_core
integer, dimension(:), allocatable :: b_request_send_scalar_outer_core,b_request_recv_scalar_outer_core
@@ -548,7 +548,7 @@
real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: noise_surface_movie
integer :: irec_master_noise
integer :: nspec_top
-
+
! inner / outer elements crust/mantle region
integer :: num_phase_ispec_crust_mantle
integer :: nspec_inner_crust_mantle,nspec_outer_crust_mantle
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -42,6 +42,14 @@
! save movie on surface
if( MOVIE_SURFACE ) then
if( mod(it,NTSTEP_BETWEEN_FRAMES) == 0) then
+
+ ! gets resulting array values onto CPU
+ if( GPU_MODE ) then
+ ! transfers whole fields
+ call transfer_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,Mesh_pointer)
+ call transfer_veloc_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,veloc_crust_mantle,Mesh_pointer)
+ endif
+
! save velocity here to avoid static offset on displacement for movies
call write_movie_surface(myrank,nmovie_points,scale_veloc,veloc_crust_mantle, &
scale_displ,displ_crust_mantle, &
@@ -62,8 +70,19 @@
if( mod(it-MOVIE_START,NTSTEP_BETWEEN_FRAMES) == 0 &
.and. it >= MOVIE_START .and. it <= MOVIE_STOP) then
- if (MOVIE_VOLUME_TYPE == 1) then ! output strains
+ select case( MOVIE_VOLUME_TYPE )
+ case( 1 )
+ ! output strains
+ ! gets resulting array values onto CPU
+ if( GPU_MODE ) then
+ call transfer_strain_cm_from_device(Mesh_pointer, &
+ eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle, &
+ epsilondev_xy_crust_mantle,epsilondev_xz_crust_mantle, &
+ epsilondev_yz_crust_mantle)
+ endif
+
call write_movie_volume_strains(myrank,npoints_3dmovie, &
LOCAL_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE, &
it,eps_trace_over_3_crust_mantle, &
@@ -72,7 +91,7 @@
muvstore_crust_mantle_3dmovie, &
mask_3dmovie,nu_3dmovie)
- else if (MOVIE_VOLUME_TYPE == 2 .or. MOVIE_VOLUME_TYPE == 3) then
+ case( 2, 3 )
! output the Time Integral of Strain, or \mu*TIS
call write_movie_volume_strains(myrank,npoints_3dmovie, &
LOCAL_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE, &
@@ -80,8 +99,31 @@
muvstore_crust_mantle_3dmovie, &
mask_3dmovie,nu_3dmovie)
- else if (MOVIE_VOLUME_TYPE == 4) then ! output divergence and curl in whole volume
+ case( 4 )
+ ! output divergence and curl in whole volume
+ ! gets resulting array values onto CPU
+ if( GPU_MODE ) then
+ ! strains
+ call transfer_strain_cm_from_device(Mesh_pointer, &
+ eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle, &
+ epsilondev_xy_crust_mantle,epsilondev_xz_crust_mantle, &
+ epsilondev_yz_crust_mantle)
+ call transfer_strain_ic_from_device(Mesh_pointer, &
+ eps_trace_over_3_inner_core, &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core, &
+ epsilondev_xy_inner_core,epsilondev_xz_inner_core, &
+ epsilondev_yz_inner_core)
+ ! wavefields
+ call transfer_fields_cm_from_device(NDIM*NGLOB_CRUST_MANTLE, &
+ displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle,Mesh_pointer)
+ call transfer_fields_ic_from_device(NDIM*NGLOB_INNER_CORE, &
+ displ_inner_core,veloc_inner_core,accel_inner_core,Mesh_pointer)
+ call transfer_fields_oc_from_device(NGLOB_OUTER_CORE, &
+ displ_outer_core,veloc_outer_core,accel_outer_core,Mesh_pointer)
+ endif
+
call write_movie_volume_divcurl(myrank,it,eps_trace_over_3_crust_mantle,&
div_displ_outer_core, &
accel_outer_core,kappavstore_outer_core,rhostore_outer_core,ibool_outer_core, &
@@ -96,25 +138,34 @@
accel_crust_mantle,accel_inner_core, &
ibool_crust_mantle,ibool_inner_core)
- else if (MOVIE_VOLUME_TYPE == 5) then !output displacement
+ case( 5 )
+ !output displacement
+ if( GPU_MODE ) then
+ call transfer_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,Mesh_pointer)
+ endif
+
scalingval = scale_displ
call write_movie_volume_vector(myrank,it,npoints_3dmovie, &
LOCAL_PATH,MOVIE_VOLUME_TYPE, &
MOVIE_COARSE,ibool_crust_mantle,displ_crust_mantle, &
scalingval,mask_3dmovie,nu_3dmovie)
- else if (MOVIE_VOLUME_TYPE == 6) then !output velocity
+ case( 6 )
+ !output velocity
+ if( GPU_MODE ) then
+ call transfer_veloc_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,veloc_crust_mantle,Mesh_pointer)
+ endif
+
scalingval = scale_veloc
call write_movie_volume_vector(myrank,it,npoints_3dmovie, &
LOCAL_PATH,MOVIE_VOLUME_TYPE, &
MOVIE_COARSE,ibool_crust_mantle,veloc_crust_mantle, &
scalingval,mask_3dmovie,nu_3dmovie)
- else
-
+ case default
call exit_MPI(myrank, 'MOVIE_VOLUME_TYPE has to be 1,2,3,4,5 or 6')
+ end select ! MOVIE_VOLUME_TYPE
- endif ! MOVIE_VOLUME_TYPE
endif
endif ! MOVIE_VOLUME
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90 2012-02-21 07:23:26 UTC (rev 19658)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90 2012-02-22 04:38:32 UTC (rev 19659)
@@ -40,7 +40,22 @@
! compute & store the seismograms only if there is at least one receiver located in this slice
if (nrec_local > 0) then
- if (SIMULATION_TYPE == 1) then
+
+ ! gets resulting array values onto CPU
+ if( GPU_MODE ) then
+ ! this transfers fields only in elements with stations for efficiency
+ call write_seismograms_transfer_cuda(displ_crust_mantle,b_displ_crust_mantle, &
+ eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ Mesh_pointer,number_receiver_global, &
+ ispec_selected_rec,ispec_selected_source, &
+ ibool_crust_mantle)
+ endif
+
+ ! computes traces at interpolated receiver locations
+ select case( SIMULATION_TYPE )
+ case( 1 )
call compute_seismograms(nrec_local,nrec,displ_crust_mantle, &
nu,hxir_store,hetar_store,hgammar_store, &
scale_displ,ibool_crust_mantle, &
@@ -48,7 +63,7 @@
seismo_current,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
seismograms)
- else if (SIMULATION_TYPE == 2) then
+ case( 2 )
call compute_seismograms_adjoint(NSOURCES,nrec_local,displ_crust_mantle, &
eps_trace_over_3_crust_mantle, &
epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
@@ -66,7 +81,7 @@
ibool_crust_mantle,ispec_selected_source,number_receiver_global, &
NSTEP,it,nit_written)
- else if (SIMULATION_TYPE == 3) then
+ case( 3 )
call compute_seismograms_backward(nrec_local,nrec,b_displ_crust_mantle, &
nu,hxir_store,hetar_store,hgammar_store, &
scale_displ,ibool_crust_mantle, &
@@ -74,22 +89,15 @@
seismo_current,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
seismograms)
- endif
+ end select
+
endif ! nrec_local
! write the current or final seismograms
if(seismo_current == NTSTEP_BETWEEN_OUTPUT_SEISMOS .or. it == it_end) then
if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
- call write_seismograms_to_file(myrank,seismograms,number_receiver_global,station_name, &
- network_name,stlat,stlon,stele,stbur, &
- nrec,nrec_local,ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,DT,t0,it_end, &
- yr_SAC,jda_SAC,ho_SAC,mi_SAC,sec_SAC,t_cmt_SAC,t_shift_SAC, &
- elat_SAC,elon_SAC,depth_SAC,event_name_SAC,cmt_lat_SAC,cmt_lon_SAC,&
- cmt_depth_SAC,cmt_hdur_SAC,NPROCTOT_VAL, &
- OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM, &
- OUTPUT_SEISMOS_SAC_BINARY,ROTATE_SEISMOGRAMS_RT,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
- seismo_offset,seismo_current,WRITE_SEISMOGRAMS_BY_MASTER, &
- SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE,MODEL)
+ call write_seismograms_to_file()
+
if(myrank==0) then
write(IMAIN,*)
write(IMAIN,*) ' Total number of time steps written: ', it-it_begin+1
@@ -108,84 +116,46 @@
end subroutine write_seismograms
-!=====================================================================
+!
+!-------------------------------------------------------------------------------------------------
+!
! write seismograms to files
- subroutine write_seismograms_to_file(myrank,seismograms,number_receiver_global,station_name, &
- network_name,stlat,stlon,stele,stbur, &
- nrec,nrec_local,ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,DT,hdur,it_end, &
- yr,jda,ho,mi,sec,tshift_cmt,t_shift, &
- elat,elon,depth,event_name,cmt_lat,cmt_lon, &
- cmt_depth,cmt_hdur,NPROCTOT, &
- OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM, &
- OUTPUT_SEISMOS_SAC_BINARY,ROTATE_SEISMOGRAMS_RT,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
- seismo_offset,seismo_current,WRITE_SEISMOGRAMS_BY_MASTER,&
- SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE,MODEL)
+ subroutine write_seismograms_to_file()
- implicit none
+ use constants
+ use specfem_par,only: &
+ NPROCTOT_VAL,myrank,nrec,nrec_local, &
+ number_receiver_global,seismograms, &
+ islice_selected_rec, &
+ seismo_offset,seismo_current, &
+ OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM, &
+ OUTPUT_SEISMOS_SAC_BINARY,ROTATE_SEISMOGRAMS_RT,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
+ SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE, &
+ MODEL,OUTPUT_FILES, &
+ WRITE_SEISMOGRAMS_BY_MASTER
-! standard include of the MPI library
- include 'mpif.h'
+ implicit none
- include "constants.h"
- include "precision.h"
+ ! standard include of the MPI library
+ include 'mpif.h'
+ include "precision.h"
-! parameters
- integer nrec,nrec_local,myrank,it_end,NPROCTOT,NEX_XI !,NSOURCES
- character(len=256) sisname
+ ! local parameters
+ double precision :: write_time_begin,write_time
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: one_seismogram
- integer :: seismo_offset, seismo_current, NTSTEP_BETWEEN_OUTPUT_SEISMOS
- integer, dimension(nrec_local) :: number_receiver_global
+ integer :: iproc,sender,irec_local,irec,ier,receiver
+ integer :: nrec_local_received
+ integer :: total_seismos,total_seismos_local
+ integer,dimension(:),allocatable:: islice_num_rec_local
+ integer :: msg_status(MPI_STATUS_SIZE)
+ character(len=256) :: sisname
- real(kind=CUSTOM_REAL), dimension(NDIM,nrec_local,NTSTEP_BETWEEN_OUTPUT_SEISMOS) :: seismograms
- double precision hdur,DT,ANGULAR_WIDTH_XI_IN_DEGREES
-
- character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
- character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
- double precision tshift_cmt,t_shift,elat,elon,depth
- double precision cmt_lat,cmt_lon,cmt_depth,cmt_hdur
- double precision, dimension(nrec) :: stlat,stlon,stele,stbur
- integer yr,jda,ho,mi
- double precision sec
- !real mb
-! character(len=12) ename
- character(len=20) event_name
-
-! variables
- integer :: iproc,sender,irec_local,irec,ier,receiver,nrec_local_received,nrec_tot_found
- integer :: total_seismos,total_seismos_local
- double precision :: write_time_begin,write_time
-
- real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: one_seismogram
-
- integer msg_status(MPI_STATUS_SIZE)
-
- character(len=150) OUTPUT_FILES, MODEL
-
-! new flags to decide on seismogram type BS BS 06/2007
- logical OUTPUT_SEISMOS_ASCII_TEXT, OUTPUT_SEISMOS_SAC_ALPHANUM, &
- OUTPUT_SEISMOS_SAC_BINARY
-! flag whether seismograms are ouput for North-East-Z component or Radial-Transverse-Z
- logical ROTATE_SEISMOGRAMS_RT
-
-! flag to decide if seismograms are written by master proc only or
-! by all processes in parallel (doing the later may create problems on some
-! file systems)
- logical WRITE_SEISMOGRAMS_BY_MASTER
-
-! save all seismograms in one large combined file instead of one file per seismogram
-! to avoid overloading shared non-local file systems such as GPFS for instance
- logical SAVE_ALL_SEISMOS_IN_ONE_FILE
- logical USE_BINARY_FOR_LARGE_FILE
-
+ ! allocates single station seismogram
allocate(one_seismogram(NDIM,NTSTEP_BETWEEN_OUTPUT_SEISMOS),stat=ier)
- if(ier /= 0) stop 'error while allocating one temporary seismogram'
+ if(ier /= 0) call exit_mpi(myrank,'error while allocating one temporary seismogram')
- ! check that the sum of the number of receivers in each slice is nrec
- call MPI_REDUCE(nrec_local,nrec_tot_found,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
- if(myrank == 0 .and. nrec_tot_found /= nrec) &
- call exit_MPI(myrank,'total number of receivers is incorrect')
-
! get the base pathname for output files
call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', 'OUTPUT_FILES')
@@ -227,16 +197,7 @@
one_seismogram = seismograms(:,irec_local,:)
! write this seismogram
- call write_one_seismogram(one_seismogram,irec, &
- station_name,network_name,stlat,stlon,stele,stbur,nrec, &
- ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,DT,hdur,it_end, &
- yr,jda,ho,mi,sec,tshift_cmt,t_shift, &
- elat,elon,depth,event_name,cmt_lat, &
- cmt_lon,cmt_depth,cmt_hdur,OUTPUT_FILES, &
- OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM, &
- OUTPUT_SEISMOS_SAC_BINARY,ROTATE_SEISMOGRAMS_RT, &
- NTSTEP_BETWEEN_OUTPUT_SEISMOS,seismo_offset,seismo_current, &
- SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE,MODEL,myrank)
+ call write_one_seismogram(one_seismogram,irec)
enddo
@@ -245,12 +206,12 @@
if(total_seismos_local/= nrec_local) call exit_MPI(myrank,'incorrect total number of receivers saved')
- write_time = MPI_WTIME() - write_time_begin
if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) 'Writing the seismograms in parallel took ',write_time,' seconds'
- write(IMAIN,*)
+ write_time = MPI_WTIME() - write_time_begin
+ write(IMAIN,*)
+ write(IMAIN,*) 'Writing the seismograms in parallel took ',write_time,' seconds'
+ write(IMAIN,*)
endif
! now only the master process does the writing of seismograms and
@@ -261,96 +222,102 @@
if(myrank == 0) then ! on the master, gather all the seismograms
- ! create one large file instead of one small file per station to avoid file system overload
- if(OUTPUT_SEISMOS_ASCII_TEXT .and. SAVE_ALL_SEISMOS_IN_ONE_FILE) then
- write(sisname,'(A)') '/all_seismograms'
+ ! create one large file instead of one small file per station to avoid file system overload
+ if(OUTPUT_SEISMOS_ASCII_TEXT .and. SAVE_ALL_SEISMOS_IN_ONE_FILE) then
+ write(sisname,'(A)') '/all_seismograms'
- if(USE_BINARY_FOR_LARGE_FILE) then
- if (seismo_offset==0) then
- open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(sisname)//'.bin',status='unknown',form='unformatted',action='write')
- else
- open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(sisname)//'.bin',status='old',&
- form='unformatted',position='append',action='write')
- endif
+ if(USE_BINARY_FOR_LARGE_FILE) then
+ if (seismo_offset==0) then
+ open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(sisname)//'.bin',status='unknown',form='unformatted',action='write')
else
- if (seismo_offset==0) then
- open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(sisname)//'.ascii',status='unknown',form='formatted',action='write')
- else
- open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(sisname)//'.ascii',status='old',&
- form='formatted',position='append',action='write')
- endif
+ open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(sisname)//'.bin',status='old',&
+ form='unformatted',position='append',action='write')
endif
-
+ else
+ if (seismo_offset==0) then
+ open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(sisname)//'.ascii',status='unknown',form='formatted',action='write')
+ else
+ open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(sisname)//'.ascii',status='old',&
+ form='formatted',position='append',action='write')
+ endif
endif
- total_seismos = 0
+ endif
- ! loop on all the slices
- do iproc = 0,NPROCTOT-1
+ ! counts number of local receivers for each slice
+ allocate(islice_num_rec_local(0:NPROCTOT_VAL-1),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating islice_num_rec_local')
- ! receive except from proc 0, which is me and therefore I already have this value
- sender = iproc
- if(iproc /= 0) then
- call MPI_RECV(nrec_local_received,1,MPI_INTEGER,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- if(nrec_local_received < 0) call exit_MPI(myrank,'error while receiving local number of receivers')
- else
- nrec_local_received = nrec_local
- endif
- if (nrec_local_received > 0) then
- do irec_local = 1,nrec_local_received
- ! receive except from proc 0, which is myself and therefore I already have these values
- if(iproc == 0) then
- ! get global number of that receiver
- irec = number_receiver_global(irec_local)
- one_seismogram(:,:) = seismograms(:,irec_local,:)
- else
- call MPI_RECV(irec,1,MPI_INTEGER,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- if(irec < 1 .or. irec > nrec) call exit_MPI(myrank,'error while receiving global receiver number')
- call MPI_RECV(one_seismogram,NDIM*seismo_current,CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- endif
+ islice_num_rec_local(:) = 0
+ do irec = 1,nrec
+ iproc = islice_selected_rec(irec)
+ islice_num_rec_local(iproc) = islice_num_rec_local(iproc) + 1
+ enddo
- total_seismos = total_seismos + 1
- ! write this seismogram
- call write_one_seismogram(one_seismogram,irec, &
- station_name,network_name,stlat,stlon,stele,stbur,nrec, &
- ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,DT,hdur,it_end, &
- yr,jda,ho,mi,sec,tshift_cmt,t_shift, &
- elat,elon,depth,event_name,cmt_lat, &
- cmt_lon,cmt_depth,cmt_hdur,OUTPUT_FILES, &
- OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM, &
- OUTPUT_SEISMOS_SAC_BINARY,ROTATE_SEISMOGRAMS_RT, &
- NTSTEP_BETWEEN_OUTPUT_SEISMOS,seismo_offset,seismo_current,&
- SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE,MODEL,myrank)
- enddo
- endif
- enddo
+ total_seismos = 0
- write(IMAIN,*)
- write(IMAIN,*) 'Total number of receivers saved is ',total_seismos,' out of ',nrec
- write(IMAIN,*)
+ ! loop on all the slices
+ do iproc = 0,NPROCTOT_VAL-1
- if(total_seismos /= nrec) call exit_MPI(myrank,'incorrect total number of receivers saved')
+ ! communicates only with processes which contain local receivers
+ if( islice_num_rec_local(iproc) == 0 ) cycle
- ! create one large file instead of one small file per station to avoid file system overload
- if(SAVE_ALL_SEISMOS_IN_ONE_FILE) close(IOUT)
+ ! receive except from proc 0, which is me and therefore I already have this value
+ sender = iproc
+ if(iproc /= 0) then
+ call MPI_RECV(nrec_local_received,1,MPI_INTEGER,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ if(nrec_local_received < 0) call exit_MPI(myrank,'error while receiving local number of receivers')
+ else
+ nrec_local_received = nrec_local
+ endif
+ if (nrec_local_received > 0) then
+ do irec_local = 1,nrec_local_received
+ ! receive except from proc 0, which is myself and therefore I already have these values
+ if(iproc == 0) then
+ ! get global number of that receiver
+ irec = number_receiver_global(irec_local)
+ one_seismogram(:,:) = seismograms(:,irec_local,:)
+ else
+ call MPI_RECV(irec,1,MPI_INTEGER,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ if(irec < 1 .or. irec > nrec) call exit_MPI(myrank,'error while receiving global receiver number')
+ call MPI_RECV(one_seismogram,NDIM*seismo_current,CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ endif
- else ! on the nodes, send the seismograms to the master
- receiver = 0
- call MPI_SEND(nrec_local,1,MPI_INTEGER,receiver,itag,MPI_COMM_WORLD,ier)
- if (nrec_local > 0) then
- do irec_local = 1,nrec_local
- ! get global number of that receiver
- irec = number_receiver_global(irec_local)
- call MPI_SEND(irec,1,MPI_INTEGER,receiver,itag,MPI_COMM_WORLD,ier)
- one_seismogram(:,:) = seismograms(:,irec_local,:)
- call MPI_SEND(one_seismogram,NDIM*seismo_current,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+ total_seismos = total_seismos + 1
+ ! write this seismogram
+ call write_one_seismogram(one_seismogram,irec)
+
enddo
endif
+ enddo
+ deallocate(islice_num_rec_local)
+
+ write(IMAIN,*)
+ write(IMAIN,*) 'Total number of receivers saved is ',total_seismos,' out of ',nrec
+ write(IMAIN,*)
+
+ if(total_seismos /= nrec) call exit_MPI(myrank,'incorrect total number of receivers saved')
+
+ ! create one large file instead of one small file per station to avoid file system overload
+ if(SAVE_ALL_SEISMOS_IN_ONE_FILE) close(IOUT)
+
+ else ! on the nodes, send the seismograms to the master
+ receiver = 0
+ call MPI_SEND(nrec_local,1,MPI_INTEGER,receiver,itag,MPI_COMM_WORLD,ier)
+ if (nrec_local > 0) then
+ do irec_local = 1,nrec_local
+ ! get global number of that receiver
+ irec = number_receiver_global(irec_local)
+ call MPI_SEND(irec,1,MPI_INTEGER,receiver,itag,MPI_COMM_WORLD,ier)
+ one_seismogram(:,:) = seismograms(:,irec_local,:)
+ call MPI_SEND(one_seismogram,NDIM*seismo_current,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+ enddo
+ endif
endif
- write_time = MPI_WTIME() - write_time_begin
if(myrank == 0) then
+ write_time = MPI_WTIME() - write_time_begin
write(IMAIN,*)
write(IMAIN,*) 'Writing the seismograms by master proc alone took ',write_time,' seconds'
write(IMAIN,*)
@@ -362,79 +329,50 @@
end subroutine write_seismograms_to_file
-!=====================================================================
+!
+!-------------------------------------------------------------------------------------------------
+!
- subroutine write_one_seismogram(one_seismogram,irec, &
- station_name,network_name,stlat,stlon,stele,stbur,nrec, &
- ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,DT,hdur,it_end, &
- yr,jda,ho,mi,sec,tshift_cmt,t_shift,&
- elat,elon,depth,event_name,cmt_lat,cmt_lon,cmt_depth,cmt_hdur, &
- OUTPUT_FILES, &
- OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM, &
- OUTPUT_SEISMOS_SAC_BINARY,ROTATE_SEISMOGRAMS_RT, &
- NTSTEP_BETWEEN_OUTPUT_SEISMOS,seismo_offset,seismo_current, &
- SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE,MODEL,myrank)
+ subroutine write_one_seismogram(one_seismogram,irec)
- implicit none
+ use constants
+ use specfem_par,only: &
+ ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI, &
+ myrank,nrec, &
+ number_receiver_global, &
+ station_name,network_name,stlat,stlon,stele,stbur, &
+ DT,seismo_offset,seismo_current,it_end, &
+ OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM, &
+ OUTPUT_SEISMOS_SAC_BINARY,ROTATE_SEISMOGRAMS_RT,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
+ SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE, &
+ MODEL,OUTPUT_FILES
- include "constants.h"
+ use specfem_par,only: &
+ hdur=>t0,yr=>yr_SAC,jda=>jda_SAC,ho=>ho_SAC,mi=>mi_SAC,sec=>sec_SAC, &
+ tshift_cmt=>t_cmt_SAC,t_shift=>t_shift_SAC, &
+ elat=>elat_SAC,elon=>elon_SAC,depth=>depth_SAC, &
+ event_name=>event_name_SAC,cmt_lat=>cmt_lat_SAC,cmt_lon=>cmt_lon_SAC,&
+ cmt_depth=>cmt_depth_SAC,cmt_hdur=>cmt_hdur_SAC
- integer nrec,it_end,NEX_XI
+ implicit none
- integer :: seismo_offset, seismo_current, NTSTEP_BETWEEN_OUTPUT_SEISMOS
-
+ integer :: irec
real(kind=CUSTOM_REAL), dimension(NDIM,NTSTEP_BETWEEN_OUTPUT_SEISMOS) :: one_seismogram
+ ! local parameters
real(kind=CUSTOM_REAL), dimension(5,NTSTEP_BETWEEN_OUTPUT_SEISMOS) :: seismogram_tmp
+ integer :: iorientation,length_station_name,length_network_name
+ character(len=4) :: chn
+ character(len=256) :: sisname,sisname_big_file
+ character(len=2) :: bic
+ integer :: ior_start,ior_end
+ double precision :: backaz
+ real(kind=CUSTOM_REAL) :: phi,cphi,sphi
+ integer :: isample
- integer myrank
- double precision hdur,DT,ANGULAR_WIDTH_XI_IN_DEGREES
-
- character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
- character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
-
- integer irec,length_station_name,length_network_name
- integer iorientation
-
- character(len=4) chn
- character(len=256) sisname,sisname_big_file
- character(len=150) OUTPUT_FILES
-
- ! section added for SAC
- double precision tshift_cmt,t_shift,elat,elon,depth
- double precision cmt_lat,cmt_lon,cmt_depth,cmt_hdur
-
- double precision, dimension(nrec) :: stlat,stlon,stele,stbur
-
- ! variables for SAC header fields
- integer yr,jda,ho,mi
- double precision sec
- character(len=20) event_name
- character(len=150) MODEL
-
- ! flags to determine seismogram type
- logical OUTPUT_SEISMOS_ASCII_TEXT, OUTPUT_SEISMOS_SAC_ALPHANUM, &
- OUTPUT_SEISMOS_SAC_BINARY
- ! flag whether seismograms are ouput for North-East-Z component or Radial-Transverse-Z
- logical ROTATE_SEISMOGRAMS_RT
-
- ! save all seismograms in one large combined file instead of one file per seismogram
- ! to avoid overloading shared non-local file systems such as GPFS for instance
- logical SAVE_ALL_SEISMOS_IN_ONE_FILE
- logical USE_BINARY_FOR_LARGE_FILE
-
-! local parameters
- character(len=2) bic
- ! variables used for calculation of backazimuth and
- ! rotation of components if ROTATE_SEISMOGRAMS=.true.
- integer ior_start,ior_end
- double precision backaz
- real(kind=CUSTOM_REAL) phi,cphi,sphi
- integer isample
-
- !----------------------------------------------------------------
-
+ ! gets band code
call band_instrument_code(DT,bic)
+
if (ROTATE_SEISMOGRAMS_RT) then ! iorientation 1=N,2=E,3=Z,4=R,5=T
ior_start=3 ! starting from Z
ior_end =5 ! ending with T => ZRT
@@ -443,8 +381,6 @@
ior_end =3 ! ending with Z => NEZ
endif
- !do iorientation = 1,NDIM
- !do iorientation = 1,5 ! BS BS changed from 3 (NEZ) to 5 (NEZRT) components
do iorientation = ior_start,ior_end ! BS BS changed according to ROTATE_SEISMOGRAMS_RT
if(iorientation == 1) then
@@ -528,7 +464,6 @@
! SAC output format
if (OUTPUT_SEISMOS_SAC_ALPHANUM .or. OUTPUT_SEISMOS_SAC_BINARY) then
-
call write_output_SAC(seismogram_tmp,irec, &
station_name,network_name,stlat,stlon,stele,stbur,nrec, &
ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,DT,hdur,it_end, &
@@ -543,7 +478,6 @@
! ASCII output format
if(OUTPUT_SEISMOS_ASCII_TEXT) then
-
call write_output_ASCII(seismogram_tmp, &
DT,hdur,OUTPUT_FILES, &
NTSTEP_BETWEEN_OUTPUT_SEISMOS,seismo_offset,seismo_current, &
@@ -556,108 +490,109 @@
end subroutine write_one_seismogram
-!=====================================================================
+!
+!-------------------------------------------------------------------------------------------------
+!
! write adjoint seismograms to text files
- subroutine write_adj_seismograms(seismograms,number_receiver_global, &
+ subroutine write_adj_seismograms(seismograms,number_receiver_global, &
nrec_local,it,nit_written,DT,NSTEP, &
NTSTEP_BETWEEN_OUTPUT_SEISMOS,hdur,LOCAL_PATH)
- implicit none
+ implicit none
- include "constants.h"
+ include "constants.h"
- integer nrec_local,NSTEP,NTSTEP_BETWEEN_OUTPUT_SEISMOS,it,nit_written
- integer, dimension(nrec_local) :: number_receiver_global
- real(kind=CUSTOM_REAL), dimension(9,nrec_local,NSTEP) :: seismograms
- double precision hdur,DT
- character(len=150) LOCAL_PATH
+ integer :: nrec_local,NSTEP,NTSTEP_BETWEEN_OUTPUT_SEISMOS,it,nit_written
+ integer, dimension(nrec_local) :: number_receiver_global
+ real(kind=CUSTOM_REAL), dimension(9,nrec_local,NSTEP) :: seismograms
+ double precision :: hdur,DT
+ character(len=150) :: LOCAL_PATH
- integer irec,irec_local
- integer iorientation,isample
+ integer :: irec,irec_local
+ integer :: iorientation,isample
- character(len=4) chn
- character(len=150) clean_LOCAL_PATH,final_LOCAL_PATH
- character(len=256) sisname
- character(len=2) bic
+ character(len=4) :: chn
+ character(len=150) :: clean_LOCAL_PATH,final_LOCAL_PATH
+ character(len=256) :: sisname
+ character(len=2) :: bic
- call band_instrument_code(DT,bic)
+ call band_instrument_code(DT,bic)
- do irec_local = 1,nrec_local
+ do irec_local = 1,nrec_local
-! get global number of that receiver
- irec = number_receiver_global(irec_local)
+ ! get global number of that receiver
+ irec = number_receiver_global(irec_local)
- do iorientation = 1,9
-
- if(iorientation == 1) then
+ do iorientation = 1,9
+ if(iorientation == 1) then
chn = 'SNN'
- else if(iorientation == 2) then
+ else if(iorientation == 2) then
chn = 'SEE'
- else if(iorientation == 3) then
+ else if(iorientation == 3) then
chn = 'SZZ'
- else if(iorientation == 4) then
+ else if(iorientation == 4) then
chn = 'SNE'
- else if(iorientation == 5) then
+ else if(iorientation == 5) then
chn = 'SNZ'
- else if(iorientation == 6) then
+ else if(iorientation == 6) then
chn = 'SEZ'
- else if(iorientation == 7) then
+ else if(iorientation == 7) then
!chn = 'LHN'
chn = bic(1:2)//'N'
- else if(iorientation == 8) then
+ else if(iorientation == 8) then
chn = bic(1:2)//'E'
- else if(iorientation == 9) then
+ else if(iorientation == 9) then
chn = bic(1:2)//'Z'
- endif
+ endif
-! create the name of the seismogram file for each slice
-! file name includes the name of the station, the network and the component
- write(sisname,"(a,i6.6,'.',a,'.',a3,'.sem')") 'S',irec,'NT',chn
+ ! create the name of the seismogram file for each slice
+ ! file name includes the name of the station, the network and the component
+ write(sisname,"(a,i6.6,'.',a,'.',a3,'.sem')") 'S',irec,'NT',chn
-! suppress white spaces if any
- clean_LOCAL_PATH = adjustl(LOCAL_PATH)
+ ! suppress white spaces if any
+ clean_LOCAL_PATH = adjustl(LOCAL_PATH)
-! create full final local path
- final_LOCAL_PATH = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // '/'
+ ! create full final local path
+ final_LOCAL_PATH = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // '/'
-! save seismograms in text format with no subsampling.
-! Because we do not subsample the output, this can result in large files
-! if the simulation uses many time steps. However, subsampling the output
-! here would result in a loss of accuracy when one later convolves
-! the results with the source time function
- if(it <= NTSTEP_BETWEEN_OUTPUT_SEISMOS) then
- !open new file
- open(unit=IOUT,file=final_LOCAL_PATH(1:len_trim(final_LOCAL_PATH))//sisname(1:len_trim(sisname)),&
- status='unknown',action='write')
- else if(it > NTSTEP_BETWEEN_OUTPUT_SEISMOS) then
- !append to existing file
- open(unit=IOUT,file=final_LOCAL_PATH(1:len_trim(final_LOCAL_PATH))//sisname(1:len_trim(sisname)),&
- status='old',position='append',action='write')
- endif
-! make sure we never write more than the maximum number of time steps
-! subtract half duration of the source to make sure travel time is correct
- do isample = nit_written+1,min(it,NSTEP)
-! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- write(IOUT,*) sngl(dble(isample-1)*DT - hdur),' ',seismograms(iorientation,irec_local,isample-nit_written)
- else
- write(IOUT,*) dble(isample-1)*DT - hdur,' ',seismograms(iorientation,irec_local,isample-nit_written)
- endif
- enddo
+ ! save seismograms in text format with no subsampling.
+ ! Because we do not subsample the output, this can result in large files
+ ! if the simulation uses many time steps. However, subsampling the output
+ ! here would result in a loss of accuracy when one later convolves
+ ! the results with the source time function
+ if(it <= NTSTEP_BETWEEN_OUTPUT_SEISMOS) then
+ !open new file
+ open(unit=IOUT,file=final_LOCAL_PATH(1:len_trim(final_LOCAL_PATH))//sisname(1:len_trim(sisname)),&
+ status='unknown',action='write')
+ else if(it > NTSTEP_BETWEEN_OUTPUT_SEISMOS) then
+ !append to existing file
+ open(unit=IOUT,file=final_LOCAL_PATH(1:len_trim(final_LOCAL_PATH))//sisname(1:len_trim(sisname)),&
+ status='old',position='append',action='write')
+ endif
+ ! make sure we never write more than the maximum number of time steps
+ ! subtract half duration of the source to make sure travel time is correct
+ do isample = nit_written+1,min(it,NSTEP)
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ write(IOUT,*) sngl(dble(isample-1)*DT - hdur),' ',seismograms(iorientation,irec_local,isample-nit_written)
+ else
+ write(IOUT,*) dble(isample-1)*DT - hdur,' ',seismograms(iorientation,irec_local,isample-nit_written)
+ endif
+ enddo
- close(IOUT)
+ close(IOUT)
+ enddo
+ enddo
- enddo
+ end subroutine write_adj_seismograms
- enddo
+!
+!-------------------------------------------------------------------------------------------------
+!
- end subroutine write_adj_seismograms
-
-!=====================================================================
-
subroutine band_instrument_code(DT,bic)
! This subroutine is to choose the appropriate band and instrument codes for channel names of seismograms
! based on the IRIS convention (first two letters of channel codes which were LH(Z/E/N) previously).
More information about the CIG-COMMITS
mailing list