[cig-commits] r20591 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src: cuda meshfem3D specfem3D
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Sat Aug 18 18:24:47 PDT 2012
Author: danielpeter
Date: 2012-08-18 18:24:46 -0700 (Sat, 18 Aug 2012)
New Revision: 20591
Modified:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/initialize_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90
Log:
fixes inner core coupling; fixes acceleration updates with rotation
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -79,7 +79,7 @@
int size_padded = ((int)ceil(((double)(mp->max_nibool_interfaces_outer_core))/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -161,7 +161,7 @@
int size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_outer_core)/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -84,7 +84,7 @@
size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_crust_mantle)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -124,7 +124,7 @@
size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_inner_core)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -210,7 +210,7 @@
size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_crust_mantle)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -256,7 +256,7 @@
size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_inner_core)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -374,7 +374,7 @@
int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -519,7 +519,7 @@
size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -557,7 +557,7 @@
size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -629,7 +629,7 @@
size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -665,7 +665,7 @@
size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -59,23 +59,25 @@
int i = threadIdx.x;
int j = threadIdx.y;
int k = threadIdx.z;
-
int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx
- if(isource < NSOURCES) { // when NSOURCES > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
-
+ // when NSOURCES > MAXIMUM_GRID_DIM, but mod(nspec_top,2) > 0, we end up with an extra block.
+ if(isource < NSOURCES) {
if(myrank == islice_selected_source[isource]) {
ispec = ispec_selected_source[isource]-1;
stf = (realw) stf_pre_compute[isource];
- iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+ iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
// note: for global version, sourcearrays has dimensions
// sourcearrays(NDIM,NGLLX,NGLLY,NGLLZ,NSOURCES)
- atomicAdd(&accel[3*iglob], sourcearrays[INDEX5(3,5,5,5, 0,i,j,k,isource)]*stf);
- atomicAdd(&accel[3*iglob+1], sourcearrays[INDEX5(3,5,5,5, 1,i,j,k,isource)]*stf);
- atomicAdd(&accel[3*iglob+2], sourcearrays[INDEX5(3,5,5,5, 2,i,j,k,isource)]*stf);
+ atomicAdd(&accel[3*iglob], sourcearrays[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
+ 0,i,j,k,isource)]*stf);
+ atomicAdd(&accel[3*iglob+1], sourcearrays[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
+ 1,i,j,k,isource)]*stf);
+ atomicAdd(&accel[3*iglob+2], sourcearrays[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
+ 2,i,j,k,isource)]*stf);
}
}
}
@@ -100,12 +102,12 @@
int num_blocks_x = NSOURCES;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(5,5,5);
+ dim3 threads(NGLLX,NGLLX,NGLLX);
// copies source time function buffer values to GPU
print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
@@ -148,12 +150,12 @@
int num_blocks_x = NSOURCES;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(5,5,5);
+ dim3 threads(NGLLX,NGLLX,NGLLX);
// copies source time function buffer values to GPU
print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
@@ -193,8 +195,8 @@
int irec_local = blockIdx.x + gridDim.x*blockIdx.y;
- if(irec_local < nadj_rec_local) { // when nrec > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
-
+ // when nrec > MAXIMUM_GRID_DIM, but mod(nspec_top,2) > 0, we end up with an extra block.
+ if(irec_local < nadj_rec_local) {
irec = pre_computed_irec[irec_local];
ispec = ispec_selected_rec[irec]-1;
@@ -205,13 +207,13 @@
// atomic operations are absolutely necessary for correctness!
atomicAdd(&accel[3*iglob], adj_sourcearrays[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
- 0,i,j,k,irec_local)]);
+ 0,i,j,k,irec_local)]);
atomicAdd(&accel[3*iglob+1], adj_sourcearrays[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
- 1,i,j,k,irec_local)]);
+ 1,i,j,k,irec_local)]);
atomicAdd(&accel[3*iglob+2], adj_sourcearrays[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
- 2,i,j,k,irec_local)]);
+ 2,i,j,k,irec_local)]);
}
}
@@ -233,15 +235,15 @@
// check if anything to do
if( mp->nadj_rec_local == 0 ) return;
- // make sure grid dimension is less than 65535 in x dimension
+ // make sure grid dimension is less than MAXIMUM_GRID_DIM in x dimension
int num_blocks_x = mp->nadj_rec_local;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
dim3 grid(num_blocks_x,num_blocks_y,1);
- dim3 threads(5,5,5);
+ dim3 threads(NGLLX,NGLLX,NGLLX);
// build slice of adj_sourcearrays because full array is *very* large.
// note: this extracts array values for local adjoint sources at given time step "time_index"
@@ -260,17 +262,17 @@
// note: global version uses dimensions
// h_adj_sourcearrays(NDIM,NGLLX,NGLLY,NGLLZ,nadj_rec_local,NTSTEP_BETWEEN_READ_ADJSRC)
mp->h_adj_sourcearrays_slice[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
- 0,i,j,k,irec_local-1)]
+ 0,i,j,k,irec_local-1)]
= h_adj_sourcearrays[INDEX6(NDIM,NGLLX,NGLLX,NGLLX,mp->nadj_rec_local,
0,i,j,k,irec_local-1,*time_index-1)];
mp->h_adj_sourcearrays_slice[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
- 1,i,j,k,irec_local-1)]
+ 1,i,j,k,irec_local-1)]
= h_adj_sourcearrays[INDEX6(NDIM,NGLLX,NGLLX,NGLLX,mp->nadj_rec_local,
1,i,j,k,irec_local-1,*time_index-1)];
mp->h_adj_sourcearrays_slice[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
- 2,i,j,k,irec_local-1)]
+ 2,i,j,k,irec_local-1)]
= h_adj_sourcearrays[INDEX6(NDIM,NGLLX,NGLLX,NGLLX,mp->nadj_rec_local,
2,i,j,k,irec_local-1,*time_index-1)];
}
@@ -283,7 +285,7 @@
// copies extracted array values onto GPU
cudaMemcpy(mp->d_adj_sourcearrays, mp->h_adj_sourcearrays_slice,
- (mp->nadj_rec_local)*3*NGLL3*sizeof(realw),cudaMemcpyHostToDevice);
+ (mp->nadj_rec_local)*NDIM*NGLL3*sizeof(realw),cudaMemcpyHostToDevice);
// the irec_local variable needs to be precomputed (as
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -100,9 +100,7 @@
iglob_oc = ibool_outer_core[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
// update fluid acceleration/pressure
- atomicAdd(&accel_outer_core[iglob_oc],+weight*displ_n);
-
- // }
+ atomicAdd(&accel_outer_core[iglob_oc], + weight*displ_n);
}
}
@@ -165,9 +163,8 @@
iglob_oc = ibool_outer_core[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
// update fluid acceleration/pressure
- atomicAdd(&accel_outer_core[iglob_oc],+ weight*displ_n);
-
- // }
+ // note: sign changes to minus because of normal pointing down into inner core
+ atomicAdd(&accel_outer_core[iglob_oc], - weight*displ_n);
}
}
@@ -184,7 +181,7 @@
int num_blocks_x = mp->nspec2D_top_outer_core;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -238,7 +235,7 @@
int num_blocks_x = mp->nspec2D_bottom_outer_core;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -294,9 +291,9 @@
realw* wgllwgll_xy,
int* ibool_outer_core,
int* ibelm_top_outer_core,
- double RHO_TOP_OC,
+ realw RHO_TOP_OC,
realw minus_g_cmb,
- int GRAVITY_VAL,
+ int GRAVITY,
int NSPEC_BOTTOM_CM) {
int i = threadIdx.x;
@@ -331,11 +328,11 @@
iglob_cm = ibool_crust_mantle[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
// compute pressure, taking gravity into account
- if( GRAVITY_VAL ){
- pressure = RHO_TOP_OC * ( - accel_outer_core[iglob_oc] + minus_g_cmb
- * (displ_crust_mantle[iglob_cm*3]*nx
- + displ_crust_mantle[iglob_cm*3+1]*ny
- + displ_crust_mantle[iglob_cm*3+2]*nz) );
+ if( GRAVITY ){
+ pressure = RHO_TOP_OC * ( - accel_outer_core[iglob_oc]
+ + minus_g_cmb * (displ_crust_mantle[iglob_cm*3]*nx
+ + displ_crust_mantle[iglob_cm*3+1]*ny
+ + displ_crust_mantle[iglob_cm*3+2]*nz) );
}else{
pressure = - RHO_TOP_OC * accel_outer_core[iglob_oc];
}
@@ -344,11 +341,9 @@
weight = jacobian2D_top_outer_core[INDEX3(NGLLX,NGLLX,i,j,iface)]*wgllwgll_xy[INDEX2(NGLLX,i,j)];
// update fluid acceleration/pressure
- atomicAdd(&accel_crust_mantle[iglob_cm*3],+ weight*nx*pressure);
- atomicAdd(&accel_crust_mantle[iglob_cm*3+1],+ weight*ny*pressure);
- atomicAdd(&accel_crust_mantle[iglob_cm*3+2],+ weight*nz*pressure);
-
- // }
+ atomicAdd(&accel_crust_mantle[iglob_cm*3], + weight*nx*pressure);
+ atomicAdd(&accel_crust_mantle[iglob_cm*3+1], + weight*ny*pressure);
+ atomicAdd(&accel_crust_mantle[iglob_cm*3+2], + weight*nz*pressure);
}
}
@@ -364,9 +359,9 @@
realw* wgllwgll_xy,
int* ibool_outer_core,
int* ibelm_bottom_outer_core,
- double RHO_BOTTOM_OC,
+ realw RHO_BOTTOM_OC,
realw minus_g_icb,
- int GRAVITY_VAL,
+ int GRAVITY,
int NSPEC_TOP_IC) {
int i = threadIdx.x;
@@ -401,11 +396,11 @@
iglob_ic = ibool_inner_core[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
// compute pressure, taking gravity into account
- if( GRAVITY_VAL ){
- pressure = RHO_BOTTOM_OC * ( - accel_outer_core[iglob_oc] + minus_g_icb
- * (displ_inner_core[iglob_ic*3]*nx
- + displ_inner_core[iglob_ic*3+1]*ny
- + displ_inner_core[iglob_ic*3+2]*nz) );
+ if( GRAVITY ){
+ pressure = RHO_BOTTOM_OC * ( - accel_outer_core[iglob_oc]
+ + minus_g_icb * (displ_inner_core[iglob_ic*3]*nx
+ + displ_inner_core[iglob_ic*3+1]*ny
+ + displ_inner_core[iglob_ic*3+2]*nz) );
}else{
pressure = - RHO_BOTTOM_OC * accel_outer_core[iglob_oc];
}
@@ -414,11 +409,10 @@
weight = jacobian2D_bottom_outer_core[INDEX3(NGLLX,NGLLX,i,j,iface)]*wgllwgll_xy[INDEX2(NGLLX,i,j)];
// update fluid acceleration/pressure
- atomicAdd(&accel_inner_core[iglob_ic*3],+ weight*nx*pressure);
- atomicAdd(&accel_inner_core[iglob_ic*3+1],+ weight*ny*pressure);
- atomicAdd(&accel_inner_core[iglob_ic*3+2],+ weight*nz*pressure);
-
- // }
+ // note: sign changes to minus because of normal pointing down into inner core
+ atomicAdd(&accel_inner_core[iglob_ic*3], - weight*nx*pressure);
+ atomicAdd(&accel_inner_core[iglob_ic*3+1], - weight*ny*pressure);
+ atomicAdd(&accel_inner_core[iglob_ic*3+2], - weight*nz*pressure);
}
}
@@ -426,10 +420,7 @@
extern "C"
void FC_FUNC_(compute_coupling_cmb_fluid_cuda,
- COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f,
- double RHO_TOP_OC,
- realw minus_g_cmb,
- int GRAVITY_VAL) {
+ COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f) {
TRACE("compute_coupling_cmb_fluid_cuda");
//double start_time = get_time();
@@ -438,7 +429,7 @@
int num_blocks_x = mp->nspec2D_bottom_crust_mantle;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -457,9 +448,9 @@
mp->d_wgllwgll_xy,
mp->d_ibool_outer_core,
mp->d_ibelm_top_outer_core,
- RHO_TOP_OC,
- minus_g_cmb,
- GRAVITY_VAL,
+ mp->RHO_TOP_OC,
+ mp->minus_g_cmb,
+ mp->gravity,
mp->nspec2D_bottom_crust_mantle);
// adjoint simulations
@@ -474,9 +465,9 @@
mp->d_wgllwgll_xy,
mp->d_ibool_outer_core,
mp->d_ibelm_top_outer_core,
- RHO_TOP_OC,
- minus_g_cmb,
- GRAVITY_VAL,
+ mp->RHO_TOP_OC,
+ mp->minus_g_cmb,
+ mp->gravity,
mp->nspec2D_bottom_crust_mantle);
}
@@ -491,10 +482,7 @@
extern "C"
void FC_FUNC_(compute_coupling_icb_fluid_cuda,
- COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f,
- double RHO_BOTTOM_OC,
- realw minus_g_icb,
- int GRAVITY_VAL) {
+ COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f) {
TRACE("compute_coupling_icb_fluid_cuda");
//double start_time = get_time();
@@ -503,7 +491,7 @@
int num_blocks_x = mp->nspec2D_top_inner_core;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -522,9 +510,9 @@
mp->d_wgllwgll_xy,
mp->d_ibool_outer_core,
mp->d_ibelm_bottom_outer_core,
- RHO_BOTTOM_OC,
- minus_g_icb,
- GRAVITY_VAL,
+ mp->RHO_BOTTOM_OC,
+ mp->minus_g_icb,
+ mp->gravity,
mp->nspec2D_top_inner_core);
// adjoint simulations
@@ -539,9 +527,9 @@
mp->d_wgllwgll_xy,
mp->d_ibool_outer_core,
mp->d_ibelm_bottom_outer_core,
- RHO_BOTTOM_OC,
- minus_g_icb,
- GRAVITY_VAL,
+ mp->RHO_BOTTOM_OC,
+ mp->minus_g_icb,
+ mp->gravity,
mp->nspec2D_top_inner_core);
}
@@ -559,13 +547,13 @@
/* ----------------------------------------------------------------------------------------------- */
__global__ void compute_coupling_ocean_cuda_kernel(realw* accel_crust_mantle,
- realw* rmassx_crust_mantle,
- realw* rmassy_crust_mantle,
- realw* rmassz_crust_mantle,
- realw* rmass_ocean_load,
- int npoin_ocean_load,
- int* iglob_ocean_load,
- realw* normal_ocean_load) {
+ realw* rmassx_crust_mantle,
+ realw* rmassy_crust_mantle,
+ realw* rmassz_crust_mantle,
+ realw* rmass_ocean_load,
+ int npoin_ocean_load,
+ int* ibool_ocean_load,
+ realw* normal_ocean_load) {
int ipoin = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
@@ -579,7 +567,7 @@
// get global point number
// "-1" from index values to convert from Fortran-> C indexing
- iglob = iglob_ocean_load[ipoin] - 1;
+ iglob = ibool_ocean_load[ipoin] - 1;
// get normal
nx = normal_ocean_load[INDEX2(NDIM,0,ipoin)]; // (1,ipoin)
@@ -624,7 +612,7 @@
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -639,7 +627,7 @@
mp->d_rmassz_crust_mantle,
mp->d_rmass_ocean_load,
mp->npoin_oceans,
- mp->d_iglob_ocean_load,
+ mp->d_ibool_ocean_load,
mp->d_normal_ocean_load);
// for backward/reconstructed potentials
@@ -650,7 +638,7 @@
mp->d_rmassz_crust_mantle,
mp->d_rmass_ocean_load,
mp->npoin_oceans,
- mp->d_iglob_ocean_load,
+ mp->d_ibool_ocean_load,
mp->d_normal_ocean_load);
}
}else{
@@ -660,7 +648,7 @@
mp->d_rmassz_crust_mantle,
mp->d_rmass_ocean_load,
mp->npoin_oceans,
- mp->d_iglob_ocean_load,
+ mp->d_ibool_ocean_load,
mp->d_normal_ocean_load);
// for backward/reconstructed potentials
@@ -671,7 +659,7 @@
mp->d_rmassz_crust_mantle,
mp->d_rmass_ocean_load,
mp->npoin_oceans,
- mp->d_iglob_ocean_load,
+ mp->d_ibool_ocean_load,
mp->d_normal_ocean_load);
}
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -85,7 +85,6 @@
*sigma_xz = *sigma_xz - R_xz[i_sls + N_SLS*(tx + NGLL3*working_element)];
*sigma_yz = *sigma_yz - R_yz[i_sls + N_SLS*(tx + NGLL3*working_element)];
}
- return;
}
/* ----------------------------------------------------------------------------------------------- */
@@ -167,7 +166,6 @@
R_yz[i_sls + N_SLS*(tx + NGLL3*working_element)] =
alphaval_loc * R_yz[i_sls + N_SLS*(tx + NGLL3*working_element)] + betaval_loc * Sn + gammaval_loc * Snp1;
}
- return;
}
/* ----------------------------------------------------------------------------------------------- */
@@ -224,10 +222,17 @@
theta = d_ystore[iglob];
phi = d_zstore[iglob];
- cos_theta = cos(theta);
- sin_theta = sin(theta);
- cos_phi = cos(phi);
- sin_phi = sin(phi);
+ if( sizeof( theta ) == sizeof( float ) ){
+ // float operations
+ // sincos function return sinus and cosine for given value
+ sincosf(theta, &sin_theta, &cos_theta);
+ sincosf(phi, &sin_phi, &cos_phi);
+ }else{
+ cos_theta = cos(theta);
+ sin_theta = sin(theta);
+ cos_phi = cos(phi);
+ sin_phi = sin(phi);
+ }
// for efficiency replace with lookup table every 100 m in radial direction
// note: radius in crust mantle should never be zero,
@@ -288,8 +293,6 @@
*rho_s_H1 = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl);
*rho_s_H2 = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl);
*rho_s_H3 = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl);
-
- return;
}
/* ----------------------------------------------------------------------------------------------- */
@@ -367,7 +370,6 @@
c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl;
*sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl +
c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl;
- return;
}
/* ----------------------------------------------------------------------------------------------- */
@@ -674,8 +676,6 @@
*sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl +
c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl;
-
- return;
}
@@ -695,7 +695,7 @@
int* d_phase_ispec_inner,
int num_phase_ispec,
int d_iphase,
- realw d_deltat,
+ realw deltat,
int use_mesh_coloring_gpu,
realw* d_displ,
realw* d_veloc,
@@ -737,9 +737,9 @@
realw* wgll_cube,
int NSPEC_CRUST_MANTLE_STRAIN_ONLY){
- /* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
+ // block id == spectral-element id
int bx = blockIdx.y*gridDim.x+blockIdx.x;
- /* int bx = blockIdx.x; */
+ // thread id == GLL point id
int tx = threadIdx.x;
int K = (tx/NGLL2);
@@ -838,13 +838,13 @@
// use first order Taylor expansion of displacement for local storage of stresses
// at this current time step, to fix attenuation in a consistent way
#ifdef USE_TEXTURES_FIELDS
- s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + d_deltat * tex1Dfetch(d_displ_cm_tex, iglob*3);
- s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * tex1Dfetch(d_displ_cm_tex, iglob*3 + 1);
- s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * tex1Dfetch(d_displ_cm_tex, iglob*3 + 2);
+ s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + deltat * tex1Dfetch(d_displ_cm_tex, iglob*3);
+ s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + deltat * tex1Dfetch(d_displ_cm_tex, iglob*3 + 1);
+ s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + deltat * tex1Dfetch(d_displ_cm_tex, iglob*3 + 2);
#else
- s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + d_deltat * d_veloc[iglob*3];
- s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * d_veloc[iglob*3 + 1];
- s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * d_veloc[iglob*3 + 2];
+ s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + deltat * d_veloc[iglob*3];
+ s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + deltat * d_veloc[iglob*3 + 1];
+ s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + deltat * d_veloc[iglob*3 + 2];
#endif
}else{
// takes old routines
@@ -1419,7 +1419,6 @@
/* ----------------------------------------------------------------------------------------------- */
void Kernel_2_crust_mantle(int nb_blocks_to_compute,Mesh* mp,
- realw d_deltat,
int d_iphase,
int* d_ibool,
int* d_ispec_is_tiso,
@@ -1459,8 +1458,7 @@
realw* d_c25store,realw* d_c26store,realw* d_c33store,
realw* d_c34store,realw* d_c35store,realw* d_c36store,
realw* d_c44store,realw* d_c45store,realw* d_c46store,
- realw* d_c55store,realw* d_c56store,realw* d_c66store
- ){
+ realw* d_c55store,realw* d_c56store,realw* d_c66store){
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("before kernel Kernel_2_crust_mantle");
@@ -1472,7 +1470,7 @@
int num_blocks_x = nb_blocks_to_compute;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -1495,7 +1493,7 @@
mp->d_phase_ispec_inner_crust_mantle,
mp->num_phase_ispec_crust_mantle,
d_iphase,
- d_deltat,
+ mp->deltat,
mp->use_mesh_coloring_gpu,
mp->d_displ_crust_mantle,
mp->d_veloc_crust_mantle,
@@ -1546,7 +1544,7 @@
mp->d_phase_ispec_inner_crust_mantle,
mp->num_phase_ispec_crust_mantle,
d_iphase,
- d_deltat,
+ mp->b_deltat,
mp->use_mesh_coloring_gpu,
mp->d_b_displ_crust_mantle,
mp->d_b_veloc_crust_mantle,
@@ -1609,7 +1607,6 @@
extern "C"
void FC_FUNC_(compute_forces_crust_mantle_cuda,
COMPUTE_FORCES_CRUST_MANTLE_CUDA)(long* Mesh_pointer_f,
- realw* deltat,
int* iphase) {
TRACE("compute_forces_crust_mantle_cuda");
@@ -1701,7 +1698,6 @@
#endif
Kernel_2_crust_mantle(nb_blocks_to_compute,mp,
- *deltat,
*iphase,
mp->d_ibool_crust_mantle + color_offset_nonpadded,
mp->d_ispec_is_tiso_crust_mantle + color_offset_ispec,
@@ -1794,7 +1790,6 @@
// no mesh coloring: uses atomic updates
Kernel_2_crust_mantle(num_elements,mp,
- *deltat,
*iphase,
mp->d_ibool_crust_mantle,
mp->d_ispec_is_tiso_crust_mantle,
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -87,7 +87,6 @@
*sigma_xz = *sigma_xz - R_xz[offset];
*sigma_yz = *sigma_yz - R_yz[offset];
}
- return;
}
/* ----------------------------------------------------------------------------------------------- */
@@ -157,7 +156,6 @@
Snp1 = factor_loc * epsilondev_yz_loc;
R_yz[offset] = alphaval_loc * R_yz[offset] + betaval_loc * Sn + gammaval_loc * Snp1;
}
- return;
}
/* ----------------------------------------------------------------------------------------------- */
@@ -220,10 +218,17 @@
theta = d_ystore[iglob];
phi = d_zstore[iglob];
- cos_theta = cos(theta);
- sin_theta = sin(theta);
- cos_phi = cos(phi);
- sin_phi = sin(phi);
+ if( sizeof( theta ) == sizeof( float ) ){
+ // float operations
+ // sincos function return sinus and cosine for given value
+ sincosf(theta, &sin_theta, &cos_theta);
+ sincosf(phi, &sin_phi, &cos_phi);
+ }else{
+ cos_theta = cos(theta);
+ sin_theta = sin(theta);
+ cos_phi = cos(phi);
+ sin_phi = sin(phi);
+ }
// for efficiency replace with lookup table every 100 m in radial direction
// note: radius in crust mantle should never be zero,
@@ -286,8 +291,6 @@
*rho_s_H1 = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl);
*rho_s_H2 = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl);
*rho_s_H3 = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl);
-
- return;
}
@@ -306,7 +309,7 @@
int* d_phase_ispec_inner,
int num_phase_ispec,
int d_iphase,
- realw d_deltat,
+ realw deltat,
int use_mesh_coloring_gpu,
realw* d_displ,
realw* d_veloc,
@@ -343,9 +346,9 @@
int NSPEC_INNER_CORE_STRAIN_ONLY,
int NSPEC_INNER_CORE){
- /* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
+ // block id
int bx = blockIdx.y*gridDim.x+blockIdx.x;
- /* int bx = blockIdx.x; */
+ // thread id
int tx = threadIdx.x;
int K = (tx/NGLL2);
@@ -449,13 +452,13 @@
// use first order Taylor expansion of displacement for local storage of stresses
// at this current time step, to fix attenuation in a consistent way
#ifdef USE_TEXTURES_FIELDS
- s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + d_deltat * tex1Dfetch(d_displ_ic_tex, iglob*3);
- s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * tex1Dfetch(d_displ_ic_tex, iglob*3 + 1);
- s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * tex1Dfetch(d_displ_ic_tex, iglob*3 + 2);
+ s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + deltat * tex1Dfetch(d_displ_ic_tex, iglob*3);
+ s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + deltat * tex1Dfetch(d_displ_ic_tex, iglob*3 + 1);
+ s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + deltat * tex1Dfetch(d_displ_ic_tex, iglob*3 + 2);
#else
- s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + d_deltat * d_veloc[iglob*3];
- s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * d_veloc[iglob*3 + 1];
- s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * d_veloc[iglob*3 + 2];
+ s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + deltat * d_veloc[iglob*3];
+ s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + deltat * d_veloc[iglob*3 + 1];
+ s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + deltat * d_veloc[iglob*3 + 2];
#endif
}
else{
@@ -1070,7 +1073,6 @@
/* ----------------------------------------------------------------------------------------------- */
void Kernel_2_inner_core(int nb_blocks_to_compute,Mesh* mp,
- realw d_deltat,
int d_iphase,
int* d_ibool,
int* d_idoubling,
@@ -1116,7 +1118,7 @@
int num_blocks_x = nb_blocks_to_compute;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -1139,7 +1141,7 @@
mp->d_phase_ispec_inner_inner_core,
mp->num_phase_ispec_inner_core,
d_iphase,
- d_deltat,
+ mp->deltat,
mp->use_mesh_coloring_gpu,
mp->d_displ_inner_core,
mp->d_veloc_inner_core,
@@ -1188,7 +1190,7 @@
mp->d_phase_ispec_inner_inner_core,
mp->num_phase_ispec_inner_core,
d_iphase,
- d_deltat,
+ mp->b_deltat,
mp->use_mesh_coloring_gpu,
mp->d_b_displ_inner_core,
mp->d_b_veloc_inner_core,
@@ -1249,7 +1251,6 @@
extern "C"
void FC_FUNC_(compute_forces_inner_core_cuda,
COMPUTE_FORCES_INNER_CORE_CUDA)(long* Mesh_pointer_f,
- realw* deltat,
int* iphase) {
TRACE("compute_forces_inner_core_cuda");
@@ -1386,7 +1387,6 @@
#endif
Kernel_2_inner_core(nb_blocks_to_compute,mp,
- *deltat,
*iphase,
mp->d_ibool_inner_core + color_offset_nonpadded,
mp->d_idoubling_inner_core + color_offset_ispec,
@@ -1429,8 +1429,7 @@
mp->d_c12store_inner_core + color_offset,
mp->d_c13store_inner_core + color_offset,
mp->d_c33store_inner_core + color_offset,
- mp->d_c44store_inner_core + color_offset
- );
+ mp->d_c44store_inner_core + color_offset);
// for padded and aligned arrays
color_offset += nb_blocks_to_compute * NGLL3_PADDED;
@@ -1459,7 +1458,6 @@
// no mesh coloring: uses atomic updates
Kernel_2_inner_core(num_elements,mp,
- *deltat,
*iphase,
mp->d_ibool_inner_core,
mp->d_idoubling_inner_core,
@@ -1493,8 +1491,7 @@
mp->d_b_R_xz_inner_core,
mp->d_b_R_yz_inner_core,
mp->d_c11store_inner_core,mp->d_c12store_inner_core,mp->d_c13store_inner_core,
- mp->d_c33store_inner_core,mp->d_c44store_inner_core
- );
+ mp->d_c33store_inner_core,mp->d_c44store_inner_core);
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -60,39 +60,40 @@
realw deltat,
realw* d_A_array_rotation,
realw* d_B_array_rotation,
- realw dpotentialdxl, realw dpotentialdyl,
+ realw dpotentialdxl,
+ realw dpotentialdyl,
realw* dpotentialdx_with_rot,
realw* dpotentialdy_with_rot) {
realw two_omega_deltat,cos_two_omega_t,sin_two_omega_t;
realw A_rotation,B_rotation;
- realw ux_rotation,uy_rotation;
realw source_euler_A,source_euler_B;
// store the source for the Euler scheme for A_rotation and B_rotation
+ if( sizeof( sin_two_omega_t ) == sizeof( float ) ){
+ // float operations
+ // sincos function return sinus and cosine for given value
+ sincosf(two_omega_earth*time, &sin_two_omega_t, &cos_two_omega_t);
+ }else{
+ cos_two_omega_t = cos(two_omega_earth*time);
+ sin_two_omega_t = sin(two_omega_earth*time);
+ }
+
+ // time step deltat of Euler scheme is included in the source
two_omega_deltat = deltat * two_omega_earth;
- cos_two_omega_t = cos(two_omega_earth*time);
- sin_two_omega_t = sin(two_omega_earth*time);
-
- // time step deltat of Euler scheme is included in the source
source_euler_A = two_omega_deltat * (cos_two_omega_t * dpotentialdyl + sin_two_omega_t * dpotentialdxl);
source_euler_B = two_omega_deltat * (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl);
A_rotation = d_A_array_rotation[tx + working_element*NGLL3]; // (non-padded offset)
B_rotation = d_B_array_rotation[tx + working_element*NGLL3];
- ux_rotation = A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t;
- uy_rotation = - A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t;
+ *dpotentialdx_with_rot = dpotentialdxl + ( A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t);
+ *dpotentialdy_with_rot = dpotentialdyl + (- A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t);
- *dpotentialdx_with_rot = dpotentialdxl + ux_rotation;
- *dpotentialdy_with_rot = dpotentialdyl + uy_rotation;
-
// updates rotation term with Euler scheme (non-padded offset)
d_A_array_rotation[tx + working_element*NGLL3] += source_euler_A;
d_B_array_rotation[tx + working_element*NGLL3] += source_euler_B;
-
- return;
}
@@ -105,31 +106,35 @@
__global__ void Kernel_2_outer_core_impl(int nb_blocks_to_compute,
- int NGLOB, int* d_ibool,
- int* d_phase_ispec_inner,
- int num_phase_ispec,
- int d_iphase,
- int use_mesh_coloring_gpu,
- realw* d_potential, realw* d_potential_dot_dot,
- realw* d_xix, realw* d_xiy, realw* d_xiz,
- realw* d_etax, realw* d_etay, realw* d_etaz,
- realw* d_gammax, realw* d_gammay, realw* d_gammaz,
- realw* d_hprime_xx,
- realw* hprimewgll_xx, realw* hprimewgll_yy, realw* hprimewgll_zz,
- realw* wgllwgll_xy,realw* wgllwgll_xz,realw* wgllwgll_yz,
- int GRAVITY,
- realw* d_xstore, realw* d_ystore, realw* d_zstore,
- realw* d_d_ln_density_dr_table,
- realw* d_minus_rho_g_over_kappa_fluid,
- realw* wgll_cube,
- int ROTATION,
- realw time,
- realw two_omega_earth,
- realw deltat,
- realw* d_A_array_rotation,realw* d_B_array_rotation,
- int NSPEC_OUTER_CORE){
+ int NGLOB,
+ int* d_ibool,
+ int* d_phase_ispec_inner,
+ int num_phase_ispec,
+ int d_iphase,
+ int use_mesh_coloring_gpu,
+ realw* d_potential, realw* d_potential_dot_dot,
+ realw* d_xix, realw* d_xiy, realw* d_xiz,
+ realw* d_etax, realw* d_etay, realw* d_etaz,
+ realw* d_gammax, realw* d_gammay, realw* d_gammaz,
+ realw* d_hprime_xx,
+ realw* hprimewgll_xx, realw* hprimewgll_yy, realw* hprimewgll_zz,
+ realw* wgllwgll_xy,realw* wgllwgll_xz,realw* wgllwgll_yz,
+ int GRAVITY,
+ realw* d_xstore, realw* d_ystore, realw* d_zstore,
+ realw* d_d_ln_density_dr_table,
+ realw* d_minus_rho_g_over_kappa_fluid,
+ realw* wgll_cube,
+ int ROTATION,
+ realw time,
+ realw two_omega_earth,
+ realw deltat,
+ realw* d_A_array_rotation,
+ realw* d_B_array_rotation,
+ int NSPEC_OUTER_CORE){
+ // block id == spectral-element id
int bx = blockIdx.y*gridDim.x+blockIdx.x;
+ // thread id == GLL point id
int tx = threadIdx.x;
// R_EARTH_KM is the radius of the bottom of the oceans (radius of Earth in km)
@@ -142,34 +147,33 @@
int I = (tx-K*NGLL2-J*NGLLX);
int active,offset;
- int iglob = 0;
+ int iglob;
int working_element;
realw temp1l,temp2l,temp3l;
realw xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl;
+
realw dpotentialdxl,dpotentialdyl,dpotentialdzl;
realw dpotentialdx_with_rot,dpotentialdy_with_rot;
+
realw fac1,fac2,fac3;
realw sum_terms;
realw gravity_term;
realw gxl,gyl,gzl;
+
realw radius,theta,phi;
realw cos_theta,sin_theta,cos_phi,sin_phi;
realw grad_x_ln_rho,grad_y_ln_rho,grad_z_ln_rho;
int int_radius;
-
-
#ifndef MANUALLY_UNROLLED_LOOPS
int l;
int offset1,offset2,offset3;
#endif
__shared__ realw s_dummy_loc[NGLL3];
-
__shared__ realw s_temp1[NGLL3];
__shared__ realw s_temp2[NGLL3];
__shared__ realw s_temp3[NGLL3];
-
__shared__ realw sh_hprime_xx[NGLL2];
// use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
@@ -275,8 +279,8 @@
// compute the jacobian
jacobianl = 1.f / (xixl*(etayl*gammazl-etazl*gammayl)
- -xiyl*(etaxl*gammazl-etazl*gammaxl)
- +xizl*(etaxl*gammayl-etayl*gammaxl));
+ - xiyl*(etaxl*gammazl-etazl*gammaxl)
+ + xizl*(etaxl*gammayl-etayl*gammaxl));
// derivatives of potential
dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l;
@@ -285,12 +289,12 @@
// compute contribution of rotation and add to gradient of potential
// this term has no Z component
- if(ROTATION){
- compute_element_oc_rotation(tx,working_element,time,two_omega_earth,deltat,
+ if( ROTATION ){
+ compute_element_oc_rotation(tx,working_element,
+ time,two_omega_earth,deltat,
d_A_array_rotation,d_B_array_rotation,
dpotentialdxl,dpotentialdyl,
&dpotentialdx_with_rot,&dpotentialdy_with_rot);
-
}else{
dpotentialdx_with_rot = dpotentialdxl;
dpotentialdy_with_rot = dpotentialdyl;
@@ -304,16 +308,28 @@
theta = d_ystore[iglob];
phi = d_zstore[iglob];
- cos_theta = cos(theta);
- sin_theta = sin(theta);
- cos_phi = cos(phi);
- sin_phi = sin(phi);
+ if( sizeof( theta ) == sizeof( float ) ){
+ // float operations
+ // sincos function return sinus and cosine for given value
+ sincosf(theta, &sin_theta, &cos_theta);
+ sincosf(phi, &sin_phi, &cos_phi);
+ }else{
+ cos_theta = cos(theta);
+ sin_theta = sin(theta);
+ cos_phi = cos(phi);
+ sin_phi = sin(phi);
+ }
// for efficiency replace with lookup table every 100 m in radial direction
// note: radius in outer core should never be zero,
// and arrays in C start from 0, thus we need to subtract -1
int_radius = rint(radius * R_EARTH_KM * 10.0f ) - 1;
+ //debug: checks bounds NRAD_GRAVITY == 70000
+ //if( int_radius < 0 || int_radius >= 70000 ){
+ // printf("gravity: in_radius out of bounds %d radius=%e\n",int_radius,radius);
+ //}
+
// depending on gravity or not, different potential definitions are used
if( ! GRAVITY ){
// add (chi/rho)grad(rho) term in no gravity case
@@ -324,9 +340,9 @@
grad_z_ln_rho = cos_theta * d_d_ln_density_dr_table[int_radius];
// adding (chi/rho)grad(rho)
- dpotentialdx_with_rot = dpotentialdx_with_rot + s_dummy_loc[tx] * grad_x_ln_rho;
- dpotentialdy_with_rot = dpotentialdy_with_rot + s_dummy_loc[tx] * grad_y_ln_rho;
- dpotentialdzl = dpotentialdzl + s_dummy_loc[tx] * grad_z_ln_rho;
+ dpotentialdx_with_rot += s_dummy_loc[tx] * grad_x_ln_rho;
+ dpotentialdy_with_rot += s_dummy_loc[tx] * grad_y_ln_rho;
+ dpotentialdzl += s_dummy_loc[tx] * grad_z_ln_rho;
}else{
@@ -360,9 +376,12 @@
}
// form the dot product with the test vector
- s_temp1[tx] = jacobianl*(xixl*dpotentialdx_with_rot + xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl);
- s_temp2[tx] = jacobianl*(etaxl*dpotentialdx_with_rot + etayl*dpotentialdy_with_rot + etazl*dpotentialdzl);
- s_temp3[tx] = jacobianl*(gammaxl*dpotentialdx_with_rot + gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl);
+ s_temp1[tx] = jacobianl*(xixl*dpotentialdx_with_rot
+ + xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl);
+ s_temp2[tx] = jacobianl*(etaxl*dpotentialdx_with_rot
+ + etayl*dpotentialdy_with_rot + etazl*dpotentialdzl);
+ s_temp3[tx] = jacobianl*(gammaxl*dpotentialdx_with_rot
+ + gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl);
}
// synchronize all the threads (one thread for each of the NGLL grid points of the
@@ -454,7 +473,6 @@
atomicAdd(&d_potential_dot_dot[iglob],sum_terms);
}
-
}else{
atomicAdd(&d_potential_dot_dot[iglob],sum_terms);
@@ -473,8 +491,10 @@
realw* d_etax,realw* d_etay,realw* d_etaz,
realw* d_gammax,realw* d_gammay,realw* d_gammaz,
realw time, realw b_time,
- realw* d_A_array_rotation,realw* d_B_array_rotation,
- realw* d_b_A_array_rotation,realw* d_b_B_array_rotation){
+ realw* d_A_array_rotation,
+ realw* d_B_array_rotation,
+ realw* d_b_A_array_rotation,
+ realw* d_b_B_array_rotation){
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("before outer_core kernel Kernel_2");
@@ -486,14 +506,19 @@
int num_blocks_x = nb_blocks_to_compute;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
- int threads_2 = NGLL3_PADDED;//BLOCK_SIZE_K2;
- dim3 grid_2(num_blocks_x,num_blocks_y);
+ int blocksize = NGLL3_PADDED;
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+ // 2d grid
+ //int threads_2 = NGLL3_PADDED;//BLOCK_SIZE_K2;
+ //dim3 grid_2(num_blocks_x,num_blocks_y);
+
// Cuda timing
// cudaEvent_t start, stop;
// realw time;
@@ -501,7 +526,7 @@
// cudaEventCreate(&stop);
// cudaEventRecord( start, 0 );
- Kernel_2_outer_core_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
+ Kernel_2_outer_core_impl<<<grid,threads>>>(nb_blocks_to_compute,
mp->NGLOB_OUTER_CORE,
d_ibool,
mp->d_phase_ispec_inner_outer_core,
@@ -523,13 +548,14 @@
mp->d_wgll_cube,
mp->rotation,
time,
- mp->d_two_omega_earth,
- mp->d_deltat,
- d_A_array_rotation,d_B_array_rotation,
+ mp->two_omega_earth,
+ mp->deltat,
+ d_A_array_rotation,
+ d_B_array_rotation,
mp->NSPEC_OUTER_CORE);
if(mp->simulation_type == 3) {
- Kernel_2_outer_core_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
+ Kernel_2_outer_core_impl<<<grid,threads>>>(nb_blocks_to_compute,
mp->NGLOB_OUTER_CORE,
d_ibool,
mp->d_phase_ispec_inner_outer_core,
@@ -551,9 +577,10 @@
mp->d_wgll_cube,
mp->rotation,
b_time,
- mp->d_b_two_omega_earth,
- mp->d_b_deltat,
- d_b_A_array_rotation,d_b_B_array_rotation,
+ mp->b_two_omega_earth,
+ mp->b_deltat,
+ d_b_A_array_rotation,
+ d_b_B_array_rotation,
mp->NSPEC_OUTER_CORE);
}
@@ -563,7 +590,6 @@
// cudaEventDestroy( start );
// cudaEventDestroy( stop );
// printf("Kernel2 Execution Time: %f ms\n",time);
-
/* cudaThreadSynchronize(); */
/* TRACE("Kernel 2 finished"); */
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -581,9 +607,9 @@
extern "C"
void FC_FUNC_(compute_forces_outer_core_cuda,
COMPUTE_FORCES_OUTER_CORE_CUDA)(long* Mesh_pointer_f,
- int* iphase,
- realw* time_f,
- realw* b_time_f) {
+ int* iphase,
+ realw* time_f,
+ realw* b_time_f) {
TRACE("compute_forces_outer_core_cuda");
@@ -592,11 +618,11 @@
//double start_time = get_time();
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-
- int num_elements;
realw time = *time_f;
realw b_time = *b_time_f;
+ int num_elements;
+
if( *iphase == 1 )
num_elements = mp->nspec_outer_outer_core;
else
@@ -683,8 +709,7 @@
mp->d_A_array_rotation + color_offset_nonpadded,
mp->d_B_array_rotation + color_offset_nonpadded,
mp->d_b_A_array_rotation + color_offset_nonpadded,
- mp->d_b_B_array_rotation + color_offset_nonpadded
- );
+ mp->d_b_B_array_rotation + color_offset_nonpadded);
// for padded and aligned arrays
color_offset += nb_blocks_to_compute * NGLL3_PADDED;
@@ -702,9 +727,10 @@
mp->d_etax_outer_core,mp->d_etay_outer_core,mp->d_etaz_outer_core,
mp->d_gammax_outer_core,mp->d_gammay_outer_core,mp->d_gammaz_outer_core,
time,b_time,
- mp->d_A_array_rotation,mp->d_B_array_rotation,
- mp->d_b_A_array_rotation,mp->d_b_B_array_rotation
- );
+ mp->d_A_array_rotation,
+ mp->d_B_array_rotation,
+ mp->d_b_A_array_rotation,
+ mp->d_b_B_array_rotation);
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -226,7 +226,7 @@
int num_blocks_x = mp->NSPEC_CRUST_MANTLE;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -299,7 +299,7 @@
int num_blocks_x = mp->NSPEC_INNER_CORE;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -513,7 +513,7 @@
int num_blocks_x = mp->NSPEC_OUTER_CORE;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -605,7 +605,7 @@
int num_blocks_x = mp->nspec2D_top_crust_mantle;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -683,7 +683,7 @@
int num_blocks_x = mp->NSPEC_CRUST_MANTLE;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -228,8 +228,8 @@
case 8:
// zmin
num_abs_boundary_faces = mp->nspec2D_zmin_outer_core;
- d_abs_boundary_ispec = mp->d_ibelm_zmin_outer_core;
- d_abs_boundary_jacobian2D = mp->d_jacobian2D_zmin_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_bottom_outer_core;
+ d_abs_boundary_jacobian2D = mp->d_jacobian2D_bottom_outer_core;
d_b_absorb_potential = mp->d_absorb_zmin_outer_core;
d_wgllwgll = mp->d_wgllwgll_xy;
break;
@@ -252,7 +252,7 @@
int num_blocks_x = num_abs_boundary_faces;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -263,7 +263,8 @@
if (mp->simulation_type == 3 && num_abs_boundary_faces > 0 ){
// copies array to GPU
print_CUDA_error_if_any(cudaMemcpy(d_b_absorb_potential,absorb_potential,
- NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyHostToDevice),7700);
+ NGLL2*num_abs_boundary_faces*sizeof(realw),
+ cudaMemcpyHostToDevice),7700);
}
compute_stacey_acoustic_kernel<<<grid,threads>>>(mp->d_veloc_outer_core,
@@ -290,7 +291,8 @@
if (mp->simulation_type == 1 && mp->save_forward && num_abs_boundary_faces > 0 ){
// copies array to CPU
print_CUDA_error_if_any(cudaMemcpy(absorb_potential,d_b_absorb_potential,
- NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyDeviceToHost),7701);
+ NGLL2*num_abs_boundary_faces*sizeof(realw),
+ cudaMemcpyDeviceToHost),7701);
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -261,7 +261,7 @@
int num_blocks_x = num_abs_boundary_faces;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/initialize_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/initialize_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/initialize_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -51,6 +51,9 @@
INITIALIZE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) {
TRACE("initialize_cuda_device");
+ int device;
+ int device_count;
+
// Gets rank number of MPI process
int myrank = *myrank_f;
@@ -87,7 +90,7 @@
// note: from here on we use the runtime API ...
// Gets number of GPU devices
- int device_count = 0;
+ device_count = 0;
cudaGetDeviceCount(&device_count);
exit_on_cuda_error("CUDA runtime error: cudaGetDeviceCount failed\ncheck if driver and runtime libraries work together\nexiting...\n");
@@ -107,12 +110,20 @@
//MPI_Barrier(MPI_COMM_WORLD);
// sets active device
- cudaSetDevice( myrank % device_count );
- exit_on_cuda_error("cudaSetDevice");
+ device = myrank % device_count;
+ cudaSetDevice( device );
+ exit_on_cuda_error("cudaSetDevice has invalid device");
+
+ // double check that device was properly selected
+ cudaGetDevice(&device);
+ if( device != (myrank % device_count) ){
+ printf("error rank: %d devices: %d \n",myrank,device_count);
+ printf(" cudaSetDevice()=%d\n cudaGetDevice()=%d\n",myrank%device_count,device);
+ exit_on_error("CUDA set/get device error: device id conflict \n");
+ }
}
// returns a handle to the active device
- int device;
cudaGetDevice(&device);
// get device properties
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -91,7 +91,7 @@
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -154,7 +154,7 @@
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -248,7 +248,7 @@
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -288,8 +288,10 @@
/* ----------------------------------------------------------------------------------------------- */
__global__ void kernel_3_cuda_device(realw* veloc,
- realw* accel, int size,
+ realw* accel,
+ int size,
realw deltatover2,
+ realw two_omega_earth,
realw* rmassx,
realw* rmassy,
realw* rmassz) {
@@ -298,8 +300,9 @@
/* because of block and grid sizing problems, there is a small */
/* amount of buffer at the end of the calculation */
if(id < size) {
- accel[3*id] = accel[3*id]*rmassx[id];
- accel[3*id+1] = accel[3*id+1]*rmassy[id];
+ // note: update adds rotational acceleration in case two_omega_earth is non-zero
+ accel[3*id] = accel[3*id]*rmassx[id] + two_omega_earth*veloc[3*id+1]; // (2,i);
+ accel[3*id+1] = accel[3*id+1]*rmassy[id] - two_omega_earth*veloc[3*id]; //(1,i);
accel[3*id+2] = accel[3*id+2]*rmassz[id];
veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
@@ -311,7 +314,9 @@
/* ----------------------------------------------------------------------------------------------- */
__global__ void kernel_3_accel_cuda_device(realw* accel,
+ realw* veloc,
int size,
+ realw two_omega_earth,
realw* rmassx,
realw* rmassy,
realw* rmassz) {
@@ -320,8 +325,9 @@
/* because of block and grid sizing problems, there is a small */
/* amount of buffer at the end of the calculation */
if(id < size) {
- accel[3*id] = accel[3*id]*rmassx[id];
- accel[3*id+1] = accel[3*id+1]*rmassy[id];
+ // note: update adds rotational acceleration in case two_omega_earth is non-zero
+ accel[3*id] = accel[3*id]*rmassx[id] + two_omega_earth*veloc[3*id+1]; // (2,i);
+ accel[3*id+1] = accel[3*id+1]*rmassy[id] - two_omega_earth*veloc[3*id]; //(1,i);
accel[3*id+2] = accel[3*id+2]*rmassz[id];
}
}
@@ -351,13 +357,13 @@
realw* deltatover2_F,
int* SIMULATION_TYPE_f,
realw* b_deltatover2_F,
- int* OCEANS,
int* NCHUNKS_VAL) {
TRACE("kernel_3_a_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
int SIMULATION_TYPE = *SIMULATION_TYPE_f;
+
realw deltatover2 = *deltatover2_F;
realw b_deltatover2 = *b_deltatover2_F;
@@ -366,7 +372,7 @@
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -376,7 +382,7 @@
// crust/mantle region only
// check whether we can update accel and veloc, or only accel at this point
- if( *OCEANS == 0 ){
+ if( mp->oceans == 0 ){
// updates both, accel and veloc
if( *NCHUNKS_VAL != 6 && mp->absorbing_conditions){
@@ -384,6 +390,7 @@
mp->d_accel_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
deltatover2,
+ mp->two_omega_earth,
mp->d_rmassx_crust_mantle,
mp->d_rmassy_crust_mantle,
mp->d_rmassz_crust_mantle);
@@ -393,6 +400,7 @@
mp->d_b_accel_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
b_deltatover2,
+ mp->b_two_omega_earth,
mp->d_rmassx_crust_mantle,
mp->d_rmassy_crust_mantle,
mp->d_rmassz_crust_mantle);
@@ -402,6 +410,7 @@
mp->d_accel_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
deltatover2,
+ mp->two_omega_earth,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle);
@@ -411,6 +420,7 @@
mp->d_b_accel_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
b_deltatover2,
+ mp->b_two_omega_earth,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle);
@@ -422,28 +432,36 @@
if( *NCHUNKS_VAL != 6 && mp->absorbing_conditions){
kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel_crust_mantle,
+ mp->d_veloc_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
+ mp->two_omega_earth,
mp->d_rmassx_crust_mantle,
mp->d_rmassy_crust_mantle,
mp->d_rmassz_crust_mantle);
if(SIMULATION_TYPE == 3) {
kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_b_accel_crust_mantle,
+ mp->d_b_veloc_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
+ mp->b_two_omega_earth,
mp->d_rmassx_crust_mantle,
mp->d_rmassy_crust_mantle,
mp->d_rmassz_crust_mantle);
}
}else{
kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel_crust_mantle,
+ mp->d_veloc_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
+ mp->two_omega_earth,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle);
if(SIMULATION_TYPE == 3) {
kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_b_accel_crust_mantle,
+ mp->d_b_veloc_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
+ mp->b_two_omega_earth,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle);
@@ -464,8 +482,7 @@
KERNEL_3_B_CUDA)(long* Mesh_pointer,
realw* deltatover2_F,
int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F,
- int* OCEANS) {
+ realw* b_deltatover2_F) {
TRACE("kernel_3_b_cuda");
int size_padded,num_blocks_x,num_blocks_y;
@@ -479,11 +496,11 @@
// crust/mantle region
// in case of ocean loads, we still have to update the velocity for crust/mantle region
- if( *OCEANS ){
+ if( mp->oceans ){
size_padded = ((int)ceil(((double)mp->NGLOB_CRUST_MANTLE)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -504,11 +521,11 @@
}
}
- // inner core
+ // inner core region
size_padded = ((int)ceil(((double)mp->NGLOB_INNER_CORE)/((double)blocksize)))*blocksize;
num_blocks_x = size_padded/blocksize;
num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -520,6 +537,7 @@
mp->d_accel_inner_core,
mp->NGLOB_INNER_CORE,
deltatover2,
+ mp->two_omega_earth,
mp->d_rmass_inner_core,
mp->d_rmass_inner_core,
mp->d_rmass_inner_core);
@@ -529,6 +547,7 @@
mp->d_b_accel_inner_core,
mp->NGLOB_INNER_CORE,
b_deltatover2,
+ mp->b_two_omega_earth,
mp->d_rmass_inner_core,
mp->d_rmass_inner_core,
mp->d_rmass_inner_core);
@@ -586,10 +605,11 @@
realw b_deltatover2 = *b_deltatover2_F;
int blocksize = BLOCKSIZE_KERNEL3;
+
int size_padded = ((int)ceil(((double)mp->NGLOB_OUTER_CORE)/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-08-19 01:24:46 UTC (rev 20591)
@@ -154,14 +154,18 @@
// Texture memory usage:
// requires CUDA version >= 4.0, see check below
// Use textures for d_displ and d_accel -- 10% performance boost
-#define USE_TEXTURES_FIELDS
+//daniel: todo benchmark fields
+//#define USE_TEXTURES_FIELDS
+
// Using texture memory for the hprime-style constants is slower on
// Fermi generation hardware, but *may* be faster on Kepler
// generation.
// Use textures for hprime_xx
-#define USE_TEXTURES_CONSTANTS
+//daniel: todo benchmark constants
+//#define USE_TEXTURES_CONSTANTS
+
// CUDA version >= 4.0 needed for cudaTextureType1D and cudaDeviceSynchronize()
#if CUDA_VERSION < 4000
#undef USE_TEXTURES_FIELDS
@@ -187,6 +191,9 @@
#define BLOCKSIZE_KERNEL3 128
#define BLOCKSIZE_TRANSFER 256
+// maximum grid dimension in one direction of GPU
+#define MAXIMUM_GRID_DIM 65535
+
/* ----------------------------------------------------------------------------------------------- */
// indexing
@@ -492,8 +499,8 @@
int npoin_oceans;
// model parameter
+ int* d_ibool_ocean_load;
realw* d_rmass_ocean_load;
- int* d_iglob_ocean_load;
realw* d_normal_ocean_load;
// ------------------------------------------------------------------ //
@@ -551,6 +558,9 @@
int anisotropic_kl;
int approximate_hess_kl;
+ realw deltat;
+ realw b_deltat;
+
// ------------------------------------------------------------------ //
// gravity
// ------------------------------------------------------------------ //
@@ -560,16 +570,20 @@
realw* d_minus_deriv_gravity_table;
realw* d_density_table;
+ realw minus_g_icb;
+ realw minus_g_cmb;
+
+ realw RHO_BOTTOM_OC;
+ realw RHO_TOP_OC;
+
// ------------------------------------------------------------------ //
// rotation
// ------------------------------------------------------------------ //
- realw d_two_omega_earth;
- realw d_deltat;
+ realw two_omega_earth;
realw* d_A_array_rotation; realw* d_B_array_rotation;
// needed for backward/reconstructed fields (kernel runs)
- realw d_b_two_omega_earth;
- realw d_b_deltat;
+ realw b_two_omega_earth;
realw* d_b_A_array_rotation; realw* d_b_B_array_rotation;
// ------------------------------------------------------------------ //
@@ -656,11 +670,9 @@
int* d_ibelm_xmin_outer_core, *d_ibelm_xmax_outer_core;
int* d_ibelm_ymin_outer_core, *d_ibelm_ymax_outer_core;
- int* d_ibelm_zmin_outer_core;
realw* d_jacobian2D_xmin_outer_core, *d_jacobian2D_xmax_outer_core;
realw* d_jacobian2D_ymin_outer_core, *d_jacobian2D_ymax_outer_core;
- realw* d_jacobian2D_zmin_outer_core;
realw* d_absorb_xmin_outer_core, *d_absorb_xmax_outer_core;
realw* d_absorb_ymin_outer_core, *d_absorb_ymax_outer_core;
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -156,7 +156,7 @@
int num_blocks_x = mp->nspec2D_top_crust_mantle;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -254,7 +254,7 @@
int igll = threadIdx.x;
int iface = blockIdx.x + gridDim.x*blockIdx.y; // surface element id
- // when nspec_top > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
+ // when nspec_top > MAXIMUM_GRID_DIM, but mod(nspec_top,2) > 0, we end up with an extra block.
if(iface < nspec_top) {
int ispec = ibelm_top[iface]-1;
@@ -306,7 +306,7 @@
int num_blocks_x = mp->nspec2D_top_crust_mantle;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -94,7 +94,7 @@
PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
int* myrank_f,
int* h_NGLLX,
- realw* h_hprime_xx,realw* h_hprime_yy,realw* h_hprime_zz,
+ realw* h_hprime_xx,
realw* h_hprimewgll_xx,realw* h_hprimewgll_yy,realw* h_hprimewgll_zz,
realw* h_wgllwgll_xy,realw* h_wgllwgll_xz,realw* h_wgllwgll_yz,
int* NSOURCES,int* nsources_local,
@@ -125,7 +125,9 @@
int* SAVE_BOUNDARY_MESH_f,
int* USE_MESH_COLORING_GPU_f,
int* ANISOTROPIC_KL_f,
- int* APPROXIMATE_HESS_KL_f) {
+ int* APPROXIMATE_HESS_KL_f,
+ realw* deltat_f,
+ realw* b_deltat_f) {
TRACE("prepare_constants_device");
@@ -294,6 +296,15 @@
}
+ // for rotation and new attenuation
+ mp->deltat = *deltat_f;
+ if( mp->simulation_type == 3 ){
+ mp->b_deltat = *b_deltat_f;
+ }
+ // initializes for rotational effects
+ mp->two_omega_earth = 0.f;
+ mp->b_two_omega_earth = 0.f;
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("prepare_constants_device");
#endif
@@ -308,12 +319,10 @@
extern "C"
void FC_FUNC_(prepare_fields_rotation_device,
PREPARE_FIELDS_ROTATION_DEVICE)(long* Mesh_pointer_f,
- realw* two_omega_earth,
- realw* deltat,
+ realw* two_omega_earth_f,
realw* A_array_rotation,
realw* B_array_rotation,
- realw* b_two_omega_earth,
- realw* b_deltat,
+ realw* b_two_omega_earth_f,
realw* b_A_array_rotation,
realw* b_B_array_rotation,
int* NSPEC_OUTER_CORE_ROTATION) {
@@ -323,22 +332,26 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
// arrays only needed when rotation is required
- if( ! mp->rotation ){ exit_on_cuda_error("prepare_fields_rotation_device rotation not properly initialized"); }
+ if( ! mp->rotation ){
+ exit_on_cuda_error("prepare_fields_rotation_device: rotation flag not properly initialized");
+ }
+ // checks array size
+ if( *NSPEC_OUTER_CORE_ROTATION != mp->NSPEC_OUTER_CORE){
+ printf("error prepare_fields_rotation_device: rotation array has wrong size: %d instead of %d\n",
+ *NSPEC_OUTER_CORE_ROTATION,mp->NSPEC_OUTER_CORE);
+ exit_on_cuda_error("prepare_fields_rotation_device: rotation array has wrong size");
+ }
// rotation arrays (needed only for outer core region)
- mp->d_two_omega_earth = *two_omega_earth;
- mp->d_deltat = *deltat;
+ mp->two_omega_earth = *two_omega_earth_f;
+ copy_todevice_realw((void**)&mp->d_A_array_rotation,A_array_rotation,NGLL3*mp->NSPEC_OUTER_CORE);
+ copy_todevice_realw((void**)&mp->d_B_array_rotation,B_array_rotation,NGLL3*mp->NSPEC_OUTER_CORE);
- copy_todevice_realw((void**)&mp->d_A_array_rotation,A_array_rotation,NGLL3*(*NSPEC_OUTER_CORE_ROTATION));
- copy_todevice_realw((void**)&mp->d_B_array_rotation,B_array_rotation,NGLL3*(*NSPEC_OUTER_CORE_ROTATION));
-
// backward/reconstructed fields
if( mp->simulation_type == 3 ){
- mp->d_b_two_omega_earth = *b_two_omega_earth;
- mp->d_b_deltat = *b_deltat;
-
- copy_todevice_realw((void**)&mp->d_b_A_array_rotation,b_A_array_rotation,NGLL3*(*NSPEC_OUTER_CORE_ROTATION));
- copy_todevice_realw((void**)&mp->d_b_B_array_rotation,b_B_array_rotation,NGLL3*(*NSPEC_OUTER_CORE_ROTATION));
+ mp->b_two_omega_earth = *b_two_omega_earth_f;
+ copy_todevice_realw((void**)&mp->d_b_A_array_rotation,b_A_array_rotation,NGLL3*mp->NSPEC_OUTER_CORE);
+ copy_todevice_realw((void**)&mp->d_b_B_array_rotation,b_B_array_rotation,NGLL3*mp->NSPEC_OUTER_CORE);
}
}
@@ -358,7 +371,11 @@
realw* minus_deriv_gravity_table,
realw* density_table,
realw* h_wgll_cube,
- int* NRAD_GRAVITY) {
+ int* NRAD_GRAVITY,
+ realw* minus_g_icb,
+ realw* minus_g_cmb,
+ double* RHO_BOTTOM_OC,
+ double* RHO_TOP_OC) {
TRACE("prepare_fields_gravity_device");
@@ -372,6 +389,8 @@
}else{
// gravity case
+ mp->minus_g_icb = *minus_g_icb;
+ mp->minus_g_cmb = *minus_g_cmb;
// sets up gll weights cubed
setConst_wgll_cube(h_wgll_cube,mp);
@@ -382,6 +401,11 @@
copy_todevice_realw((void**)&mp->d_minus_deriv_gravity_table,minus_deriv_gravity_table,(*NRAD_GRAVITY));
copy_todevice_realw((void**)&mp->d_density_table,density_table,(*NRAD_GRAVITY));
}
+
+ // constants
+ mp->RHO_BOTTOM_OC = (realw) *RHO_BOTTOM_OC;
+ mp->RHO_TOP_OC = (realw) *RHO_TOP_OC;
+
}
@@ -600,10 +624,8 @@
int* nkmin_xi_outer_core,int* nkmin_eta_outer_core,
int* ibelm_xmin_outer_core,int* ibelm_xmax_outer_core,
int* ibelm_ymin_outer_core,int* ibelm_ymax_outer_core,
- int* ibelm_bottom_outer_core,
realw* jacobian2D_xmin_outer_core, realw* jacobian2D_xmax_outer_core,
realw* jacobian2D_ymin_outer_core, realw* jacobian2D_ymax_outer_core,
- realw* jacobian2D_bottom_outer_core,
realw* vp_outer_core) {
TRACE("prepare_fields_absorb_device");
@@ -759,9 +781,8 @@
// zmin
if( mp->nspec2D_zmin_outer_core > 0 ){
- copy_todevice_int((void**)&mp->d_ibelm_zmin_outer_core,ibelm_bottom_outer_core,mp->nspec2D_zmin_outer_core);
- copy_todevice_realw((void**)&mp->d_jacobian2D_zmin_outer_core,jacobian2D_bottom_outer_core,
- NGLL2*(mp->nspec2D_zmin_outer_core));
+ // note: ibelm_bottom_outer_core and jacobian2D_bottom_outer_core will be allocated
+ // when preparing the outer core
// boundary buffer
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_zmin_outer_core,
@@ -908,7 +929,6 @@
// initializes kernel values to zero
print_CUDA_error_if_any(cudaMemset(mp->d_Sigma_kl,0,
NGLL3*mp->NSPEC_CRUST_MANTLE*sizeof(realw)),7403);
-
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -927,8 +947,8 @@
void FC_FUNC_(prepare_oceans_device,
PREPARE_OCEANS_DEVICE)(long* Mesh_pointer_f,
int* npoin_oceans,
+ int* h_iglob_ocean_load,
realw* h_rmass_ocean_load_selected,
- int* h_iglob_ocean_load,
realw* h_normal_ocean_load) {
TRACE("prepare_oceans_device");
@@ -941,12 +961,12 @@
// checks for global partitions, each slice must have a top surface with points on it
if( mp->npoin_oceans == 0 ){ exit_on_cuda_error("prepare_oceans_device has zero npoin_oceans"); }
+ // global point indices
+ copy_todevice_int((void**)&mp->d_ibool_ocean_load,h_iglob_ocean_load,mp->npoin_oceans);
+
// mass matrix
copy_todevice_realw((void**)&mp->d_rmass_ocean_load,h_rmass_ocean_load_selected,mp->npoin_oceans);
- // global point indices
- copy_todevice_int((void**)&mp->d_iglob_ocean_load,h_iglob_ocean_load,mp->npoin_oceans);
-
// normals
copy_todevice_realw((void**)&mp->d_normal_ocean_load,h_normal_ocean_load,NDIM*mp->npoin_oceans);
@@ -1396,18 +1416,17 @@
// CMB/ICB coupling
mp->nspec2D_top_outer_core = *NSPEC2D_TOP_OC;
mp->nspec2D_bottom_outer_core = *NSPEC2D_BOTTOM_OC;
+
int size_toc = NGLL2*(mp->nspec2D_top_outer_core);
- int size_boc = NGLL2*(mp->nspec2D_bottom_outer_core);
-
+ copy_todevice_int((void**)&mp->d_ibelm_top_outer_core,h_ibelm_top_outer_core,mp->nspec2D_top_outer_core);
+ copy_todevice_realw((void**)&mp->d_jacobian2D_top_outer_core,h_jacobian2D_top_outer_core,size_toc);
copy_todevice_realw((void**)&mp->d_normal_top_outer_core,h_normal_top_outer_core,NDIM*size_toc);
- copy_todevice_realw((void**)&mp->d_normal_bottom_outer_core,h_normal_bottom_outer_core,NDIM*size_boc);
- copy_todevice_realw((void**)&mp->d_jacobian2D_top_outer_core,h_jacobian2D_top_outer_core,size_toc);
+ int size_boc = NGLL2*(mp->nspec2D_bottom_outer_core);
+ copy_todevice_int((void**)&mp->d_ibelm_bottom_outer_core,h_ibelm_bottom_outer_core,mp->nspec2D_bottom_outer_core);
copy_todevice_realw((void**)&mp->d_jacobian2D_bottom_outer_core,h_jacobian2D_bottom_outer_core,size_boc);
+ copy_todevice_realw((void**)&mp->d_normal_bottom_outer_core,h_normal_bottom_outer_core,NDIM*size_boc);
- copy_todevice_int((void**)&mp->d_ibelm_top_outer_core,h_ibelm_top_outer_core,mp->nspec2D_top_outer_core);
- copy_todevice_int((void**)&mp->d_ibelm_bottom_outer_core,h_ibelm_bottom_outer_core,mp->nspec2D_bottom_outer_core);
-
// wavefield
int size = mp->NGLOB_OUTER_CORE;
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_outer_core),sizeof(realw)*size),4001);
@@ -1887,8 +1906,6 @@
}
}
if( mp->nspec2D_zmin_outer_core > 0 ){
- cudaFree(mp->d_ibelm_zmin_outer_core);
- cudaFree(mp->d_jacobian2D_zmin_outer_core);
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
cudaFree(mp->d_absorb_zmin_outer_core);
}
@@ -2007,6 +2024,7 @@
}
if(mp->approximate_hess_kl){ cudaFree(mp->d_hess_kl_crust_mantle);}
}
+ // mass matrix
if( *NCHUNKS_VAL != 6 && mp->absorbing_conditions){
cudaFree(mp->d_rmassx_crust_mantle);
cudaFree(mp->d_rmassy_crust_mantle);
@@ -2039,12 +2057,11 @@
cudaFree(mp->d_phase_ispec_inner_outer_core);
cudaFree(mp->d_ibelm_top_outer_core);
+ cudaFree(mp->d_jacobian2D_top_outer_core);
+ cudaFree(mp->d_normal_top_outer_core);
+
cudaFree(mp->d_ibelm_bottom_outer_core);
-
- cudaFree(mp->d_normal_top_outer_core);
cudaFree(mp->d_normal_bottom_outer_core);
-
- cudaFree(mp->d_jacobian2D_top_outer_core);
cudaFree(mp->d_jacobian2D_bottom_outer_core);
cudaFree(mp->d_displ_outer_core);
@@ -2057,6 +2074,7 @@
cudaFree(mp->d_rho_kl_outer_core);
cudaFree(mp->d_alpha_kl_outer_core);
}
+ // mass matrix
cudaFree(mp->d_rmass_outer_core);
//------------------------------------------
@@ -2075,6 +2093,7 @@
cudaFree(mp->d_muvstore_inner_core);
cudaFree(mp->d_ibool_inner_core);
+ // gravity
if( mp->gravity ){
cudaFree(mp->d_xstore_inner_core);
cudaFree(mp->d_ystore_inner_core);
@@ -2116,11 +2135,13 @@
cudaFree(mp->d_alpha_kl_inner_core);
cudaFree(mp->d_beta_kl_inner_core);
}
+ // mass matrix
cudaFree(mp->d_rmass_inner_core);
+ // oceans
if( mp->oceans ){
cudaFree(mp->d_rmass_ocean_load);
- cudaFree(mp->d_iglob_ocean_load);
+ cudaFree(mp->d_ibool_ocean_load);
cudaFree(mp->d_normal_ocean_load);
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-08-19 01:24:46 UTC (rev 20591)
@@ -161,16 +161,10 @@
COMPUTE_COUPLING_FLUID_ICB_CUDA)(long* Mesh_pointer_f) {}
void FC_FUNC_(compute_coupling_cmb_fluid_cuda,
- COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f,
- double RHO_TOP_OC,
- realw minus_g_cmb,
- int GRAVITY_VAL) {}
+ COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f) {}
void FC_FUNC_(compute_coupling_icb_fluid_cuda,
- COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f,
- double RHO_BOTTOM_OC,
- realw minus_g_icb,
- int GRAVITY_VAL) {}
+ COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f) {}
void FC_FUNC_(compute_coupling_ocean_cuda,
COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f,
@@ -183,7 +177,6 @@
void FC_FUNC_(compute_forces_crust_mantle_cuda,
COMPUTE_FORCES_CRUST_MANTLE_CUDA)(long* Mesh_pointer_f,
- realw* deltat,
int* iphase) {}
@@ -193,7 +186,6 @@
void FC_FUNC_(compute_forces_inner_core_cuda,
COMPUTE_FORCES_INNER_CORE_CUDA)(long* Mesh_pointer_f,
- realw* deltat,
int* iphase) {}
@@ -203,9 +195,9 @@
void FC_FUNC_(compute_forces_outer_core_cuda,
COMPUTE_FORCES_OUTER_CORE_CUDA)(long* Mesh_pointer_f,
- int* iphase,
- realw* time_f,
- realw* b_time_f) {}
+ int* iphase,
+ realw* time_f,
+ realw* b_time_f) {}
//
@@ -298,15 +290,13 @@
realw* deltatover2_F,
int* SIMULATION_TYPE_f,
realw* b_deltatover2_F,
- int* OCEANS,
int* NCHUNKS_VAL) {}
void FC_FUNC_(kernel_3_b_cuda,
KERNEL_3_B_CUDA)(long* Mesh_pointer,
realw* deltatover2_F,
int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F,
- int* OCEANS) {}
+ realw* b_deltatover2_F) {}
void FC_FUNC_(kernel_3_outer_core_cuda,
KERNEL_3_OUTER_CORE_CUDA)(long* Mesh_pointer,
@@ -352,7 +342,7 @@
PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
int* myrank_f,
int* h_NGLLX,
- realw* h_hprime_xx,realw* h_hprime_yy,realw* h_hprime_zz,
+ realw* h_hprime_xx,
realw* h_hprimewgll_xx,realw* h_hprimewgll_yy,realw* h_hprimewgll_zz,
realw* h_wgllwgll_xy,realw* h_wgllwgll_xz,realw* h_wgllwgll_yz,
int* NSOURCES,int* nsources_local,
@@ -383,16 +373,16 @@
int* SAVE_BOUNDARY_MESH_f,
int* USE_MESH_COLORING_GPU_f,
int* ANISOTROPIC_KL_f,
- int* APPROXIMATE_HESS_KL_f) {}
+ int* APPROXIMATE_HESS_KL_f,
+ realw* deltat_f,
+ realw* b_deltat_f) {}
void FC_FUNC_(prepare_fields_rotation_device,
PREPARE_FIELDS_ROTATION_DEVICE)(long* Mesh_pointer_f,
- realw* two_omega_earth,
- realw* deltat,
+ realw* two_omega_earth_f,
realw* A_array_rotation,
realw* B_array_rotation,
- realw* b_two_omega_earth,
- realw* b_deltat,
+ realw* b_two_omega_earth_f,
realw* b_A_array_rotation,
realw* b_B_array_rotation,
int* NSPEC_OUTER_CORE_ROTATION) {}
@@ -405,7 +395,11 @@
realw* minus_deriv_gravity_table,
realw* density_table,
realw* h_wgll_cube,
- int* NRAD_GRAVITY) {}
+ int* NRAD_GRAVITY,
+ realw* minus_g_icb,
+ realw* minus_g_cmb,
+ double* RHO_BOTTOM_OC,
+ double* RHO_TOP_OC) {}
void FC_FUNC_(prepare_fields_attenuat_device,
PREPARE_FIELDS_ATTENUAT_DEVICE)(long* Mesh_pointer_f,
@@ -478,10 +472,8 @@
int* nkmin_xi_outer_core,int* nkmin_eta_outer_core,
int* ibelm_xmin_outer_core,int* ibelm_xmax_outer_core,
int* ibelm_ymin_outer_core,int* ibelm_ymax_outer_core,
- int* ibelm_bottom_outer_core,
realw* jacobian2D_xmin_outer_core, realw* jacobian2D_xmax_outer_core,
realw* jacobian2D_ymin_outer_core, realw* jacobian2D_ymax_outer_core,
- realw* jacobian2D_bottom_outer_core,
realw* vp_outer_core) {}
void FC_FUNC_(prepare_mpi_buffers_device,
@@ -514,8 +506,8 @@
void FC_FUNC_(prepare_oceans_device,
PREPARE_OCEANS_DEVICE)(long* Mesh_pointer_f,
int* npoin_oceans,
- realw* h_rmass_ocean_load_selected,
int* h_iglob_ocean_load,
+ realw* h_rmass_ocean_load_selected,
realw* h_normal_ocean_load) {}
void FC_FUNC_(prepare_crust_mantle_device,
@@ -766,7 +758,8 @@
//
void FC_FUNC_(write_seismograms_transfer_cuda,
- WRITE_SEISMOGRAMS_TRANSFER_CUDA)(realw* displ,
+ WRITE_SEISMOGRAMS_TRANSFER_CUDA)(long* Mesh_pointer_f,
+ realw* displ,
realw* b_displ,
realw* eps_trace_over_3,
realw* epsilondev_xx,
@@ -774,24 +767,8 @@
realw* epsilondev_xy,
realw* epsilondev_xz,
realw* epsilondev_yz,
- long* Mesh_pointer_f,
int* number_receiver_global,
int* ispec_selected_rec,
int* ispec_selected_source,
int* ibool) {}
-void FC_FUNC_(transfer_station_ac_from_device,
- TRANSFER_STATION_AC_FROM_DEVICE)(
- realw* potential_acoustic,
- realw* potential_dot_acoustic,
- realw* potential_dot_dot_acoustic,
- realw* b_potential_acoustic,
- realw* b_potential_dot_acoustic,
- realw* b_potential_dot_dot_acoustic,
- long* Mesh_pointer_f,
- int* number_receiver_global,
- int* ispec_selected_rec,
- int* ispec_selected_source,
- int* ibool,
- int* SIMULATION_TYPEf) {}
-
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu 2012-08-19 01:24:46 UTC (rev 20591)
@@ -97,13 +97,17 @@
TRACE("write_seismograms_transfer_from_device");
+ int irec_local,irec;
+ int ispec,iglob,i;
+
// checks if anything to do
if( mp->nrec_local == 0 ) return;
int blocksize = NGLL3;
+
int num_blocks_x = mp->nrec_local;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -122,13 +126,12 @@
cudaMemcpy(mp->h_station_seismo_field,mp->d_station_seismo_field,
3*NGLL3*(mp->nrec_local)*sizeof(realw),cudaMemcpyDeviceToHost);
- int irec_local;
for(irec_local = 0 ; irec_local < mp->nrec_local; irec_local++) {
- int irec = number_receiver_global[irec_local] - 1;
- int ispec = h_ispec_selected[irec] - 1;
+ irec = number_receiver_global[irec_local] - 1;
+ ispec = h_ispec_selected[irec] - 1;
- for(int i = 0; i < NGLL3; i++) {
- int iglob = ibool[i+NGLL3*ispec] - 1;
+ for(i = 0; i < NGLL3; i++) {
+ iglob = ibool[i+NGLL3*ispec] - 1;
h_field[0+3*iglob] = mp->h_station_seismo_field[0+3*i+irec_local*NGLL3*3];
h_field[1+3*iglob] = mp->h_station_seismo_field[1+3*i+irec_local*NGLL3*3];
h_field[2+3*iglob] = mp->h_station_seismo_field[2+3*i+irec_local*NGLL3*3];
@@ -151,13 +154,17 @@
TRACE("write_seismograms_transfer_scalar_from_device");
+ int irec_local,irec;
+ int ispec,iglob,i;
+
// checks if anything to do
if( mp->nrec_local == 0 ) return;
int blocksize = NGLL3;
+
int num_blocks_x = mp->nrec_local;
int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
@@ -174,15 +181,13 @@
// copies array to CPU
cudaMemcpy(mp->h_station_seismo_field,mp->d_station_seismo_field,
- 1*NGLL3*(mp->nrec_local)*sizeof(realw),cudaMemcpyDeviceToHost);
+ NGLL3*(mp->nrec_local)*sizeof(realw),cudaMemcpyDeviceToHost);
- int irec_local;
for(irec_local = 0 ; irec_local < mp->nrec_local; irec_local++) {
- int irec = number_receiver_global[irec_local] - 1;
- int ispec = h_ispec_selected[irec] - 1;
-
- for(int i = 0; i < NGLL3; i++) {
- int iglob = ibool[i+NGLL3*ispec] - 1;
+ irec = number_receiver_global[irec_local] - 1;
+ ispec = h_ispec_selected[irec] - 1;
+ for(i = 0; i < NGLL3; i++) {
+ iglob = ibool[i+NGLL3*ispec] - 1;
h_field[iglob] = mp->h_station_seismo_field[i+irec_local*NGLL3];
}
}
@@ -196,7 +201,8 @@
extern "C"
void FC_FUNC_(write_seismograms_transfer_cuda,
- WRITE_SEISMOGRAMS_TRANSFER_CUDA)(realw* displ,
+ WRITE_SEISMOGRAMS_TRANSFER_CUDA)(long* Mesh_pointer_f,
+ realw* displ,
realw* b_displ,
realw* eps_trace_over_3,
realw* epsilondev_xx,
@@ -204,7 +210,6 @@
realw* epsilondev_xy,
realw* epsilondev_xz,
realw* epsilondev_yz,
- long* Mesh_pointer_f,
int* number_receiver_global,
int* ispec_selected_rec,
int* ispec_selected_source,
@@ -219,10 +224,10 @@
switch( mp->simulation_type ){
case 1:
write_seismograms_transfer_from_device(mp,mp->d_displ_crust_mantle,
- displ,
- number_receiver_global,
- mp->d_ispec_selected_rec,
- ispec_selected_rec, ibool);
+ displ,
+ number_receiver_global,
+ mp->d_ispec_selected_rec,
+ ispec_selected_rec, ibool);
break;
case 2:
@@ -271,161 +276,5 @@
ispec_selected_rec, ibool);
break;
}
-
}
-/* ----------------------------------------------------------------------------------------------- */
-
-// ACOUSTIC simulations
-
-/* ----------------------------------------------------------------------------------------------- */
-/*
-__global__ void transfer_stations_fields_acoustic_from_device_kernel(int* number_receiver_global,
- int* ispec_selected_rec,
- int* ibool,
- realw* station_seismo_potential,
- realw* desired_potential) {
-
- int blockID = blockIdx.x + blockIdx.y*gridDim.x;
- int nodeID = threadIdx.x + blockID*blockDim.x;
-
- int irec = number_receiver_global[blockID]-1;
- int ispec = ispec_selected_rec[irec]-1;
- int iglob = ibool[threadIdx.x + NGLL3*ispec]-1;
-
- //if(threadIdx.x == 0 ) printf("node acoustic: %i %i %i %i %i %e \n",blockID,nodeID,irec,ispec,iglob,desired_potential[iglob]);
-
- station_seismo_potential[nodeID] = desired_potential[iglob];
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-void transfer_field_acoustic_from_device(Mesh* mp,
- realw* d_potential,
- realw* h_potential,
- int* number_receiver_global,
- int* d_ispec_selected,
- int* h_ispec_selected,
- int* ibool) {
-
-TRACE("transfer_field_acoustic_from_device");
-
- int irec_local,irec,ispec,iglob,j;
-
- // checks if anything to do
- if( mp->nrec_local == 0 ) return;
-
- // sets up kernel dimensions
- int blocksize = NGLL3;
- int num_blocks_x = mp->nrec_local;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = (int) ceil(num_blocks_x*0.5f);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- // prepare field transfer array on device
- transfer_stations_fields_acoustic_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
- d_ispec_selected,
- mp->d_ibool,
- mp->d_station_seismo_potential,
- d_potential);
-
-
- print_CUDA_error_if_any(cudaMemcpy(mp->h_station_seismo_potential,mp->d_station_seismo_potential,
- mp->nrec_local*NGLL3*sizeof(realw),cudaMemcpyDeviceToHost),500);
-
- //printf("copy local receivers: %i \n",mp->nrec_local);
-
- for(irec_local=0; irec_local < mp->nrec_local; irec_local++) {
- irec = number_receiver_global[irec_local]-1;
- ispec = h_ispec_selected[irec]-1;
-
- // copy element values
- // note: iglob may vary and can be irregularly accessing the h_potential array
- for(j=0; j < NGLL3; j++){
- iglob = ibool[j+NGLL3*ispec]-1;
- h_potential[iglob] = mp->h_station_seismo_potential[j+irec_local*NGLL3];
- }
-
- // copy each station element's points to working array
- // note: this works if iglob values would be all aligned...
- //memcpy(&(h_potential[iglob]),&(mp->h_station_seismo_potential[irec_local*NGLL3]),NGLL3*sizeof(realw));
-
- }
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("transfer_field_acoustic_from_device");
-#endif
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(transfer_station_ac_from_device,
- TRANSFER_STATION_AC_FROM_DEVICE)(
- realw* potential_acoustic,
- realw* potential_dot_acoustic,
- realw* potential_dot_dot_acoustic,
- realw* b_potential_acoustic,
- realw* b_potential_dot_acoustic,
- realw* b_potential_dot_dot_acoustic,
- long* Mesh_pointer_f,
- int* number_receiver_global,
- int* ispec_selected_rec,
- int* ispec_selected_source,
- int* ibool,
- int* SIMULATION_TYPEf) {
-
-TRACE("transfer_station_ac_from_device");
- //double start_time = get_time();
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
- // checks if anything to do
- if( mp->nrec_local == 0 ) return;
-
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
-
- if(SIMULATION_TYPE == 1) {
- transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
- number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
- number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
- number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- }
- else if(SIMULATION_TYPE == 2) {
- transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
- number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
- number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
- number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
- }
- else if(SIMULATION_TYPE == 3) {
- transfer_field_acoustic_from_device(mp,mp->d_b_potential_acoustic,b_potential_acoustic,
- number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
- number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
- number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //double end_time = get_time();
- //printf("Elapsed time: %e\n",end_time-start_time);
- exit_on_cuda_error("transfer_station_ac_from_device");
-#endif
-}
-*/
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90 2012-08-19 01:24:46 UTC (rev 20591)
@@ -162,9 +162,15 @@
do j = 1,NGLLY
do i = 1,NGLLX
- ! if 3D Earth with topography, compute local height of oceans
+ ! note: old version (5.1.4)
+ ! only for models where 3D crustal stretching was used (even without topography?)
+ !if( CASE_3D ) then
+
+ ! note: new version:
+ ! for 3D Earth with topography, compute local height of oceans
if( TOPOGRAPHY ) then
+
! get coordinates of current point
xval = xstore(i,j,k,ispec)
yval = ystore(i,j,k,ispec)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90 2012-08-19 01:24:46 UTC (rev 20591)
@@ -85,18 +85,12 @@
do iinterface = 1, num_interfaces
! non-blocking synchronous send request
call isend_cr(buffer_send_scalar(1:nibool_interfaces(iinterface),iinterface), &
- nibool_interfaces(iinterface), &
- my_neighbours(iinterface), &
- itag, &
- request_send_scalar(iinterface) &
- )
+ nibool_interfaces(iinterface),my_neighbours(iinterface), &
+ itag,request_send_scalar(iinterface) )
! receive request
call irecv_cr(buffer_recv_scalar(1:nibool_interfaces(iinterface),iinterface), &
- nibool_interfaces(iinterface), &
- my_neighbours(iinterface), &
- itag, &
- request_recv_scalar(iinterface) &
- )
+ nibool_interfaces(iinterface),my_neighbours(iinterface), &
+ itag,request_recv_scalar(iinterface) )
enddo
! wait for communications completion (recv)
@@ -177,18 +171,12 @@
do iinterface = 1, num_interfaces
! non-blocking synchronous send request
call isend_cr(buffer_send_scalar(1:nibool_interfaces(iinterface),iinterface), &
- nibool_interfaces(iinterface), &
- my_neighbours(iinterface), &
- itag, &
- request_send_scalar(iinterface) &
- )
+ nibool_interfaces(iinterface),my_neighbours(iinterface), &
+ itag,request_send_scalar(iinterface))
! receive request
call irecv_cr(buffer_recv_scalar(1:nibool_interfaces(iinterface),iinterface), &
- nibool_interfaces(iinterface), &
- my_neighbours(iinterface), &
- itag, &
- request_recv_scalar(iinterface) &
- )
+ nibool_interfaces(iinterface),my_neighbours(iinterface), &
+ itag,request_recv_scalar(iinterface))
enddo
@@ -303,18 +291,12 @@
do iinterface = 1, num_interfaces
! non-blocking synchronous send request
call isend_cr(buffer_send_scalar(1:nibool_interfaces(iinterface),iinterface), &
- nibool_interfaces(iinterface), &
- my_neighbours(iinterface), &
- itag, &
- request_send_scalar(iinterface) &
- )
+ nibool_interfaces(iinterface),my_neighbours(iinterface), &
+ itag,request_send_scalar(iinterface))
! receive request
call irecv_cr(buffer_recv_scalar(1:nibool_interfaces(iinterface),iinterface), &
- nibool_interfaces(iinterface), &
- my_neighbours(iinterface), &
- itag, &
- request_recv_scalar(iinterface) &
- )
+ nibool_interfaces(iinterface),my_neighbours(iinterface), &
+ itag,request_recv_scalar(iinterface) )
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90 2012-08-19 01:24:46 UTC (rev 20591)
@@ -26,11 +26,11 @@
!=====================================================================
subroutine compute_coupling_fluid_CMB(displ_crust_mantle,b_displ_crust_mantle, &
- ibool_crust_mantle,ibelm_bottom_crust_mantle, &
- accel_outer_core,b_accel_outer_core, &
- normal_top_outer_core,jacobian2D_top_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
- SIMULATION_TYPE,nspec2D_top)
+ ibool_crust_mantle,ibelm_bottom_crust_mantle, &
+ accel_outer_core,b_accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+ SIMULATION_TYPE,nspec2D_top)
implicit none
@@ -289,14 +289,15 @@
! get global point number
! corresponding points are located at the top of the outer core
- iglob_oc = ibool_outer_core(i,j,NGLLZ,ispec_selected)
+ iglob_oc = ibool_outer_core(i,j,k_corresp,ispec_selected)
iglob_mantle = ibool_crust_mantle(i,j,k,ispec)
! compute pressure, taking gravity into account
if(GRAVITY_VAL) then
pressure = RHO_TOP_OC * (- accel_outer_core(iglob_oc) &
+ minus_g_cmb *(displ_crust_mantle(1,iglob_mantle)*nx &
- + displ_crust_mantle(2,iglob_mantle)*ny + displ_crust_mantle(3,iglob_mantle)*nz))
+ + displ_crust_mantle(2,iglob_mantle)*ny &
+ + displ_crust_mantle(3,iglob_mantle)*nz))
else
pressure = - RHO_TOP_OC * accel_outer_core(iglob_oc)
endif
@@ -401,7 +402,7 @@
if(GRAVITY_VAL) then
pressure = RHO_BOTTOM_OC * (- accel_outer_core(iglob) &
+ minus_g_icb *(displ_inner_core(1,iglob_inner_core)*nx &
- + displ_inner_core(2,iglob_inner_core)*ny + displ_inner_core(3,iglob_inner_core)*nz))
+ + displ_inner_core(2,iglob_inner_core)*ny + displ_inner_core(3,iglob_inner_core)*nz))
else
pressure = - RHO_BOTTOM_OC * accel_outer_core(iglob)
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 2012-08-19 01:24:46 UTC (rev 20591)
@@ -144,7 +144,7 @@
! crust/mantle region
call compute_forces_crust_mantle_Dev( NSPEC_CRUST_MANTLE_ADJOINT,NGLOB_CRUST_MANTLE_ADJOINT, &
NSPEC_CRUST_MANTLE_STR_AND_ATT, &
- deltat, &
+ b_deltat, &
b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
phase_is_inner, &
b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
@@ -159,7 +159,7 @@
! inner core region
call compute_forces_inner_core_Dev( NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
NSPEC_INNER_CORE_STR_AND_ATT, &
- deltat, &
+ b_deltat, &
b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
phase_is_inner, &
b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
@@ -177,7 +177,7 @@
! crust/mantle region
call compute_forces_crust_mantle( NSPEC_CRUST_MANTLE_ADJOINT,NGLOB_CRUST_MANTLE_ADJOINT, &
NSPEC_CRUST_MANTLE_STR_AND_ATT, &
- deltat, &
+ b_deltat, &
b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
phase_is_inner, &
b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
@@ -193,7 +193,7 @@
! inner core region
call compute_forces_inner_core( NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
NSPEC_INNER_CORE_STR_AND_ATT, &
- deltat, &
+ b_deltat, &
b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
phase_is_inner, &
b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
@@ -212,9 +212,9 @@
! on GPU
! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
! for crust/mantle
- call compute_forces_crust_mantle_cuda(Mesh_pointer,deltat,iphase)
+ call compute_forces_crust_mantle_cuda(Mesh_pointer,iphase)
! for inner core
- call compute_forces_inner_core_cuda(Mesh_pointer,deltat,iphase)
+ call compute_forces_inner_core_cuda(Mesh_pointer,iphase)
endif ! GPU_MODE
@@ -280,27 +280,27 @@
!--- couple with outer core at the bottom of the mantle
!---
if(ACTUALLY_COUPLE_FLUID_CMB) &
- call compute_coupling_CMB_fluid(displ_crust_mantle,b_displ_crust_mantle, &
- accel_crust_mantle,b_accel_crust_mantle, &
- ibool_crust_mantle,ibelm_bottom_crust_mantle, &
- accel_outer_core,b_accel_outer_core, &
- normal_top_outer_core,jacobian2D_top_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
- RHO_TOP_OC,minus_g_cmb, &
- SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
+ call compute_coupling_CMB_fluid(displ_crust_mantle,b_displ_crust_mantle, &
+ accel_crust_mantle,b_accel_crust_mantle, &
+ ibool_crust_mantle,ibelm_bottom_crust_mantle, &
+ accel_outer_core,b_accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+ RHO_TOP_OC,minus_g_cmb, &
+ SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
!---
!--- couple with outer core at the top of the inner core
!---
if(ACTUALLY_COUPLE_FLUID_ICB) &
- call compute_coupling_ICB_fluid(displ_inner_core,b_displ_inner_core, &
- accel_inner_core,b_accel_inner_core, &
- ibool_inner_core,ibelm_top_inner_core, &
- accel_outer_core,b_accel_outer_core, &
- normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
- RHO_BOTTOM_OC,minus_g_icb, &
- SIMULATION_TYPE,NSPEC2D_TOP(IREGION_INNER_CORE))
+ call compute_coupling_ICB_fluid(displ_inner_core,b_displ_inner_core, &
+ accel_inner_core,b_accel_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core, &
+ accel_outer_core,b_accel_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+ RHO_BOTTOM_OC,minus_g_icb, &
+ SIMULATION_TYPE,NSPEC2D_TOP(IREGION_INNER_CORE))
else
! on GPU
@@ -308,14 +308,12 @@
!--- couple with outer core at the bottom of the mantle
!---
if( ACTUALLY_COUPLE_FLUID_CMB ) &
- call compute_coupling_cmb_fluid_cuda(Mesh_pointer, &
- RHO_TOP_OC,minus_g_cmb,GRAVITY_VAL)
+ call compute_coupling_cmb_fluid_cuda(Mesh_pointer)
!---
!--- couple with outer core at the top of the inner core
!---
if( ACTUALLY_COUPLE_FLUID_ICB ) &
- call compute_coupling_icb_fluid_cuda(Mesh_pointer, &
- RHO_BOTTOM_OC,minus_g_icb,GRAVITY_VAL)
+ call compute_coupling_icb_fluid_cuda(Mesh_pointer)
endif
endif ! iphase == 1
@@ -491,7 +489,7 @@
enddo ! iphase
- if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
+ if( NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS ) then
NGLOB_XY = NGLOB_CRUST_MANTLE
else
NGLOB_XY = 1
@@ -500,25 +498,25 @@
! updates (only) acceleration w/ rotation in the crust/mantle region (touches oceans)
if(.NOT. GPU_MODE) then
! on CPU
- call compute_forces_el_update_accel(NGLOB_CRUST_MANTLE,NGLOB_XY,veloc_crust_mantle,accel_crust_mantle,two_omega_earth, &
+ call compute_forces_el_update_accel(NGLOB_CRUST_MANTLE,NGLOB_XY,veloc_crust_mantle,accel_crust_mantle, &
+ two_omega_earth, &
rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle, &
NCHUNKS_VAL,ABSORBING_CONDITIONS)
-
! adjoint / kernel runs
if (SIMULATION_TYPE == 3) &
call compute_forces_el_update_accel(NGLOB_CRUST_MANTLE_ADJOINT,NGLOB_XY,b_veloc_crust_mantle,b_accel_crust_mantle, &
- b_two_omega_earth,rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle, &
+ b_two_omega_earth, &
+ rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle, &
NCHUNKS_VAL,ABSORBING_CONDITIONS)
-
else
! on GPU
call kernel_3_a_cuda(Mesh_pointer, &
- deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS_VAL,NCHUNKS_VAL)
+ deltatover2,SIMULATION_TYPE,b_deltatover2,NCHUNKS_VAL)
endif
! couples ocean with crust mantle
! (updates acceleration with ocean load approximation)
- if(OCEANS_VAL) then
+ if( OCEANS_VAL ) then
if(.NOT. GPU_MODE) then
! on CPU
call compute_coupling_ocean(accel_crust_mantle,b_accel_crust_mantle, &
@@ -551,7 +549,7 @@
else
! on GPU
call kernel_3_b_cuda(Mesh_pointer, &
- deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS_VAL)
+ deltatover2,SIMULATION_TYPE,b_deltatover2)
endif
end subroutine compute_forces_elastic
@@ -559,7 +557,8 @@
!=====================================================================
- subroutine compute_forces_el_update_accel(NGLOB,NGLOB_XY,veloc_crust_mantle,accel_crust_mantle,two_omega_earth, &
+ subroutine compute_forces_el_update_accel(NGLOB,NGLOB_XY,veloc_crust_mantle,accel_crust_mantle, &
+ two_omega_earth, &
rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle, &
NCHUNKS_VAL,ABSORBING_CONDITIONS)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90 2012-08-19 01:24:46 UTC (rev 20591)
@@ -73,20 +73,21 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: newtempx1,newtempx2,newtempx3
! for gravity
- integer int_radius
- double precision radius,theta,phi,gxl,gyl,gzl
- double precision cos_theta,sin_theta,cos_phi,sin_phi
+ integer :: int_radius
+ double precision :: radius,theta,phi,gxl,gyl,gzl
+ double precision :: cos_theta,sin_theta,cos_phi,sin_phi
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: gravity_term
! for the Euler scheme for rotation
- real(kind=CUSTOM_REAL) two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rotation,B_rotation, &
+ real(kind=CUSTOM_REAL) :: two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rotation,B_rotation, &
ux_rotation,uy_rotation,dpotentialdx_with_rot,dpotentialdy_with_rot
+
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: source_euler_A,source_euler_B
- integer ispec,iglob
- integer i,j,k
- real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
- real(kind=CUSTOM_REAL) dpotentialdxl,dpotentialdyl,dpotentialdzl
- real(kind=CUSTOM_REAL) sum_terms
+ integer :: ispec,iglob
+ integer :: i,j,k
+ real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+ real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
+ real(kind=CUSTOM_REAL) :: sum_terms
! manually inline the calls to the Deville et al. (2002) routines
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc
@@ -253,9 +254,6 @@
source_euler_B(i,j,k) = two_omega_deltat &
* (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl)
- A_rotation = A_array_rotation(i,j,k,ispec)
- B_rotation = B_array_rotation(i,j,k,ispec)
-
ux_rotation = A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t
uy_rotation = - A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-08-19 01:24:46 UTC (rev 20591)
@@ -1149,12 +1149,14 @@
! prepares general fields on GPU
call prepare_constants_device(Mesh_pointer,myrank,NGLLX, &
- hprime_xx, hprime_yy, hprime_zz, &
+ hprime_xx, &
hprimewgll_xx, hprimewgll_yy, hprimewgll_zz, &
wgllwgll_xy, wgllwgll_xz, wgllwgll_yz, &
NSOURCES, nsources_local, &
- sourcearrays,islice_selected_source,ispec_selected_source, &
- number_receiver_global,islice_selected_rec,ispec_selected_rec, &
+ sourcearrays, &
+ islice_selected_source,ispec_selected_source, &
+ number_receiver_global, &
+ islice_selected_rec,ispec_selected_rec, &
nrec, nrec_local, nadj_rec_local, &
NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
NSPEC_CRUST_MANTLE_STRAIN_ONLY, &
@@ -1163,25 +1165,28 @@
NSPEC_INNER_CORE_STRAIN_ONLY, &
SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
SAVE_FORWARD,ABSORBING_CONDITIONS, &
- OCEANS_VAL,GRAVITY_VAL,ROTATION_VAL, &
+ OCEANS_VAL, &
+ GRAVITY_VAL, &
+ ROTATION_VAL, &
ATTENUATION_VAL,ATTENUATION_NEW_VAL, &
USE_ATTENUATION_MIMIC,ATTENUATION_3D_VAL, &
COMPUTE_AND_STORE_STRAIN, &
ANISOTROPIC_3D_MANTLE_VAL,ANISOTROPIC_INNER_CORE_VAL, &
SAVE_BOUNDARY_MESH, &
USE_MESH_COLORING_GPU, &
- ANISOTROPIC_KL,APPROXIMATE_HESS_KL)
+ ANISOTROPIC_KL,APPROXIMATE_HESS_KL, &
+ deltat,b_deltat)
! prepares rotation arrays
if( ROTATION_VAL ) then
if(myrank == 0 ) write(IMAIN,*) " loading rotation arrays"
call prepare_fields_rotation_device(Mesh_pointer, &
- two_omega_earth,deltat, &
- A_array_rotation,B_array_rotation, &
- b_two_omega_earth,b_deltat, &
- b_A_array_rotation,b_B_array_rotation, &
- NSPEC_OUTER_CORE_ROTATION)
+ two_omega_earth, &
+ A_array_rotation,B_array_rotation, &
+ b_two_omega_earth, &
+ b_A_array_rotation,b_B_array_rotation, &
+ NSPEC_OUTER_CORE_ROTATION)
endif
! prepares arrays related to gravity
@@ -1197,18 +1202,30 @@
cr_density_table(NRAD_GRAVITY), &
stat=ier)
if( ier /= 0 ) stop 'error allocating cr_minus_rho_g_over_kappa_fluid, etc...'
- ! d_ln_density_dr_table needed for no gravity case
- cr_d_ln_density_dr_table(:) = d_ln_density_dr_table(:)
- ! these are needed for gravity cases only
- cr_minus_rho_g_over_kappa_fluid(:) = minus_rho_g_over_kappa_fluid(:)
- cr_minus_gravity_table(:) = minus_gravity_table(:)
- cr_minus_deriv_gravity_table(:) = minus_deriv_gravity_table(:)
- cr_density_table(:) = density_table(:)
allocate(cr_wgll_cube(NGLLX,NGLLY,NGLLZ),stat=ier)
if( ier /= 0 ) stop 'error allocating cr_wgll_cube'
- cr_wgll_cube(:,:,:) = wgll_cube(:,:,:)
+ if(CUSTOM_REAL == SIZE_REAL) then
+ ! d_ln_density_dr_table needed for no gravity case
+ cr_d_ln_density_dr_table(:) = sngl(d_ln_density_dr_table(:))
+ ! these are needed for gravity cases only
+ cr_minus_rho_g_over_kappa_fluid(:) = sngl(minus_rho_g_over_kappa_fluid(:))
+ cr_minus_gravity_table(:) = sngl(minus_gravity_table(:))
+ cr_minus_deriv_gravity_table(:) = sngl(minus_deriv_gravity_table(:))
+ cr_density_table(:) = sngl(density_table(:))
+ cr_wgll_cube(:,:,:) = sngl(wgll_cube(:,:,:))
+ else
+ ! d_ln_density_dr_table needed for no gravity case
+ cr_d_ln_density_dr_table(:) = d_ln_density_dr_table(:)
+ ! these are needed for gravity cases only
+ cr_minus_rho_g_over_kappa_fluid(:) = minus_rho_g_over_kappa_fluid(:)
+ cr_minus_gravity_table(:) = minus_gravity_table(:)
+ cr_minus_deriv_gravity_table(:) = minus_deriv_gravity_table(:)
+ cr_density_table(:) = density_table(:)
+ cr_wgll_cube(:,:,:) = wgll_cube(:,:,:)
+ endif
+
! prepares on GPU
call prepare_fields_gravity_device(Mesh_pointer, &
cr_d_ln_density_dr_table, &
@@ -1217,7 +1234,10 @@
cr_minus_deriv_gravity_table, &
cr_density_table, &
cr_wgll_cube, &
- NRAD_GRAVITY)
+ NRAD_GRAVITY, &
+ minus_g_icb,minus_g_cmb, &
+ RHO_BOTTOM_OC,RHO_TOP_OC)
+
deallocate(cr_d_ln_density_dr_table,cr_minus_rho_g_over_kappa_fluid, &
cr_minus_gravity_table,cr_minus_deriv_gravity_table, &
cr_density_table)
@@ -1287,10 +1307,8 @@
nkmin_xi_outer_core,nkmin_eta_outer_core, &
ibelm_xmin_outer_core,ibelm_xmax_outer_core, &
ibelm_ymin_outer_core,ibelm_ymax_outer_core, &
- ibelm_bottom_outer_core, &
jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core, &
jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core, &
- jacobian2D_bottom_outer_core, &
vp_outer_core)
endif
@@ -1350,7 +1368,7 @@
! allocates arrays with all global points on ocean surface
npoin_oceans = ipoin
- allocate(iglob_ocean_load(npoin_oceans), &
+ allocate(ibool_ocean_load(npoin_oceans), &
normal_ocean_load(NDIM,npoin_oceans), &
rmass_ocean_load_selected(npoin_oceans), &
stat=ier)
@@ -1369,7 +1387,7 @@
if(.not. updated_dof_ocean_load(iglob)) then
ipoin = ipoin + 1
! fills arrays
- iglob_ocean_load(ipoin) = iglob
+ ibool_ocean_load(ipoin) = iglob
rmass_ocean_load_selected(ipoin) = rmass_ocean_load(iglob)
normal_ocean_load(:,ipoin) = normal_top_crust_mantle(:,i,j,ispec2D)
! masks this global point
@@ -1381,10 +1399,12 @@
! prepares arrays on GPU
call prepare_oceans_device(Mesh_pointer,npoin_oceans, &
- rmass_ocean_load_selected,iglob_ocean_load,normal_ocean_load)
+ ibool_ocean_load, &
+ rmass_ocean_load_selected, &
+ normal_ocean_load)
! frees memory
- deallocate(iglob_ocean_load,rmass_ocean_load_selected,normal_ocean_load)
+ deallocate(ibool_ocean_load,rmass_ocean_load_selected,normal_ocean_load)
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2012-08-19 01:24:46 UTC (rev 20591)
@@ -92,7 +92,7 @@
logical, dimension(NGLOB_CRUST_MANTLE_OCEANS) :: updated_dof_ocean_load
integer :: npoin_oceans
- integer, dimension(:),allocatable :: iglob_ocean_load
+ integer, dimension(:),allocatable :: ibool_ocean_load
real(kind=CUSTOM_REAL), dimension(:,:),allocatable :: normal_ocean_load
real(kind=CUSTOM_REAL), dimension(:),allocatable :: rmass_ocean_load_selected
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90 2012-08-18 17:02:11 UTC (rev 20590)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90 2012-08-19 01:24:46 UTC (rev 20591)
@@ -44,13 +44,14 @@
! gets resulting array values onto CPU
if( GPU_MODE ) then
! this transfers fields only in elements with stations for efficiency
- call write_seismograms_transfer_cuda(displ_crust_mantle,b_displ_crust_mantle, &
- eps_trace_over_3_crust_mantle, &
- epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
- epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
- Mesh_pointer,number_receiver_global, &
- ispec_selected_rec,ispec_selected_source, &
- ibool_crust_mantle)
+ call write_seismograms_transfer_cuda(Mesh_pointer, &
+ displ_crust_mantle,b_displ_crust_mantle, &
+ eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ number_receiver_global, &
+ ispec_selected_rec,ispec_selected_source, &
+ ibool_crust_mantle)
endif
! computes traces at interpolated receiver locations
More information about the CIG-COMMITS
mailing list