[cig-commits] r20674 - in seismo/3D/SPECFEM3D/trunk: src/cuda src/generate_databases src/shared src/specfem3D utils
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Sun Sep 2 20:54:59 PDT 2012
Author: danielpeter
Date: 2012-09-02 20:54:59 -0700 (Sun, 02 Sep 2012)
New Revision: 20674
Modified:
seismo/3D/SPECFEM3D/trunk/src/cuda/check_fields_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/compute_add_sources_acoustic_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/compute_coupling_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_acoustic_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_elastic_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/compute_stacey_acoustic_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/compute_stacey_elastic_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/it_update_displacement_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D/trunk/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D/trunk/src/cuda/write_seismograms_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_mass_matrices.f90
seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_regions_mesh.f90
seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases.f90
seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases_par.f90
seismo/3D/SPECFEM3D/trunk/src/generate_databases/get_model.f90
seismo/3D/SPECFEM3D/trunk/src/generate_databases/memory_eval.f90
seismo/3D/SPECFEM3D/trunk/src/generate_databases/model_1d_socal.f90
seismo/3D/SPECFEM3D/trunk/src/generate_databases/model_ipati.f90
seismo/3D/SPECFEM3D/trunk/src/generate_databases/save_arrays_solver.f90
seismo/3D/SPECFEM3D/trunk/src/shared/constants.h.in
seismo/3D/SPECFEM3D/trunk/src/shared/get_shape2D.f90
seismo/3D/SPECFEM3D/trunk/src/shared/hex_nodes.f90
seismo/3D/SPECFEM3D/trunk/src/shared/read_parameter_file.f90
seismo/3D/SPECFEM3D/trunk/src/shared/recompute_jacobian.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/PML_init.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_acoustic.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_elastic.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_poroelastic.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_coupling_elastic_ac.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_acoustic.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_elastic.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/create_color_image.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/finalize_simulation.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/initialize_simulation.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/model_update.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/read_mesh_databases.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/save_external_bin_m_up.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/write_output_SU.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/write_seismograms.f90
seismo/3D/SPECFEM3D/trunk/utils/update_headers_change_word_f90.pl
Log:
adds improved stacey conditions for acoustic and elastic domains; updates seismic unix output; adds ipati_water model option
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/check_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/check_fields_cuda.cu 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/check_fields_cuda.cu 2012-09-03 03:54:59 UTC (rev 20674)
@@ -752,8 +752,7 @@
extern "C"
void FC_FUNC_(get_norm_acoustic_from_device,
GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
- long* Mesh_pointer_f,
- int* SIMULATION_TYPE) {
+ long* Mesh_pointer_f) {
TRACE("get_norm_acoustic_from_device");
//double start_time = get_time();
@@ -816,9 +815,9 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- if(*SIMULATION_TYPE == 1 ){
+ if(mp->simulation_type == 1 ){
get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,size,d_max);
- }else if(*SIMULATION_TYPE == 3 ){
+ }else if(mp->simulation_type == 3 ){
get_maximum_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,size,d_max);
}
@@ -925,8 +924,7 @@
extern "C"
void FC_FUNC_(get_norm_elastic_from_device,
GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
- long* Mesh_pointer_f,
- int* SIMULATION_TYPE) {
+ long* Mesh_pointer_f) {
TRACE("get_norm_elastic_from_device");
//double start_time = get_time();
@@ -957,9 +955,9 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- if(*SIMULATION_TYPE == 1 ){
+ if(mp->simulation_type == 1 ){
get_maximum_vector_kernel<<<grid,threads>>>(mp->d_displ,size,d_max);
- }else if(*SIMULATION_TYPE == 3 ){
+ }else if(mp->simulation_type == 3 ){
get_maximum_vector_kernel<<<grid,threads>>>(mp->d_b_displ,size,d_max);
}
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/compute_add_sources_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/compute_add_sources_acoustic_cuda.cu 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/compute_add_sources_acoustic_cuda.cu 2012-09-03 03:54:59 UTC (rev 20674)
@@ -99,7 +99,6 @@
COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* NSOURCESf,
- int* SIMULATION_TYPEf,
double* h_stf_pre_compute,
int* myrankf) {
@@ -153,7 +152,6 @@
COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* NSOURCESf,
- int* SIMULATION_TYPEf,
double* h_stf_pre_compute,
int* myrankf) {
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/compute_coupling_cuda.cu 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/compute_coupling_cuda.cu 2012-09-03 03:54:59 UTC (rev 20674)
@@ -117,15 +117,13 @@
void FC_FUNC_(compute_coupling_ac_el_cuda,
COMPUTE_COUPLING_AC_EL_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
- int* num_coupling_ac_el_facesf,
- int* SIMULATION_TYPEf) {
+ int* num_coupling_ac_el_facesf) {
TRACE("compute_coupling_ac_el_cuda");
//double start_time = get_time();
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
int phase_is_inner = *phase_is_innerf;
int num_coupling_ac_el_faces = *num_coupling_ac_el_facesf;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
// way 1: exact blocksize to match NGLLSQUARE
int blocksize = NGLL2;
@@ -152,7 +150,7 @@
phase_is_inner);
// adjoint simulations
- if (SIMULATION_TYPE == 3 ){
+ if (mp->simulation_type == 3 ){
compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_b_displ,
mp->d_b_potential_dot_dot_acoustic,
num_coupling_ac_el_faces,
@@ -279,15 +277,13 @@
void FC_FUNC_(compute_coupling_el_ac_cuda,
COMPUTE_COUPLING_EL_AC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
- int* num_coupling_ac_el_facesf,
- int* SIMULATION_TYPEf) {
+ int* num_coupling_ac_el_facesf) {
TRACE("compute_coupling_el_ac_cuda");
//double start_time = get_time();
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
int phase_is_inner = *phase_is_innerf;
int num_coupling_ac_el_faces = *num_coupling_ac_el_facesf;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
// way 1: exact blocksize to match NGLLSQUARE
int blocksize = 25;
@@ -319,7 +315,7 @@
mp->d_displ);
// adjoint simulations
- if (SIMULATION_TYPE == 3 ){
+ if (mp->simulation_type == 3 ){
compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
mp->d_b_accel,
num_coupling_ac_el_faces,
@@ -343,3 +339,136 @@
exit_on_cuda_error("compute_coupling_el_ac_cuda");
#endif
}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/* OCEANS load on free surface */
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+__global__ void compute_coupling_ocean_cuda_kernel(realw* accel,
+ realw* rmassx,realw* rmassy,realw* rmassz,
+ realw* rmass_ocean_load,
+ int num_free_surface_faces,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ realw* free_surface_normal,
+ int* ibool,
+ int* updated_dof_ocean_load) {
+ // gets spectral element face id
+ int igll = threadIdx.x ; // threadIdx.y*blockDim.x will be always = 0 for thread block (25,1,1)
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
+ realw nx,ny,nz;
+ realw force_normal_comp;
+
+ // for all faces on free surface
+ if( iface < num_free_surface_faces ){
+
+ int ispec = free_surface_ispec[iface]-1;
+
+ // gets global point index
+ int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface)
+ int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
+ int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
+
+ int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
+
+ //if(igll == 0 ) printf("igll %d %d %d %d\n",igll,i,j,k,iglob);
+
+ // only update this global point once
+
+ // daniel: TODO - there might be better ways to implement a mutex like below,
+ // and find a workaround to not use the temporary update array.
+ // atomicExch: returns the old value, i.e. 0 indicates that we still have to do this point
+
+ if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){
+
+ // get normal
+ nx = free_surface_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; //(1,igll,iface)
+ ny = free_surface_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
+ nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
+
+ // make updated component of right-hand side
+ // we divide by rmass() which is 1 / M
+ // we use the total force which includes the Coriolis term above
+ force_normal_comp = accel[iglob*3]*nx / rmassx[iglob]
+ + accel[iglob*3+1]*ny / rmassy[iglob]
+ + accel[iglob*3+2]*nz / rmassz[iglob];
+
+ // probably wouldn't need atomicAdd anymore, but just to be sure...
+ atomicAdd(&accel[iglob*3], + (rmass_ocean_load[iglob] - rmassx[iglob]) * force_normal_comp * nx);
+ atomicAdd(&accel[iglob*3+1], + (rmass_ocean_load[iglob] - rmassy[iglob]) * force_normal_comp * ny);
+ atomicAdd(&accel[iglob*3+2], + (rmass_ocean_load[iglob] - rmassz[iglob]) * force_normal_comp * nz);
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_coupling_ocean_cuda,
+ COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f) {
+
+ TRACE("compute_coupling_ocean_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // checks if anything to do
+ if( mp->num_free_surface_faces == 0 ) return;
+
+ // block sizes: exact blocksize to match NGLLSQUARE
+ int blocksize = NGLL2;
+
+ int num_blocks_x = mp->num_free_surface_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+
+ // initializes temporary array to zero
+ print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
+ sizeof(int)*mp->NGLOB_AB),88501);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("before kernel compute_coupling_ocean_cuda");
+#endif
+
+ compute_coupling_ocean_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_accel,
+ mp->d_rmassx,mp->d_rmassy,mp->d_rmassz,
+ mp->d_rmass_ocean_load,
+ mp->num_free_surface_faces,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ mp->d_free_surface_normal,
+ mp->d_ibool,
+ mp->d_updated_dof_ocean_load);
+ // for backward/reconstructed potentials
+ if(mp->simulation_type == 3) {
+ // re-initializes array
+ print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
+ sizeof(int)*mp->NGLOB_AB),88502);
+
+ compute_coupling_ocean_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_b_accel,
+ mp->d_rmassx,mp->d_rmassy,mp->d_rmassz,
+ mp->d_rmass_ocean_load,
+ mp->num_free_surface_faces,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ mp->d_free_surface_normal,
+ mp->d_ibool,
+ mp->d_updated_dof_ocean_load);
+
+ }
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_coupling_ocean_cuda");
+#endif
+}
+
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_acoustic_cuda.cu 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_acoustic_cuda.cu 2012-09-03 03:54:59 UTC (rev 20674)
@@ -538,7 +538,6 @@
/* ----------------------------------------------------------------------------------------------- */
void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
- int SIMULATION_TYPE,
int* d_ibool,
realw* d_xix,
realw* d_xiy,
@@ -599,7 +598,7 @@
d_kappastore,
mp->d_wgll_cube);
- if(SIMULATION_TYPE == 3) {
+ if(mp->simulation_type == 3) {
Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
mp->NGLOB_AB,
d_ibool,
@@ -647,8 +646,7 @@
COMPUTE_FORCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
int* iphase,
int* nspec_outer_acoustic,
- int* nspec_inner_acoustic,
- int* SIMULATION_TYPE) {
+ int* nspec_inner_acoustic) {
TRACE("compute_forces_acoustic_cuda");
//double start_time = get_time();
@@ -700,7 +698,6 @@
nb_blocks_to_compute = mp->h_num_elem_colors_acoustic[icolor];
Kernel_2_acoustic(nb_blocks_to_compute,mp,*iphase,
- *SIMULATION_TYPE,
mp->d_ibool + color_offset_nonpadded,
mp->d_xix + color_offset,
mp->d_xiy + color_offset,
@@ -724,7 +721,6 @@
// no mesh coloring: uses atomic updates
Kernel_2_acoustic(num_elements, mp, *iphase,
- *SIMULATION_TYPE,
mp->d_ibool,
mp->d_xix,
mp->d_xiy,
@@ -741,133 +737,10 @@
}
}
-/* ----------------------------------------------------------------------------------------------- */
-/* KERNEL 3 */
/* ----------------------------------------------------------------------------------------------- */
-
-__global__ void kernel_3_a_acoustic_cuda_device(realw* potential_dot_dot_acoustic,
- int size,
- realw* rmass_acoustic) {
- int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
- /* because of block and grid sizing problems, there is a small */
- /* amount of buffer at the end of the calculation */
- if(id < size) {
- // multiplies pressure with the inverse of the mass matrix
- potential_dot_dot_acoustic[id] = potential_dot_dot_acoustic[id]*rmass_acoustic[id];
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-__global__ void kernel_3_b_acoustic_cuda_device(realw* potential_dot_acoustic,
- realw* potential_dot_dot_acoustic,
- int size,
- realw deltatover2,
- realw* rmass_acoustic) {
- int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
- /* because of block and grid sizing problems, there is a small */
- /* amount of buffer at the end of the calculation */
- if(id < size) {
- // Newmark time scheme: corrector term
- potential_dot_acoustic[id] = potential_dot_acoustic[id] + deltatover2*potential_dot_dot_acoustic[id];
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
-void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
- long* Mesh_pointer,
- int* size_F,
- int* SIMULATION_TYPE) {
-
-TRACE("kernel_3_a_acoustic_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
- int size = *size_F;
-
- int blocksize = BLOCKSIZE_KERNEL3;
- int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
- int num_blocks_x = size_padded/blocksize;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = (int) ceil(num_blocks_x*0.5f);
- num_blocks_y = num_blocks_y*2;
- }
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_dot_acoustic,
- size,
- mp->d_rmass_acoustic);
-
- if(*SIMULATION_TYPE == 3) {
- kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_dot_acoustic,
- size,
- mp->d_rmass_acoustic);
- }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- exit_on_cuda_error("after kernel 3 a");
-#endif
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
-void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
- long* Mesh_pointer,
- int* size_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE,
- realw* b_deltatover2_F) {
-
-TRACE("kernel_3_b_acoustic_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
- int size = *size_F;
- realw deltatover2 = *deltatover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
-
- int blocksize = BLOCKSIZE_KERNEL3;
- int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
- int num_blocks_x = size_padded/blocksize;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = (int) ceil(num_blocks_x*0.5f);
- num_blocks_y = num_blocks_y*2;
- }
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_acoustic,
- mp->d_potential_dot_dot_acoustic,
- size, deltatover2,
- mp->d_rmass_acoustic);
-
- if(*SIMULATION_TYPE == 3) {
- kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_acoustic,
- mp->d_b_potential_dot_dot_acoustic,
- size, b_deltatover2,
- mp->d_rmass_acoustic);
- }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- exit_on_cuda_error("after kernel 3 b");
-#endif
-}
-
-
-
-/* ----------------------------------------------------------------------------------------------- */
-
/* KERNEL for enforce free surface */
/* ----------------------------------------------------------------------------------------------- */
@@ -915,8 +788,7 @@
extern "C"
void FC_FUNC_(acoustic_enforce_free_surf_cuda,
ACOUSTIC_ENFORCE_FREE_SURF_CUDA)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE,
- int* ABSORB_FREE_SURFACE) {
+ int* ABSORB_FREE_SURFACE) {
TRACE("acoustic_enforce_free_surf_cuda");
@@ -947,7 +819,7 @@
mp->d_ibool,
mp->d_ispec_is_acoustic);
// for backward/reconstructed potentials
- if(*SIMULATION_TYPE == 3) {
+ if(mp->simulation_type == 3) {
enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic,
mp->d_b_potential_dot_acoustic,
mp->d_b_potential_dot_dot_acoustic,
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_elastic_cuda.cu 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_elastic_cuda.cu 2012-09-03 03:54:59 UTC (rev 20674)
@@ -160,7 +160,6 @@
if( mp->size_mpi_buffer > 0 ){
-//daniel: todo - check below with this...
int blocksize = BLOCKSIZE_TRANSFER;
int size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
@@ -172,20 +171,6 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-/*
-//daniel: todo - check originally used...
- int num_blocks_x = *nspec_outer_elastic;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = (int) ceil(num_blocks_x*0.5f);
- num_blocks_y = num_blocks_y*2;
- }
-
- int blocksize = NGLL3_PADDED;
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-*/
-
prepare_boundary_accel_on_device<<<grid,threads,0,mp->compute_stream>>>(mp->d_accel,mp->d_send_accel_buffer,
mp->num_interfaces_ext_mesh,
mp->max_nibool_interfaces_ext_mesh,
@@ -200,9 +185,6 @@
cudaMemcpyAsync(mp->h_send_accel_buffer,mp->d_send_accel_buffer,
mp->size_mpi_buffer*sizeof(realw),cudaMemcpyDeviceToHost,mp->copy_stream);
- // cudaMemcpyAsync(mp->h_send_accel_buffer,mp->d_send_accel_buffer,
- // 3* mp->max_nibool_interfaces_ext_mesh* mp->num_interfaces_ext_mesh*sizeof(realw),
- // cudaMemcpyDeviceToHost,mp->compute_stream);
}
}
@@ -1265,7 +1247,7 @@
/* ----------------------------------------------------------------------------------------------- */
void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,
- int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE,
+ int COMPUTE_AND_STORE_STRAIN,
int ATTENUATION,int ANISOTROPY,
int* d_ibool,
realw* d_xix,
@@ -1374,7 +1356,7 @@
d_epsilondev_xz,
d_epsilondev_yz,
d_epsilon_trace_over_3,
- SIMULATION_TYPE,
+ mp->simulation_type,
ATTENUATION,mp->NSPEC_AB,
d_one_minus_sum_beta,
d_factor_common,
@@ -1409,7 +1391,7 @@
mp->d_wgll_cube);
- if(SIMULATION_TYPE == 3) {
+ if(mp->simulation_type == 3) {
Kernel_2_impl<<< grid,threads,0,mp->compute_stream>>>(nb_blocks_to_compute,
mp->NGLOB_AB,
d_ibool,
@@ -1432,7 +1414,7 @@
d_b_epsilondev_xz,
d_b_epsilondev_yz,
d_b_epsilon_trace_over_3,
- SIMULATION_TYPE,
+ mp->simulation_type,
ATTENUATION,mp->NSPEC_AB,
d_one_minus_sum_beta,
d_factor_common,
@@ -1490,7 +1472,6 @@
int* iphase,
int* nspec_outer_elastic,
int* nspec_inner_elastic,
- int* SIMULATION_TYPE,
int* COMPUTE_AND_STORE_STRAIN,
int* ATTENUATION,
int* ANISOTROPY) {
@@ -1556,7 +1537,7 @@
//}
Kernel_2(nb_blocks_to_compute,mp,*iphase,
- *COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE,
+ *COMPUTE_AND_STORE_STRAIN,
*ATTENUATION,*ANISOTROPY,
mp->d_ibool + color_offset_nonpadded,
mp->d_xix + color_offset,
@@ -1638,7 +1619,7 @@
// no mesh coloring: uses atomic updates
Kernel_2(num_elements,mp,*iphase,
- *COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE,
+ *COMPUTE_AND_STORE_STRAIN,
*ATTENUATION,*ANISOTROPY,
mp->d_ibool,
mp->d_xix,
@@ -1703,7 +1684,6 @@
/* ----------------------------------------------------------------------------------------------- */
-//daniel: todo - use this instead above call to fortran routine to avoid compilation problems
extern "C"
void FC_FUNC_(sync_copy_from_device,
SYNC_copy_FROM_DEVICE)(long* Mesh_pointer_f,
@@ -1729,310 +1709,3 @@
// memory copy is now finished, so non-blocking MPI send can proceed
}
-/* ----------------------------------------------------------------------------------------------- */
-
-// KERNEL 3
-
-/* ----------------------------------------------------------------------------------------------- */
-
-__global__ void kernel_3_cuda_device(realw* veloc,
- realw* accel, int size,
- realw deltatover2,
- realw* rmass) {
- int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
- /* because of block and grid sizing problems, there is a small */
- /* amount of buffer at the end of the calculation */
- if(id < size) {
- realw new_accel = accel[id] * rmass[id / 3];
- veloc[id] += deltatover2 * new_accel;
- accel[id] = new_accel;
-/*
- accel[3*id] = accel[3*id]*rmass[id];
- accel[3*id+1] = accel[3*id+1]*rmass[id];
- accel[3*id+2] = accel[3*id+2]*rmass[id];
-
- veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
- veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1];
- veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
-*/
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-__global__ void kernel_3_accel_cuda_device(realw* accel,
- int size,
- realw* rmass) {
- int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
- /* because of block and grid sizing problems, there is a small */
- /* amount of buffer at the end of the calculation */
- if(id < size) {
- accel[id] *= rmass[id / 3];
-/*
- accel[3*id] = accel[3*id]*rmass[id];
- accel[3*id+1] = accel[3*id+1]*rmass[id];
- accel[3*id+2] = accel[3*id+2]*rmass[id];
-*/
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-__global__ void kernel_3_veloc_cuda_device(realw* veloc,
- realw* accel,
- int size,
- realw deltatover2) {
- int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
- /* because of block and grid sizing problems, there is a small */
- /* amount of buffer at the end of the calculation */
- if(id < size) {
- veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
- veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1];
- veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
-void FC_FUNC_(kernel_3_a_cuda,
- KERNEL_3_A_CUDA)(long* Mesh_pointer,
- int* size_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F,
- int* OCEANS) {
-TRACE("kernel_3_a_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
- //int size = *size_F;
- int size = *size_F * 3;
- int SIMULATION_TYPE = *SIMULATION_TYPE_f;
- realw deltatover2 = *deltatover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
-
- int blocksize = BLOCKSIZE_KERNEL3;
- int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
-
- int num_blocks_x = size_padded/blocksize;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = (int) ceil(num_blocks_x*0.5f);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- // check whether we can update accel and veloc, or only accel at this point
- if( *OCEANS == 0 ){
- // updates both, accel and veloc
- kernel_3_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_veloc,
- mp->d_accel,
- size, deltatover2, mp->d_rmass);
-
- if(SIMULATION_TYPE == 3) {
- kernel_3_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_veloc,
- mp->d_b_accel,
- size, b_deltatover2,mp->d_rmass);
- }
- }else{
- // updates only accel
- kernel_3_accel_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_accel,
- size, mp->d_rmass);
-
- if(SIMULATION_TYPE == 3) {
- kernel_3_accel_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_accel,
- size, mp->d_rmass);
- }
- }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- exit_on_cuda_error("after kernel 3 a");
-#endif
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
-void FC_FUNC_(kernel_3_b_cuda,
- KERNEL_3_B_CUDA)(long* Mesh_pointer,
- int* size_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F) {
- TRACE("kernel_3_b_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
- int size = *size_F;
- int SIMULATION_TYPE = *SIMULATION_TYPE_f;
- realw deltatover2 = *deltatover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
-
- int blocksize = BLOCKSIZE_KERNEL3;
- int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
-
- int num_blocks_x = size_padded/blocksize;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = (int) ceil(num_blocks_x*0.5f);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- // updates only veloc at this point
- kernel_3_veloc_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_veloc,
- mp->d_accel,
- size,deltatover2);
-
- if(SIMULATION_TYPE == 3) {
- kernel_3_veloc_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_veloc,
- mp->d_b_accel,
- size,b_deltatover2);
- }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- exit_on_cuda_error("after kernel 3 b");
-#endif
-}
-
-
-/* ----------------------------------------------------------------------------------------------- */
-
-/* OCEANS load on free surface */
-
-/* ----------------------------------------------------------------------------------------------- */
-
-
-__global__ void elastic_ocean_load_cuda_kernel(realw* accel,
- realw* rmass,
- realw* rmass_ocean_load,
- int num_free_surface_faces,
- int* free_surface_ispec,
- int* free_surface_ijk,
- realw* free_surface_normal,
- int* ibool,
- int* updated_dof_ocean_load) {
- // gets spectral element face id
- int igll = threadIdx.x ; // threadIdx.y*blockDim.x will be always = 0 for thread block (25,1,1)
- int iface = blockIdx.x + gridDim.x*blockIdx.y;
- realw nx,ny,nz;
- realw force_normal_comp,additional_term;
-
- // for all faces on free surface
- if( iface < num_free_surface_faces ){
-
- int ispec = free_surface_ispec[iface]-1;
-
- // gets global point index
- int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface)
- int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
- int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-
- int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
-
- //if(igll == 0 ) printf("igll %d %d %d %d\n",igll,i,j,k,iglob);
-
- // only update this global point once
-
- // daniel: TODO - there might be better ways to implement a mutex like below,
- // and find a workaround to not use the temporary update array.
- // atomicExch: returns the old value, i.e. 0 indicates that we still have to do this point
-
- if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){
-
- // get normal
- nx = free_surface_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; //(1,igll,iface)
- ny = free_surface_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
- nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
-
- // make updated component of right-hand side
- // we divide by rmass() which is 1 / M
- // we use the total force which includes the Coriolis term above
- force_normal_comp = ( accel[iglob*3]*nx + accel[iglob*3+1]*ny + accel[iglob*3+2]*nz ) / rmass[iglob];
-
- additional_term = (rmass_ocean_load[iglob] - rmass[iglob]) * force_normal_comp;
-
- // probably wouldn't need atomicAdd anymore, but just to be sure...
- atomicAdd(&accel[iglob*3], + additional_term * nx);
- atomicAdd(&accel[iglob*3+1], + additional_term * ny);
- atomicAdd(&accel[iglob*3+2], + additional_term * nz);
- }
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
-void FC_FUNC_(elastic_ocean_load_cuda,
- ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE) {
-
- TRACE("elastic_ocean_load_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- // checks if anything to do
- if( mp->num_free_surface_faces == 0 ) return;
-
- // block sizes: exact blocksize to match NGLLSQUARE
- int blocksize = NGLL2;
-
- int num_blocks_x = mp->num_free_surface_faces;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = (int) ceil(num_blocks_x*0.5f);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
-
- // initializes temporary array to zero
- print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
- sizeof(int)*mp->NGLOB_AB),88501);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("before kernel elastic_ocean_load_cuda");
-#endif
-
- elastic_ocean_load_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_accel,
- mp->d_rmass,
- mp->d_rmass_ocean_load,
- mp->num_free_surface_faces,
- mp->d_free_surface_ispec,
- mp->d_free_surface_ijk,
- mp->d_free_surface_normal,
- mp->d_ibool,
- mp->d_updated_dof_ocean_load);
- // for backward/reconstructed potentials
- if(*SIMULATION_TYPE == 3) {
- // re-initializes array
- print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
- sizeof(int)*mp->NGLOB_AB),88502);
-
- elastic_ocean_load_cuda_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_b_accel,
- mp->d_rmass,
- mp->d_rmass_ocean_load,
- mp->num_free_surface_faces,
- mp->d_free_surface_ispec,
- mp->d_free_surface_ijk,
- mp->d_free_surface_normal,
- mp->d_ibool,
- mp->d_updated_dof_ocean_load);
-
- }
-
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("elastic_ocean_load_cuda");
-#endif
-}
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/compute_stacey_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/compute_stacey_acoustic_cuda.cu 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/compute_stacey_acoustic_cuda.cu 2012-09-03 03:54:59 UTC (rev 20674)
@@ -50,7 +50,8 @@
int* ispec_is_inner,
int* ispec_is_acoustic,
int phase_is_inner,
- int SIMULATION_TYPE, int SAVE_FORWARD,
+ int SIMULATION_TYPE,
+ int SAVE_FORWARD,
int num_abs_boundary_faces,
realw* b_potential_dot_acoustic,
realw* b_potential_dot_dot_acoustic,
@@ -121,18 +122,15 @@
extern "C"
void FC_FUNC_(compute_stacey_acoustic_cuda,
- COMPUTE_STACEY_ACOUSTIC_CUDA)(
- long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* SIMULATION_TYPEf,
- int* SAVE_FORWARDf,
- realw* h_b_absorb_potential) {
+ COMPUTE_STACEY_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* SAVE_FORWARDf,
+ realw* h_b_absorb_potential) {
TRACE("compute_stacey_acoustic_cuda");
//double start_time = get_time();
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
int phase_is_inner = *phase_is_innerf;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
int SAVE_FORWARD = *SAVE_FORWARDf;
// way 1: Elapsed time: 4.385948e-03
@@ -154,7 +152,7 @@
dim3 threads(blocksize,1,1);
// adjoint simulations: reads in absorbing boundary
- if (SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0 ){
+ if (mp->simulation_type == 3 && mp->d_num_abs_boundary_faces > 0 ){
// copies array to GPU
print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,h_b_absorb_potential,
mp->d_b_reclen_potential,cudaMemcpyHostToDevice),7700);
@@ -171,7 +169,8 @@
mp->d_ispec_is_inner,
mp->d_ispec_is_acoustic,
phase_is_inner,
- SIMULATION_TYPE,SAVE_FORWARD,
+ mp->simulation_type,
+ SAVE_FORWARD,
mp->d_num_abs_boundary_faces,
mp->d_b_potential_dot_acoustic,
mp->d_b_potential_dot_dot_acoustic,
@@ -179,7 +178,7 @@
mp->gravity);
// adjoint simulations: stores absorbed wavefield part
- if (SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ){
+ if (mp->simulation_type == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ){
// copies array to CPU
print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_potential,mp->d_b_absorb_potential,
mp->d_b_reclen_potential,cudaMemcpyDeviceToHost),7701);
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/compute_stacey_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/compute_stacey_elastic_cuda.cu 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/compute_stacey_elastic_cuda.cu 2012-09-03 03:54:59 UTC (rev 20674)
@@ -132,7 +132,6 @@
void FC_FUNC_(compute_stacey_elastic_cuda,
COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
- int* SIMULATION_TYPEf,
int* SAVE_FORWARDf,
realw* h_b_absorb_field) {
@@ -144,7 +143,6 @@
if( mp->d_num_abs_boundary_faces == 0 ) return;
int phase_is_inner = *phase_is_innerf;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
int SAVE_FORWARD = *SAVE_FORWARDf;
// way 1
@@ -165,7 +163,7 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- if(SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0) {
+ if(mp->simulation_type == 3 && mp->d_num_abs_boundary_faces > 0) {
// The read is done in fortran
print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field,h_b_absorb_field,
mp->d_b_reclen_field,cudaMemcpyHostToDevice),7700);
@@ -187,7 +185,8 @@
mp->d_ispec_is_inner,
mp->d_ispec_is_elastic,
phase_is_inner,
- SIMULATION_TYPE,SAVE_FORWARD,
+ mp->simulation_type,
+ SAVE_FORWARD,
mp->d_num_abs_boundary_faces,
mp->d_b_accel,
mp->d_b_absorb_field);
@@ -197,10 +196,10 @@
#endif
// ! adjoint simulations: stores absorbed wavefield part
- // if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) &
+ // if (mp->simulation_type == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) &
// write(IOABS,rec=it) b_reclen_field,b_absorb_field,b_reclen_field
- if(SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ) {
+ if(mp->simulation_type == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ) {
print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_field,mp->d_b_absorb_field,
mp->d_b_reclen_field,cudaMemcpyDeviceToHost),7701);
// The write is done in fortran
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/it_update_displacement_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/it_update_displacement_cuda.cu 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/it_update_displacement_cuda.cu 2012-09-03 03:54:59 UTC (rev 20674)
@@ -68,14 +68,13 @@
extern "C"
void FC_FUNC_(it_update_displacement_cuda,
IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f,
- int* size_F,
- realw* deltat_F,
- realw* deltatsqover2_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F) {
+ int* size_F,
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ realw* b_deltat_F,
+ realw* b_deltatsqover2_F,
+ realw* b_deltatover2_F) {
TRACE("it_update_displacement_cuda");
@@ -120,7 +119,7 @@
//#endif
// kernel for backward fields
- if(*SIMULATION_TYPE == 3) {
+ if(mp->simulation_type == 3) {
realw b_deltat = *b_deltat_F;
realw b_deltatsqover2 = *b_deltatsqover2_F;
realw b_deltatover2 = *b_deltatover2_F;
@@ -178,7 +177,6 @@
realw* deltat_F,
realw* deltatsqover2_F,
realw* deltatover2_F,
- int* SIMULATION_TYPE,
realw* b_deltat_F,
realw* b_deltatsqover2_F,
realw* b_deltatover2_F) {
@@ -212,7 +210,7 @@
mp->d_potential_dot_dot_acoustic,
size,deltat,deltatsqover2,deltatover2);
- if(*SIMULATION_TYPE == 3) {
+ if(mp->simulation_type == 3) {
realw b_deltat = *b_deltat_F;
realw b_deltatsqover2 = *b_deltatsqover2_F;
realw b_deltatover2 = *b_deltatover2_F;
@@ -229,3 +227,310 @@
exit_on_cuda_error("it_update_displacement_ac_cuda");
#endif
}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// elastic domains
+
+// KERNEL 3
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void kernel_3_cuda_device(realw* veloc,
+ realw* accel,
+ int size,
+ realw deltatover2,
+ realw* rmassx,
+ realw* rmassy,
+ realw* rmassz) {
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+ /* because of block and grid sizing problems, there is a small */
+ /* amount of buffer at the end of the calculation */
+ if(id < size) {
+ accel[3*id] = accel[3*id]*rmassx[id];
+ accel[3*id+1] = accel[3*id+1]*rmassy[id];
+ accel[3*id+2] = accel[3*id+2]*rmassz[id];
+
+ veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
+ veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1];
+ veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void kernel_3_accel_cuda_device(realw* accel,
+ int size,
+ realw* rmassx,
+ realw* rmassy,
+ realw* rmassz) {
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+ /* because of block and grid sizing problems, there is a small */
+ /* amount of buffer at the end of the calculation */
+ if(id < size) {
+ accel[3*id] = accel[3*id]*rmassx[id];
+ accel[3*id+1] = accel[3*id+1]*rmassy[id];
+ accel[3*id+2] = accel[3*id+2]*rmassz[id];
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void kernel_3_veloc_cuda_device(realw* veloc,
+ realw* accel,
+ int size,
+ realw deltatover2) {
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+ /* because of block and grid sizing problems, there is a small */
+ /* amount of buffer at the end of the calculation */
+ if(id < size) {
+ veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
+ veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1];
+ veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(kernel_3_a_cuda,
+ KERNEL_3_A_CUDA)(long* Mesh_pointer,
+ int* size_F,
+ realw* deltatover2_F,
+ realw* b_deltatover2_F,
+ int* OCEANS) {
+TRACE("kernel_3_a_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
+
+ int size = *size_F;
+
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+
+ int blocksize = BLOCKSIZE_KERNEL3;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // check whether we can update accel and veloc, or only accel at this point
+ if( *OCEANS == 0 ){
+ // updates both, accel and veloc
+ kernel_3_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_veloc,
+ mp->d_accel,
+ size, deltatover2,
+ mp->d_rmassx,mp->d_rmassy,mp->d_rmassz);
+
+ if(mp->simulation_type == 3) {
+ kernel_3_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_veloc,
+ mp->d_b_accel,
+ size, b_deltatover2,
+ mp->d_rmassx,mp->d_rmassy,mp->d_rmassz);
+ }
+ }else{
+ // updates only accel
+ kernel_3_accel_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_accel,
+ size,
+ mp->d_rmassx,
+ mp->d_rmassy,
+ mp->d_rmassz);
+
+ if(mp->simulation_type == 3) {
+ kernel_3_accel_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_accel,
+ size,
+ mp->d_rmassx,
+ mp->d_rmassy,
+ mp->d_rmassz);
+ }
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after kernel 3 a");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(kernel_3_b_cuda,
+ KERNEL_3_B_CUDA)(long* Mesh_pointer,
+ int* size_F,
+ realw* deltatover2_F,
+ realw* b_deltatover2_F) {
+ TRACE("kernel_3_b_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
+ int size = *size_F;
+
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+
+ int blocksize = BLOCKSIZE_KERNEL3;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // updates only veloc at this point
+ kernel_3_veloc_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_veloc,
+ mp->d_accel,
+ size,deltatover2);
+
+ if(mp->simulation_type == 3) {
+ kernel_3_veloc_cuda_device<<< grid, threads,0,mp->compute_stream>>>(mp->d_b_veloc,
+ mp->d_b_accel,
+ size,b_deltatover2);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after kernel 3 b");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// acoustic domains
+
+// KERNEL 3
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+__global__ void kernel_3_a_acoustic_cuda_device(realw* potential_dot_dot_acoustic,
+ int size,
+ realw* rmass_acoustic) {
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+ /* because of block and grid sizing problems, there is a small */
+ /* amount of buffer at the end of the calculation */
+ if(id < size) {
+ // multiplies pressure with the inverse of the mass matrix
+ potential_dot_dot_acoustic[id] = potential_dot_dot_acoustic[id]*rmass_acoustic[id];
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void kernel_3_b_acoustic_cuda_device(realw* potential_dot_acoustic,
+ realw* potential_dot_dot_acoustic,
+ int size,
+ realw deltatover2,
+ realw* rmass_acoustic) {
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+ /* because of block and grid sizing problems, there is a small */
+ /* amount of buffer at the end of the calculation */
+ if(id < size) {
+ // Newmark time scheme: corrector term
+ potential_dot_acoustic[id] = potential_dot_acoustic[id] + deltatover2*potential_dot_dot_acoustic[id];
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
+ long* Mesh_pointer,
+ int* size_F) {
+
+TRACE("kernel_3_a_acoustic_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
+ int size = *size_F;
+
+ int blocksize = BLOCKSIZE_KERNEL3;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_dot_acoustic,
+ size,
+ mp->d_rmass_acoustic);
+
+ if(mp->simulation_type == 3) {
+ kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ size,
+ mp->d_rmass_acoustic);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after kernel 3 a");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
+ long* Mesh_pointer,
+ int* size_F,
+ realw* deltatover2_F,
+ realw* b_deltatover2_F) {
+
+TRACE("kernel_3_b_acoustic_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
+ int size = *size_F;
+
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+
+ int blocksize = BLOCKSIZE_KERNEL3;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_acoustic,
+ mp->d_potential_dot_dot_acoustic,
+ size, deltatover2,
+ mp->d_rmass_acoustic);
+
+ if(mp->simulation_type == 3) {
+ kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_acoustic,
+ mp->d_b_potential_dot_dot_acoustic,
+ size, b_deltatover2,
+ mp->d_rmass_acoustic);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after kernel 3 b");
+#endif
+}
+
+
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/mesh_constants_cuda.h 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/mesh_constants_cuda.h 2012-09-03 03:54:59 UTC (rev 20674)
@@ -192,6 +192,13 @@
int myrank;
int NPROC;
+ // constants
+ int simulation_type;
+ int use_mesh_coloring_gpu;
+ int absorbing_conditions;
+ int gravity;
+
+
// ------------------------------------------------------------------ //
// GLL points & weights
// ------------------------------------------------------------------ //
@@ -210,9 +217,6 @@
// inner / outer elements
int* d_ispec_is_inner;
- // mesh coloring
- int use_mesh_coloring_gpu;
-
// pointers to constant memory arrays
realw* d_hprime_xx;
//realw* d_hprime_yy; // only needed if NGLLX != NGLLY != NGLLZ
@@ -285,7 +289,9 @@
int num_colors_outer_elastic,num_colors_inner_elastic;
int nspec_elastic;
- realw* d_rmass;
+ realw* d_rmassx;
+ realw* d_rmassy;
+ realw* d_rmassz;
// mpi buffer
realw* d_send_accel_buffer;
@@ -481,7 +487,6 @@
realw* d_coupling_ac_el_jacobian2Dw;
// gravity
- int gravity;
realw* d_minus_deriv_gravity;
realw* d_minus_g;
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/prepare_mesh_constants_cuda.cu 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/prepare_mesh_constants_cuda.cu 2012-09-03 03:54:59 UTC (rev 20674)
@@ -154,6 +154,10 @@
mp->NSPEC_AB = *NSPEC_AB;
mp->NGLOB_AB = *NGLOB_AB;
+ // constants
+ mp->simulation_type = *SIMULATION_TYPE;
+ mp->absorbing_conditions = *ABSORBING_CONDITIONS;
+
// sets constant arrays
setConst_hprime_xx(h_hprime_xx,mp);
// setConst_hprime_yy(h_hprime_yy,mp); // only needed if NGLLX != NGLLY != NGLLZ
@@ -289,41 +293,54 @@
}
// inner elements
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,mp->NSPEC_AB*sizeof(int)),1205);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_inner, h_ispec_is_inner,
- mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),1206);
+ copy_todevice_int((void**)&mp->d_ispec_is_inner,h_ispec_is_inner,mp->NSPEC_AB);
+// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,mp->NSPEC_AB*sizeof(int)),1205);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_inner, h_ispec_is_inner,
+ // mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),1206);
+
// absorbing boundaries
mp->d_num_abs_boundary_faces = *h_num_abs_boundary_faces;
- if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ispec),
- (mp->d_num_abs_boundary_faces)*sizeof(int)),1101);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ispec, h_abs_boundary_ispec,
- (mp->d_num_abs_boundary_faces)*sizeof(int),
- cudaMemcpyHostToDevice),1102);
+ if( mp->absorbing_conditions && mp->d_num_abs_boundary_faces > 0 ){
+ copy_todevice_int((void**)&mp->d_abs_boundary_ispec,h_abs_boundary_ispec,mp->d_num_abs_boundary_faces);
- print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ijk),
- 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(int)),1103);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ijk, h_abs_boundary_ijk,
- 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(int),
- cudaMemcpyHostToDevice),1104);
+// print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ispec),
+// (mp->d_num_abs_boundary_faces)*sizeof(int)),1101);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ispec, h_abs_boundary_ispec,
+// (mp->d_num_abs_boundary_faces)*sizeof(int),
+// cudaMemcpyHostToDevice),1102);
- print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_normal),
- 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw)),1105);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_normal, h_abs_boundary_normal,
- 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw),
- cudaMemcpyHostToDevice),1106);
+ copy_todevice_int((void**)&mp->d_abs_boundary_ijk,h_abs_boundary_ijk,
+ 3*NGLL2*(mp->d_num_abs_boundary_faces));
- print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_jacobian2Dw),
- NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw)),1107);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_jacobian2Dw, h_abs_boundary_jacobian2Dw,
- NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw),
- cudaMemcpyHostToDevice),1108);
+// print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ijk),
+// 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(int)),1103);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ijk, h_abs_boundary_ijk,
+// 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(int),
+// cudaMemcpyHostToDevice),1104);
+
+ copy_todevice_realw((void**)&mp->d_abs_boundary_normal,h_abs_boundary_normal,
+ NDIM*NGLL2*(mp->d_num_abs_boundary_faces));
+
+// print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_normal),
+// 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw)),1105);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_normal, h_abs_boundary_normal,
+// 3*NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw),
+// cudaMemcpyHostToDevice),1106);
+
+ copy_todevice_realw((void**)&mp->d_abs_boundary_jacobian2Dw,h_abs_boundary_jacobian2Dw,
+ NGLL2*(mp->d_num_abs_boundary_faces));
+
+// print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_jacobian2Dw),
+// NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw)),1107);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_jacobian2Dw, h_abs_boundary_jacobian2Dw,
+// NGLL2*(mp->d_num_abs_boundary_faces)*sizeof(realw),
+ // cudaMemcpyHostToDevice),1108);
}
// sources
mp->nsources_local = *nsources_local_f;
- if (*SIMULATION_TYPE == 1 || *SIMULATION_TYPE == 3){
+ if (mp->simulation_type == 1 || mp->simulation_type == 3){
// not needed in case of pure adjoint simulations (SIMULATION_TYPE == 2)
copy_todevice_realw((void**)&mp->d_sourcearrays,h_sourcearrays,(*NSOURCES)*NDIM*NGLL3);
@@ -388,7 +405,6 @@
int* num_free_surface_faces,
int* free_surface_ispec,
int* free_surface_ijk,
- int* ABSORBING_CONDITIONS,
int* b_reclen_potential,
realw* b_absorb_potential,
int* ELASTIC_SIMULATION,
@@ -406,7 +422,7 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
/* Assuming NGLLX==5. Padded is then 128 (5^3+3) */
int size_padded = NGLL3_PADDED * mp->NSPEC_AB;
- int size_nonpadded = NGLL3 * mp->NSPEC_AB;
+// int size_nonpadded = NGLL3 * mp->NSPEC_AB;
int size_glob = mp->NGLOB_AB;
// allocates arrays on device (GPU)
@@ -421,10 +437,12 @@
}
// mass matrix
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_acoustic,rmass_acoustic,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ copy_todevice_realw((void**)&mp->d_rmass_acoustic,rmass_acoustic,mp->NGLOB_AB);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(realw)*size_glob),2005);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_acoustic,rmass_acoustic,
+// sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+
// padded array
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rhostore),size_padded*sizeof(realw)),2006);
// transfer constant element data with padding
@@ -434,45 +452,63 @@
}
// non-padded array
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_nonpadded*sizeof(realw)),2007);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_kappastore,kappastore,
- NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),2105);
+ copy_todevice_realw((void**)&mp->d_kappastore,kappastore,NGLL3*mp->NSPEC_AB);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_nonpadded*sizeof(realw)),2007);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_kappastore,kappastore,
+// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),2105);
+
// phase elements
mp->num_phase_ispec_acoustic = *num_phase_ispec_acoustic;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic),
- mp->num_phase_ispec_acoustic*2*sizeof(int)),2008);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic,
- mp->num_phase_ispec_acoustic*2*sizeof(int),cudaMemcpyHostToDevice),2101);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic),
- mp->NSPEC_AB*sizeof(int)),2009);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_acoustic,ispec_is_acoustic,
- mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),2102);
+ copy_todevice_int((void**)&mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic,
+ 2*mp->num_phase_ispec_acoustic);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic),
+// mp->num_phase_ispec_acoustic*2*sizeof(int)),2008);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic,
+// mp->num_phase_ispec_acoustic*2*sizeof(int),cudaMemcpyHostToDevice),2101);
+
+ copy_todevice_int((void**)&mp->d_ispec_is_acoustic,ispec_is_acoustic,mp->NSPEC_AB);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic),
+// mp->NSPEC_AB*sizeof(int)),2009);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_acoustic,ispec_is_acoustic,
+// mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),2102);
+
// free surface
if( *NOISE_TOMOGRAPHY == 0 ){
// allocate surface arrays
mp->num_free_surface_faces = *num_free_surface_faces;
if( mp->num_free_surface_faces > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
- mp->num_free_surface_faces*sizeof(int)),2201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
- mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2203);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
- 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),2202);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
- 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2204);
+ copy_todevice_int((void**)&mp->d_free_surface_ispec,free_surface_ispec,mp->num_free_surface_faces);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
+// mp->num_free_surface_faces*sizeof(int)),2201);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
+// mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2203);
+
+
+ copy_todevice_int((void**)&mp->d_free_surface_ijk,free_surface_ijk,
+ 3*NGLL2*mp->num_free_surface_faces);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
+// 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),2202);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+// 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2204);
}
}
// absorbing boundaries
- if( *ABSORBING_CONDITIONS ){
+ if( mp->absorbing_conditions ){
mp->d_b_reclen_potential = *b_reclen_potential;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),2301);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential,
- mp->d_b_reclen_potential,cudaMemcpyHostToDevice),2302);
+
+ copy_todevice_realw((void**)&mp->d_b_absorb_potential,b_absorb_potential,mp->d_b_reclen_potential);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),2301);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential,
+// mp->d_b_reclen_potential,cudaMemcpyHostToDevice),2302);
}
@@ -488,26 +524,37 @@
// coupling with elastic parts
if( *ELASTIC_SIMULATION && *num_coupling_ac_el_faces > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ispec),
- (*num_coupling_ac_el_faces)*sizeof(int)),2601);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ispec,coupling_ac_el_ispec,
- (*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2602);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ijk),
- 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int)),2603);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ijk,coupling_ac_el_ijk,
- 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2604);
+ copy_todevice_int((void**)&mp->d_coupling_ac_el_ispec,coupling_ac_el_ispec,(*num_coupling_ac_el_faces));
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_normal),
- 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2605);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_normal,coupling_ac_el_normal,
- 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2606);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ispec),
+// (*num_coupling_ac_el_faces)*sizeof(int)),2601);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ispec,coupling_ac_el_ispec,
+// (*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2602);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_jacobian2Dw),
- NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2607);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_jacobian2Dw,coupling_ac_el_jacobian2Dw,
- NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2608);
+ copy_todevice_int((void**)&mp->d_coupling_ac_el_ijk,coupling_ac_el_ijk,3*NGLL2*(*num_coupling_ac_el_faces));
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ijk),
+// 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int)),2603);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ijk,coupling_ac_el_ijk,
+// 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2604);
+
+ copy_todevice_realw((void**)&mp->d_coupling_ac_el_normal,coupling_ac_el_normal,
+ 3*NGLL2*(*num_coupling_ac_el_faces));
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_normal),
+// 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2605);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_normal,coupling_ac_el_normal,
+// 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2606);
+
+ copy_todevice_realw((void**)&mp->d_coupling_ac_el_jacobian2Dw,coupling_ac_el_jacobian2Dw,
+ NGLL2*(*num_coupling_ac_el_faces));
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_jacobian2Dw),
+// NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2607);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_jacobian2Dw,coupling_ac_el_jacobian2Dw,
+// NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2608);
+
}
// mesh coloring
@@ -528,7 +575,6 @@
extern "C"
void FC_FUNC_(prepare_fields_acoustic_adj_dev,
PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE,
int* APPROXIMATE_HESS_KL) {
TRACE("prepare_fields_acoustic_adj_dev");
@@ -538,7 +584,7 @@
int size_glob = mp->NGLOB_AB;
// kernel simulations
- if( *SIMULATION_TYPE != 3 ) return;
+ if( mp->simulation_type != 3 ) return;
// allocates backward/reconstructed arrays on device (GPU)
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_acoustic),sizeof(realw)*size_glob),3014);
@@ -579,16 +625,17 @@
void FC_FUNC_(prepare_fields_elastic_device,
PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
int* size,
- realw* rmass,
+ realw* rmassx,
+ realw* rmassy,
+ realw* rmassz,
realw* rho_vp,
realw* rho_vs,
int* num_phase_ispec_elastic,
int* phase_ispec_inner_elastic,
int* ispec_is_elastic,
- int* ABSORBING_CONDITIONS,
realw* h_b_absorb_field,
int* h_b_reclen_field,
- int* SIMULATION_TYPE,int* SAVE_FORWARD,
+ int* SAVE_FORWARD,
int* COMPUTE_AND_STORE_STRAIN,
realw* epsilondev_xx,realw* epsilondev_yy,realw* epsilondev_xy,
realw* epsilondev_xz,realw* epsilondev_yz,
@@ -636,7 +683,7 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
/* Assuming NGLLX==5. Padded is then 128 (5^3+3) */
int size_padded = NGLL3_PADDED * (mp->NSPEC_AB);
- int size_nonpadded = NGLL3 * (mp->NSPEC_AB);
+// int size_nonpadded = NGLL3 * (mp->NSPEC_AB);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ),sizeof(realw)*(*size)),4001);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc),sizeof(realw)*(*size)),4002);
@@ -663,24 +710,33 @@
}
// mass matrix
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass),sizeof(realw)*mp->NGLOB_AB),4005);
- // transfer element data
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,rmass,
- sizeof(realw)*mp->NGLOB_AB,cudaMemcpyHostToDevice),4010);
+ copy_todevice_realw((void**)&mp->d_rmassx,rmassx,mp->NGLOB_AB);
+ copy_todevice_realw((void**)&mp->d_rmassy,rmassy,mp->NGLOB_AB);
+ copy_todevice_realw((void**)&mp->d_rmassz,rmassz,mp->NGLOB_AB);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass),sizeof(realw)*mp->NGLOB_AB),4005);
+// // transfer element data
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,rmass,
+// sizeof(realw)*mp->NGLOB_AB,cudaMemcpyHostToDevice),4010);
+
// element indices
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_elastic),mp->NSPEC_AB*sizeof(int)),4009);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_elastic,ispec_is_elastic,
- mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),4012);
+ copy_todevice_int((void**)&mp->d_ispec_is_elastic,ispec_is_elastic,mp->NSPEC_AB);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_elastic),mp->NSPEC_AB*sizeof(int)),4009);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_elastic,ispec_is_elastic,
+// mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),4012);
+
// phase elements
mp->num_phase_ispec_elastic = *num_phase_ispec_elastic;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_elastic),
- mp->num_phase_ispec_elastic*2*sizeof(int)),4008);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic,
- mp->num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),4011);
+ copy_todevice_int((void**)&mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic,2*mp->num_phase_ispec_elastic);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_elastic),
+// mp->num_phase_ispec_elastic*2*sizeof(int)),4008);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic,
+// mp->num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),4011);
+
// for seismograms
if( mp->nrec_local > 0 ){
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),
@@ -691,24 +747,30 @@
}
// absorbing conditions
- if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0){
+ if( mp->absorbing_conditions && mp->d_num_abs_boundary_faces > 0){
// non-padded arrays
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vp),size_nonpadded*sizeof(realw)),4006);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vs),size_nonpadded*sizeof(realw)),4007);
+ copy_todevice_realw((void**)&mp->d_rho_vp,rho_vp,NGLL3*mp->NSPEC_AB);
+ copy_todevice_realw((void**)&mp->d_rho_vs,rho_vs,NGLL3*mp->NSPEC_AB);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vp),size_nonpadded*sizeof(realw)),4006);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vs),size_nonpadded*sizeof(realw)),4007);
+
// rho_vp, rho_vs non-padded; they are needed for stacey boundary condition
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp, rho_vp,
- NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4013);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs, rho_vs,
- NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4014);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp, rho_vp,
+// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4013);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs, rho_vs,
+// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4014);
// absorb_field array used for file i/o
- if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD )){
+ if(mp->simulation_type == 3 || ( mp->simulation_type == 1 && *SAVE_FORWARD )){
mp->d_b_reclen_field = *h_b_reclen_field;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_field),
- mp->d_b_reclen_field),4016);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field,
- mp->d_b_reclen_field,cudaMemcpyHostToDevice),4017);
+
+ copy_todevice_realw((void**)&mp->d_b_absorb_field,h_b_absorb_field,mp->d_b_reclen_field);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_field),
+// mp->d_b_reclen_field),4016);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field,
+// mp->d_b_reclen_field,cudaMemcpyHostToDevice),4017);
}
}
@@ -717,84 +779,118 @@
// strains
int epsilondev_size = NGLL3*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xx,
- epsilondev_size*sizeof(realw)),4301);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size*sizeof(realw),
- cudaMemcpyHostToDevice),4302);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yy,
- epsilondev_size*sizeof(realw)),4302);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yy,epsilondev_yy,epsilondev_size*sizeof(realw),
- cudaMemcpyHostToDevice),4303);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xy,
- epsilondev_size*sizeof(realw)),4304);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xy,epsilondev_xy,epsilondev_size*sizeof(realw),
- cudaMemcpyHostToDevice),4305);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xz,
- epsilondev_size*sizeof(realw)),4306);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xz,epsilondev_xz,epsilondev_size*sizeof(realw),
- cudaMemcpyHostToDevice),4307);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yz,
- epsilondev_size*sizeof(realw)),4308);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size*sizeof(realw),
- cudaMemcpyHostToDevice),4309);
+ copy_todevice_realw((void**)&mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size);
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xx,
+// epsilondev_size*sizeof(realw)),4301);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size*sizeof(realw),
+// cudaMemcpyHostToDevice),4302);
+
+ copy_todevice_realw((void**)&mp->d_epsilondev_yy,epsilondev_yy,epsilondev_size);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yy,
+// epsilondev_size*sizeof(realw)),4302);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yy,epsilondev_yy,epsilondev_size*sizeof(realw),
+// cudaMemcpyHostToDevice),4303);
+
+ copy_todevice_realw((void**)&mp->d_epsilondev_xy,epsilondev_xy,epsilondev_size);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xy,
+// epsilondev_size*sizeof(realw)),4304);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xy,epsilondev_xy,epsilondev_size*sizeof(realw),
+// cudaMemcpyHostToDevice),4305);
+
+ copy_todevice_realw((void**)&mp->d_epsilondev_xz,epsilondev_xz,epsilondev_size);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xz,
+// epsilondev_size*sizeof(realw)),4306);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xz,epsilondev_xz,epsilondev_size*sizeof(realw),
+// cudaMemcpyHostToDevice),4307);
+
+ copy_todevice_realw((void**)&mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yz,
+// epsilondev_size*sizeof(realw)),4308);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size*sizeof(realw),
+// cudaMemcpyHostToDevice),4309);
+
}
// attenuation memory variables
if( *ATTENUATION ){
// memory arrays
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xx),
- (*R_size)*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xx,R_xx,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),4402);
+ copy_todevice_realw((void**)&mp->d_R_xx,R_xx,(*R_size));
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yy),
- (*R_size)*sizeof(realw)),4403);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yy,R_yy,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),4404);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xx),
+// (*R_size)*sizeof(realw)),4401);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xx,R_xx,(*R_size)*sizeof(realw),
+// cudaMemcpyHostToDevice),4402);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xy),
- (*R_size)*sizeof(realw)),4405);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xy,R_xy,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),4406);
+ copy_todevice_realw((void**)&mp->d_R_yy,R_yy,(*R_size));
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xz),
- (*R_size)*sizeof(realw)),4407);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xz,R_xz,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),4408);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yy),
+// (*R_size)*sizeof(realw)),4403);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yy,R_yy,(*R_size)*sizeof(realw),
+// cudaMemcpyHostToDevice),4404);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yz),
- (*R_size)*sizeof(realw)),4409);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz,R_yz,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),4410);
+ copy_todevice_realw((void**)&mp->d_R_xy,R_xy,(*R_size));
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xy),
+// (*R_size)*sizeof(realw)),4405);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xy,R_xy,(*R_size)*sizeof(realw),
+// cudaMemcpyHostToDevice),4406);
+
+ copy_todevice_realw((void**)&mp->d_R_xz,R_xz,(*R_size));
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xz),
+// (*R_size)*sizeof(realw)),4407);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xz,R_xz,(*R_size)*sizeof(realw),
+// cudaMemcpyHostToDevice),4408);
+
+ copy_todevice_realw((void**)&mp->d_R_yz,R_yz,(*R_size));
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yz),
+// (*R_size)*sizeof(realw)),4409);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz,R_yz,(*R_size)*sizeof(realw),
+// cudaMemcpyHostToDevice),4410);
+
// attenuation factors
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_one_minus_sum_beta),
- NGLL3*mp->NSPEC_AB*sizeof(realw)),4430);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta ,one_minus_sum_beta,
- NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4431);
+ copy_todevice_realw((void**)&mp->d_one_minus_sum_beta,one_minus_sum_beta,NGLL3*mp->NSPEC_AB);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_factor_common),
- N_SLS*NGLL3*mp->NSPEC_AB*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_factor_common ,factor_common,
- N_SLS*NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4433);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_one_minus_sum_beta),
+// NGLL3*mp->NSPEC_AB*sizeof(realw)),4430);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta ,one_minus_sum_beta,
+// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4431);
+ copy_todevice_realw((void**)&mp->d_factor_common,factor_common,N_SLS*NGLL3*mp->NSPEC_AB);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_factor_common),
+// N_SLS*NGLL3*mp->NSPEC_AB*sizeof(realw)),4432);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_factor_common ,factor_common,
+// N_SLS*NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4433);
+
// alpha,beta,gamma factors
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_alphaval),
- N_SLS*sizeof(realw)),4434);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_alphaval ,alphaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4435);
+ copy_todevice_realw((void**)&mp->d_alphaval,alphaval,N_SLS);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_betaval),
- N_SLS*sizeof(realw)),4436);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_betaval ,betaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4437);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_alphaval),
+// N_SLS*sizeof(realw)),4434);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_alphaval ,alphaval,
+// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4435);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_gammaval),
- N_SLS*sizeof(realw)),4438);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaval ,gammaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4439);
+ copy_todevice_realw((void**)&mp->d_betaval,betaval,N_SLS);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_betaval),
+// N_SLS*sizeof(realw)),4436);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_betaval ,betaval,
+// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4437);
+
+ copy_todevice_realw((void**)&mp->d_gammaval,gammaval,N_SLS);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_gammaval),
+// N_SLS*sizeof(realw)),4438);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaval ,gammaval,
+// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4439);
+
}
// anisotropy
@@ -896,29 +992,41 @@
mp->num_free_surface_faces = *num_free_surface_faces;
if( mp->num_free_surface_faces > 0 ){
// mass matrix
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load),
- sizeof(realw)*mp->NGLOB_AB),4501);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,rmass_ocean_load,
- sizeof(realw)*mp->NGLOB_AB,cudaMemcpyHostToDevice),4502);
+ copy_todevice_realw((void**)&mp->d_rmass_ocean_load,rmass_ocean_load,mp->NGLOB_AB);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load),
+// sizeof(realw)*mp->NGLOB_AB),4501);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,rmass_ocean_load,
+// sizeof(realw)*mp->NGLOB_AB,cudaMemcpyHostToDevice),4502);
// surface normal
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_normal),
- 3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw)),4503);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_normal,free_surface_normal,
- 3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw),cudaMemcpyHostToDevice),4504);
+ copy_todevice_realw((void**)&mp->d_free_surface_normal,free_surface_normal,
+ 3*NGLL2*(mp->num_free_surface_faces));
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_normal),
+// 3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw)),4503);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_normal,free_surface_normal,
+// 3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw),cudaMemcpyHostToDevice),4504);
+
// temporary global array: used to synchronize updates on global accel array
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_updated_dof_ocean_load),
sizeof(int)*mp->NGLOB_AB),4505);
if( *NOISE_TOMOGRAPHY == 0 && *ACOUSTIC_SIMULATION == 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
- mp->num_free_surface_faces*sizeof(int)),4601);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
- mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4603);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
- 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),4602);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
- 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4604);
+
+ copy_todevice_int((void**)&mp->d_free_surface_ispec,free_surface_ispec,mp->num_free_surface_faces);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
+// mp->num_free_surface_faces*sizeof(int)),4601);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
+// mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4603);
+
+ copy_todevice_int((void**)&mp->d_free_surface_ijk,free_surface_ijk,
+ 3*NGLL2*mp->num_free_surface_faces);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
+// 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),4602);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+// 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4604);
}
}
}
@@ -941,7 +1049,6 @@
void FC_FUNC_(prepare_fields_elastic_adj_dev,
PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
int* size,
- int* SIMULATION_TYPE,
int* COMPUTE_AND_STORE_STRAIN,
realw* epsilon_trace_over_3,
realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy,
@@ -958,7 +1065,7 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
// checks if kernel simulation
- if( *SIMULATION_TYPE != 3 ) return;
+ if( mp->simulation_type != 3 ) return;
// kernel simulations
// allocates backward/reconstructed arrays on device (GPU)
@@ -985,81 +1092,104 @@
int epsilondev_size = NGLL3*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing
// solid pressure
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3),
- NGLL3*mp->NSPEC_AB*sizeof(realw)),5310);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3,
- NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5311);
+ copy_todevice_realw((void**)&mp->d_epsilon_trace_over_3,epsilon_trace_over_3,NGLL3*mp->NSPEC_AB);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3),
+// NGLL3*mp->NSPEC_AB*sizeof(realw)),5310);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3,
+// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5311);
// backward solid pressure
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3),
- NGLL3*mp->NSPEC_AB*sizeof(realw)),5312);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3,
- NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5313);
+
+ copy_todevice_realw((void**)&mp->d_b_epsilon_trace_over_3,b_epsilon_trace_over_3,NGLL3*mp->NSPEC_AB);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3),
+// NGLL3*mp->NSPEC_AB*sizeof(realw)),5312);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3,
+// NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5313);
+
// prepares backward strains
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx),
- epsilondev_size*sizeof(realw)),5321);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy),
- epsilondev_size*sizeof(realw)),5322);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy),
- epsilondev_size*sizeof(realw)),5323);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz),
- epsilondev_size*sizeof(realw)),5324);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz),
- epsilondev_size*sizeof(realw)),5325);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,
- epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5326);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,
- epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5327);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,
- epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5328);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,
- epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5329);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,
- epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5330);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_xx,b_epsilondev_xx,epsilondev_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_yy,b_epsilondev_yy,epsilondev_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_xy,b_epsilondev_xy,epsilondev_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_xz,b_epsilondev_xz,epsilondev_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_yz,b_epsilondev_yz,epsilondev_size);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx),
+// epsilondev_size*sizeof(realw)),5321);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy),
+// epsilondev_size*sizeof(realw)),5322);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy),
+// epsilondev_size*sizeof(realw)),5323);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz),
+// epsilondev_size*sizeof(realw)),5324);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz),
+// epsilondev_size*sizeof(realw)),5325);
+
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,
+// epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5326);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,
+// epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5327);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,
+// epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5328);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,
+// epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5329);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,
+// epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5330);
}
// attenuation memory variables
if( *ATTENUATION ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xx),
- (*R_size)*sizeof(realw)),5421);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),5422);
+ copy_todevice_realw((void**)&mp->d_b_R_xx,b_R_xx,(*R_size));
+ copy_todevice_realw((void**)&mp->d_b_R_yy,b_R_yy,(*R_size));
+ copy_todevice_realw((void**)&mp->d_b_R_xy,b_R_xy,(*R_size));
+ copy_todevice_realw((void**)&mp->d_b_R_xz,b_R_xz,(*R_size));
+ copy_todevice_realw((void**)&mp->d_b_R_yz,b_R_yz,(*R_size));
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yy),
- (*R_size)*sizeof(realw)),5423);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),5424);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xx),
+// (*R_size)*sizeof(realw)),5421);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,(*R_size)*sizeof(realw),
+// cudaMemcpyHostToDevice),5422);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xy),
- (*R_size)*sizeof(realw)),5425);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),5426);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yy),
+// (*R_size)*sizeof(realw)),5423);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,(*R_size)*sizeof(realw),
+// cudaMemcpyHostToDevice),5424);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xz),
- (*R_size)*sizeof(realw)),5427);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),5428);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xy),
+// (*R_size)*sizeof(realw)),5425);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,(*R_size)*sizeof(realw),
+// cudaMemcpyHostToDevice),5426);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yz),
- (*R_size)*sizeof(realw)),5429);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),5420);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xz),
+// (*R_size)*sizeof(realw)),5427);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,(*R_size)*sizeof(realw),
+// cudaMemcpyHostToDevice),5428);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yz),
+// (*R_size)*sizeof(realw)),5429);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,(*R_size)*sizeof(realw),
+// cudaMemcpyHostToDevice),5420);
+
// alpha,beta,gamma factors for backward fields
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_alphaval),
- N_SLS*sizeof(realw)),5434);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5435);
+ copy_todevice_realw((void**)&mp->d_b_alphaval,b_alphaval,N_SLS);
+ copy_todevice_realw((void**)&mp->d_b_betaval,b_betaval,N_SLS);
+ copy_todevice_realw((void**)&mp->d_b_gammaval,b_gammaval,N_SLS);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_betaval),
- N_SLS*sizeof(realw)),5436);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5437);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_alphaval),
+// N_SLS*sizeof(realw)),5434);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval,
+// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5435);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_gammaval),
- N_SLS*sizeof(realw)),5438);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5439);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_betaval),
+// N_SLS*sizeof(realw)),5436);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval,
+// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5437);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_gammaval),
+// N_SLS*sizeof(realw)),5438);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval,
+// N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5439);
}
if( *APPROXIMATE_HESS_KL ){
@@ -1148,7 +1278,6 @@
int* free_surface_ispec,
int* free_surface_ijk,
int* num_free_surface_faces,
- int* SIMULATION_TYPE,
int* NOISE_TOMOGRAPHY,
int* NSTEP,
realw* noise_sourcearray,
@@ -1165,53 +1294,69 @@
// free surface
mp->num_free_surface_faces = *num_free_surface_faces;
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec,
- mp->num_free_surface_faces*sizeof(int)),7001);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec,
- mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),7002);
+ copy_todevice_int((void**)&mp->d_free_surface_ispec,free_surface_ispec,mp->num_free_surface_faces);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk,
- 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),7003);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
- 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),7004);
+// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec,
+// mp->num_free_surface_faces*sizeof(int)),7001);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec,
+// mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),7002);
+ copy_todevice_int((void**)&mp->d_free_surface_ijk,free_surface_ijk,
+ 3*NGLL2*mp->num_free_surface_faces);
+
+// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk,
+// 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),7003);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+// 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),7004);
+
// alloc storage for the surface buffer to be copied
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie,
3*NGLL2*mp->num_free_surface_faces*sizeof(realw)),7005);
// prepares noise source array
if( *NOISE_TOMOGRAPHY == 1 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
- 3*NGLL3*(*NSTEP)*sizeof(realw)),7101);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_sourcearray, noise_sourcearray,
- 3*NGLL3*(*NSTEP)*sizeof(realw),cudaMemcpyHostToDevice),7102);
+ copy_todevice_realw((void**)&mp->d_noise_sourcearray,noise_sourcearray,
+ 3*NGLL3*(*NSTEP));
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
+// 3*NGLL3*(*NSTEP)*sizeof(realw)),7101);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_sourcearray, noise_sourcearray,
+// 3*NGLL3*(*NSTEP)*sizeof(realw),cudaMemcpyHostToDevice),7102);
}
// prepares noise directions
if( *NOISE_TOMOGRAPHY > 1 ){
int nface_size = NGLL2*(*num_free_surface_faces);
// allocates memory on GPU
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_x_noise,
- nface_size*sizeof(realw)),7301);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_y_noise,
- nface_size*sizeof(realw)),7302);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise,
- nface_size*sizeof(realw)),7303);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise,
- nface_size*sizeof(realw)),7304);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_free_surface_jacobian2Dw,
- nface_size*sizeof(realw)),7305);
+ copy_todevice_realw((void**)&mp->d_normal_x_noise,normal_x_noise,nface_size);
+ copy_todevice_realw((void**)&mp->d_normal_y_noise,normal_y_noise,nface_size);
+ copy_todevice_realw((void**)&mp->d_normal_z_noise,normal_z_noise,nface_size);
+
+ copy_todevice_realw((void**)&mp->d_mask_noise,mask_noise,nface_size);
+ copy_todevice_realw((void**)&mp->d_free_surface_jacobian2Dw,free_surface_jacobian2Dw,nface_size);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_x_noise,
+// nface_size*sizeof(realw)),7301);
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_y_noise,
+// nface_size*sizeof(realw)),7302);
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise,
+// nface_size*sizeof(realw)),7303);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise,
+// nface_size*sizeof(realw)),7304);
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_free_surface_jacobian2Dw,
+// nface_size*sizeof(realw)),7305);
// transfers data onto GPU
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7306);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7307);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7308);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7309);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7310);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise,
+// nface_size*sizeof(realw),cudaMemcpyHostToDevice),7306);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise,
+// nface_size*sizeof(realw),cudaMemcpyHostToDevice),7307);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise,
+// nface_size*sizeof(realw),cudaMemcpyHostToDevice),7308);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise,
+// nface_size*sizeof(realw),cudaMemcpyHostToDevice),7309);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw,
+// nface_size*sizeof(realw),cudaMemcpyHostToDevice),7310);
}
// prepares noise strength kernel
@@ -1254,17 +1399,22 @@
mp->gravity = *GRAVITY;
if( mp->gravity ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_minus_deriv_gravity),
- (mp->NGLOB_AB)*sizeof(realw)),8000);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_deriv_gravity, minus_deriv_gravity,
- (mp->NGLOB_AB)*sizeof(realw),cudaMemcpyHostToDevice),8001);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_minus_g),
- (mp->NGLOB_AB)*sizeof(realw)),8002);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_g, minus_g,
- (mp->NGLOB_AB)*sizeof(realw),cudaMemcpyHostToDevice),8003);
+ copy_todevice_realw((void**)&mp->d_minus_deriv_gravity,minus_deriv_gravity,mp->NGLOB_AB);
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_minus_deriv_gravity),
+// (mp->NGLOB_AB)*sizeof(realw)),8000);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_deriv_gravity, minus_deriv_gravity,
+// (mp->NGLOB_AB)*sizeof(realw),cudaMemcpyHostToDevice),8001);
+ copy_todevice_realw((void**)&mp->d_minus_g,minus_g,mp->NGLOB_AB);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_minus_g),
+// (mp->NGLOB_AB)*sizeof(realw)),8002);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_g, minus_g,
+// (mp->NGLOB_AB)*sizeof(realw),cudaMemcpyHostToDevice),8003);
+
+
if( *ACOUSTIC_SIMULATION == 0 ){
// rhostore not allocated yet
int size_padded = NGLL3_PADDED * (mp->NSPEC_AB);
@@ -1280,6 +1430,8 @@
}
+/* ----------------------------------------------------------------------------------------------- */
+
extern "C"
void FC_FUNC_(prepare_seismogram_fields,
PREPARE_SEISMOGRAM_FIELDS)(long* Mesh_pointer,int* nrec_local, double* nu, double* hxir, double* hetar, double* hgammar) {
@@ -1287,19 +1439,19 @@
TRACE("prepare_constants_device");
Mesh* mp = (Mesh*)(*Mesh_pointer);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_nu),3*3* *nrec_local*sizeof(double)),8100);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hxir),5* *nrec_local*sizeof(double)),8100);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hetar),5* *nrec_local*sizeof(double)),8100);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hgammar),5* *nrec_local*sizeof(double)),8100);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_nu),3*3*(*nrec_local)*sizeof(double)),8100);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hxir),5*(*nrec_local)*sizeof(double)),8100);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hetar),5*(*nrec_local)*sizeof(double)),8100);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hgammar),5*(*nrec_local)*sizeof(double)),8100);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_d,3**nrec_local*sizeof(realw)),8101);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_v,3**nrec_local*sizeof(realw)),8101);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_a,3**nrec_local*sizeof(realw)),8101);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_d,3*(*nrec_local)*sizeof(realw)),8101);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_v,3*(*nrec_local)*sizeof(realw)),8101);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_seismograms_a,3*(*nrec_local)*sizeof(realw)),8101);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nu,nu,3*3* *nrec_local*sizeof(double),cudaMemcpyHostToDevice),8101);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_hxir,hxir,5* *nrec_local*sizeof(double),cudaMemcpyHostToDevice),8101);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_hetar,hetar,5* *nrec_local*sizeof(double),cudaMemcpyHostToDevice),8101);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_hgammar,hgammar,5* *nrec_local*sizeof(double),cudaMemcpyHostToDevice),8101);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nu,nu,3*3*(*nrec_local)*sizeof(double),cudaMemcpyHostToDevice),8101);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_hxir,hxir,5*(*nrec_local)*sizeof(double),cudaMemcpyHostToDevice),8101);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_hetar,hetar,5*(*nrec_local)*sizeof(double),cudaMemcpyHostToDevice),8101);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_hgammar,hgammar,5*(*nrec_local)*sizeof(double),cudaMemcpyHostToDevice),8101);
cudaMallocHost((void**)&mp->h_seismograms_d_it,3**nrec_local*sizeof(realw));
cudaMallocHost((void**)&mp->h_seismograms_v_it,3**nrec_local*sizeof(realw));
@@ -1315,7 +1467,6 @@
extern "C"
void FC_FUNC_(prepare_cleanup_device,
PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE,
int* SAVE_FORWARD,
int* ACOUSTIC_SIMULATION,
int* ELASTIC_SIMULATION,
@@ -1362,7 +1513,7 @@
cudaFree(mp->d_ibool);
// sources
- if (*SIMULATION_TYPE == 1 || *SIMULATION_TYPE == 3){
+ if (mp->simulation_type == 1 || mp->simulation_type == 3){
cudaFree(mp->d_sourcearrays);
cudaFree(mp->d_stf_pre_compute);
}
@@ -1393,7 +1544,7 @@
if( *ABSORBING_CONDITIONS ) cudaFree(mp->d_b_absorb_potential);
- if( *SIMULATION_TYPE == 3 ) {
+ if( mp->simulation_type == 3 ) {
cudaFree(mp->d_b_potential_acoustic);
cudaFree(mp->d_b_potential_dot_acoustic);
cudaFree(mp->d_b_potential_dot_dot_acoustic);
@@ -1416,8 +1567,11 @@
cudaFree(mp->d_veloc);
cudaFree(mp->d_accel);
cudaFree(mp->d_send_accel_buffer);
- cudaFree(mp->d_rmass);
+ cudaFree(mp->d_rmassx);
+ cudaFree(mp->d_rmassy);
+ cudaFree(mp->d_rmassz);
+
cudaFree(mp->d_phase_ispec_inner_elastic);
cudaFree(mp->d_ispec_is_elastic);
@@ -1430,11 +1584,11 @@
cudaFree(mp->d_rho_vp);
cudaFree(mp->d_rho_vs);
- if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD ))
+ if(mp->simulation_type == 3 || ( mp->simulation_type == 1 && *SAVE_FORWARD ))
cudaFree(mp->d_b_absorb_field);
}
- if( *SIMULATION_TYPE == 3 ) {
+ if( mp->simulation_type == 3 ) {
cudaFree(mp->d_b_displ);
cudaFree(mp->d_b_veloc);
cudaFree(mp->d_b_accel);
@@ -1450,7 +1604,7 @@
cudaFree(mp->d_epsilondev_xy);
cudaFree(mp->d_epsilondev_xz);
cudaFree(mp->d_epsilondev_yz);
- if( *SIMULATION_TYPE == 3 ){
+ if( mp->simulation_type == 3 ){
cudaFree(mp->d_epsilon_trace_over_3);
cudaFree(mp->d_b_epsilon_trace_over_3);
cudaFree(mp->d_b_epsilondev_xx);
@@ -1472,7 +1626,7 @@
cudaFree(mp->d_R_xy);
cudaFree(mp->d_R_xz);
cudaFree(mp->d_R_yz);
- if( *SIMULATION_TYPE == 3){
+ if( mp->simulation_type == 3){
cudaFree(mp->d_b_R_xx);
cudaFree(mp->d_b_R_yy);
cudaFree(mp->d_b_R_xy);
@@ -1522,7 +1676,7 @@
} // ELASTIC_SIMULATION
// purely adjoint & kernel array
- if( *SIMULATION_TYPE == 2 || *SIMULATION_TYPE == 3 ){
+ if( mp->simulation_type == 2 || mp->simulation_type == 3 ){
if(mp->nadj_rec_local > 0 ){
cudaFree(mp->d_adj_sourcearrays);
cudaFree(mp->d_pre_computed_irec);
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-09-03 03:54:59 UTC (rev 20674)
@@ -79,13 +79,11 @@
void FC_FUNC_(get_norm_acoustic_from_device,
GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
- long* Mesh_pointer_f,
- int* SIMULATION_TYPE) {}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(get_norm_elastic_from_device,
GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
- long* Mesh_pointer_f,
- int* SIMULATION_TYPE) {}
+ long* Mesh_pointer_f) {}
//
@@ -96,7 +94,6 @@
COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* NSOURCESf,
- int* SIMULATION_TYPEf,
double* h_stf_pre_compute,
int* myrankf) {}
@@ -104,7 +101,6 @@
COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* NSOURCESf,
- int* SIMULATION_TYPEf,
double* h_stf_pre_compute,
int* myrankf) {}
@@ -170,16 +166,17 @@
void FC_FUNC_(compute_coupling_ac_el_cuda,
COMPUTE_COUPLING_AC_EL_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
- int* num_coupling_ac_el_facesf,
- int* SIMULATION_TYPEf) {}
+ int* num_coupling_ac_el_facesf) {}
void FC_FUNC_(compute_coupling_el_ac_cuda,
COMPUTE_COUPLING_EL_AC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
- int* num_coupling_ac_el_facesf,
- int* SIMULATION_TYPEf) {}
+ int* num_coupling_ac_el_facesf) {}
+void FC_FUNC_(compute_coupling_ocean_cuda,
+ COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f) {}
+
//
// src/cuda/compute_forces_acoustic_cuda.cu
//
@@ -211,25 +208,11 @@
COMPUTE_FORCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
int* iphase,
int* nspec_outer_acoustic,
- int* nspec_inner_acoustic,
- int* SIMULATION_TYPE) {}
+ int* nspec_inner_acoustic) {}
-void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
- long* Mesh_pointer,
- int* size_F,
- int* SIMULATION_TYPE) {}
-
-void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
- long* Mesh_pointer,
- int* size_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE,
- realw* b_deltatover2_F) {}
-
void FC_FUNC_(acoustic_enforce_free_surf_cuda,
ACOUSTIC_ENFORCE_FREE_SURF_CUDA)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE,
- int* ABSORB_FREE_SURFACE) {}
+ int* ABSORB_FREE_SURFACE) {}
//
@@ -278,7 +261,6 @@
int* iphase,
int* nspec_outer_elastic,
int* nspec_inner_elastic,
- int* SIMULATION_TYPE,
int* COMPUTE_AND_STORE_STRAIN,
int* ATTENUATION,
int* ANISOTROPY) {}
@@ -288,26 +270,7 @@
int* iphase,
realw* send_buffer) {}
-void FC_FUNC_(kernel_3_a_cuda,
- KERNEL_3_A_CUDA)(long* Mesh_pointer,
- int* size_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F,
- int* OCEANS) {}
-void FC_FUNC_(kernel_3_b_cuda,
- KERNEL_3_B_CUDA)(long* Mesh_pointer,
- int* size_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F) {}
-
-void FC_FUNC_(elastic_ocean_load_cuda,
- ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE) {}
-
-
//
// src/cuda/compute_kernels_cuda.cu
//
@@ -338,12 +301,10 @@
//
void FC_FUNC_(compute_stacey_acoustic_cuda,
- COMPUTE_STACEY_ACOUSTIC_CUDA)(
- long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* SIMULATION_TYPEf,
- int* SAVE_FORWARDf,
- realw* h_b_absorb_potential) {}
+ COMPUTE_STACEY_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* SAVE_FORWARDf,
+ realw* h_b_absorb_potential) {}
//
@@ -353,7 +314,6 @@
void FC_FUNC_(compute_stacey_elastic_cuda,
COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
- int* SIMULATION_TYPEf,
int* SAVE_FORWARDf,
realw* h_b_absorb_field) {}
@@ -375,14 +335,13 @@
void FC_FUNC_(it_update_displacement_cuda,
IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f,
- int* size_F,
- realw* deltat_F,
- realw* deltatsqover2_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F) {}
+ int* size_F,
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ realw* b_deltat_F,
+ realw* b_deltatsqover2_F,
+ realw* b_deltatover2_F) {}
void FC_FUNC_(it_update_displacement_ac_cuda,
it_update_displacement_ac_cuda)(long* Mesh_pointer_f,
@@ -390,12 +349,34 @@
realw* deltat_F,
realw* deltatsqover2_F,
realw* deltatover2_F,
- int* SIMULATION_TYPE,
realw* b_deltat_F,
realw* b_deltatsqover2_F,
realw* b_deltatover2_F) {}
+void FC_FUNC_(kernel_3_a_cuda,
+ KERNEL_3_A_CUDA)(long* Mesh_pointer,
+ int* size_F,
+ realw* deltatover2_F,
+ realw* b_deltatover2_F,
+ int* OCEANS) {}
+void FC_FUNC_(kernel_3_b_cuda,
+ KERNEL_3_B_CUDA)(long* Mesh_pointer,
+ int* size_F,
+ realw* deltatover2_F,
+ realw* b_deltatover2_F) {}
+
+void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
+ long* Mesh_pointer,
+ int* size_F) {}
+
+void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
+ long* Mesh_pointer,
+ int* size_F,
+ realw* deltatover2_F,
+ realw* b_deltatover2_F) {}
+
+
//
// src/cuda/noise_tomography_cuda.cu
//
@@ -475,7 +456,6 @@
int* num_free_surface_faces,
int* free_surface_ispec,
int* free_surface_ijk,
- int* ABSORBING_CONDITIONS,
int* b_reclen_potential,
realw* b_absorb_potential,
int* ELASTIC_SIMULATION,
@@ -490,22 +470,22 @@
void FC_FUNC_(prepare_fields_acoustic_adj_dev,
PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE,
int* APPROXIMATE_HESS_KL) {}
void FC_FUNC_(prepare_fields_elastic_device,
PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
int* size,
- realw* rmass,
+ realw* rmassx,
+ realw* rmassy,
+ realw* rmassz,
realw* rho_vp,
realw* rho_vs,
int* num_phase_ispec_elastic,
int* phase_ispec_inner_elastic,
int* ispec_is_elastic,
- int* ABSORBING_CONDITIONS,
realw* h_b_absorb_field,
int* h_b_reclen_field,
- int* SIMULATION_TYPE,int* SAVE_FORWARD,
+ int* SAVE_FORWARD,
int* COMPUTE_AND_STORE_STRAIN,
realw* epsilondev_xx,realw* epsilondev_yy,realw* epsilondev_xy,
realw* epsilondev_xz,realw* epsilondev_yz,
@@ -551,7 +531,6 @@
void FC_FUNC_(prepare_fields_elastic_adj_dev,
PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
int* size,
- int* SIMULATION_TYPE,
int* COMPUTE_AND_STORE_STRAIN,
realw* epsilon_trace_over_3,
realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy,
@@ -578,7 +557,6 @@
int* free_surface_ispec,
int* free_surface_ijk,
int* num_free_surface_faces,
- int* SIMULATION_TYPE,
int* NOISE_TOMOGRAPHY,
int* NSTEP,
realw* noise_sourcearray,
@@ -602,7 +580,6 @@
void FC_FUNC_(prepare_cleanup_device,
PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE,
int* SAVE_FORWARD,
int* ACOUSTIC_SIMULATION,
int* ELASTIC_SIMULATION,
@@ -774,7 +751,6 @@
void FC_FUNC_(transfer_seismograms_el_from_d,
TRANSFER_SEISMOGRAMS_EL_FROM_D)(int* nrec_local,
long* Mesh_pointer_f,
- int* SIMULATION_TYPEf,
realw* seismograms_d,
realw* seismograms_v,
realw* seismograms_a,
@@ -785,7 +761,7 @@
realw* b_displ, realw* b_veloc, realw* b_accel,
long* Mesh_pointer_f,int* number_receiver_global,
int* ispec_selected_rec,int* ispec_selected_source,
- int* ibool,int* SIMULATION_TYPEf) {}
+ int* ibool) {}
void FC_FUNC_(transfer_station_ac_from_device,
TRANSFER_STATION_AC_FROM_DEVICE)(
@@ -799,6 +775,5 @@
int* number_receiver_global,
int* ispec_selected_rec,
int* ispec_selected_source,
- int* ibool,
- int* SIMULATION_TYPEf) {}
+ int* ibool) {}
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/write_seismograms_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/write_seismograms_cuda.cu 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/write_seismograms_cuda.cu 2012-09-03 03:54:59 UTC (rev 20674)
@@ -195,7 +195,6 @@
void FC_FUNC_(transfer_seismograms_el_from_d,
TRANSFER_SEISMOGRAMS_EL_FROM_D)(int* nrec_local,
long* Mesh_pointer_f,
- int* SIMULATION_TYPEf,
realw* seismograms_d,
realw* seismograms_v,
realw* seismograms_a,
@@ -345,16 +344,15 @@
realw* b_displ, realw* b_veloc, realw* b_accel,
long* Mesh_pointer_f,int* number_receiver_global,
int* ispec_selected_rec,int* ispec_selected_source,
- int* ibool,int* SIMULATION_TYPEf) {
+ int* ibool) {
TRACE("transfer_station_el_from_device");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
// checks if anything to do
if( mp->nrec_local == 0 ) return;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
-
- if(SIMULATION_TYPE == 1) {
+ if(mp->simulation_type == 1) {
transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
@@ -362,7 +360,7 @@
transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
}
- else if(SIMULATION_TYPE == 2) {
+ else if(mp->simulation_type == 2) {
transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
mp->d_ispec_selected_source, ispec_selected_source, ibool);
transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
@@ -370,7 +368,7 @@
transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
mp->d_ispec_selected_source, ispec_selected_source, ibool);
}
- else if(SIMULATION_TYPE == 3) {
+ else if(mp->simulation_type == 3) {
transfer_field_from_device(mp,mp->d_b_displ,b_displ, number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
transfer_field_from_device(mp,mp->d_b_veloc,b_veloc, number_receiver_global,
@@ -420,7 +418,7 @@
int irec_local,irec,ispec,iglob,j;
// checks if anything to do
- if( mp->nrec_local == 0 ) return;
+ if( mp->nrec_local < 1 ) return;
// sets up kernel dimensions
int blocksize = NGLL3;
@@ -442,8 +440,12 @@
d_potential);
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_field_acoustic_from_device kernel");
+#endif
+
print_CUDA_error_if_any(cudaMemcpy(mp->h_station_seismo_potential,mp->d_station_seismo_potential,
- mp->nrec_local*NGLL3*sizeof(realw),cudaMemcpyDeviceToHost),500);
+ mp->nrec_local*NGLL3*sizeof(realw),cudaMemcpyDeviceToHost),55000);
//printf("copy local receivers: %i \n",mp->nrec_local);
@@ -483,19 +485,17 @@
int* number_receiver_global,
int* ispec_selected_rec,
int* ispec_selected_source,
- int* ibool,
- int* SIMULATION_TYPEf) {
+ int* ibool) {
TRACE("transfer_station_ac_from_device");
//double start_time = get_time();
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
// checks if anything to do
if( mp->nrec_local == 0 ) return;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
-
- if(SIMULATION_TYPE == 1) {
+ if(mp->simulation_type == 1) {
transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
@@ -506,7 +506,7 @@
number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
}
- else if(SIMULATION_TYPE == 2) {
+ else if(mp->simulation_type == 2) {
transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
number_receiver_global,
mp->d_ispec_selected_source, ispec_selected_source, ibool);
@@ -517,7 +517,7 @@
number_receiver_global,
mp->d_ispec_selected_source, ispec_selected_source, ibool);
}
- else if(SIMULATION_TYPE == 3) {
+ else if(mp->simulation_type == 3) {
transfer_field_acoustic_from_device(mp,mp->d_b_potential_acoustic,b_potential_acoustic,
number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_mass_matrices.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_mass_matrices.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_mass_matrices.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -44,97 +44,142 @@
integer :: ispec,i,j,k,iglob,ier
real(kind=CUSTOM_REAL) :: rho_s, rho_f, rho_bar, phi, tort
-! allocates memory
- allocate(rmass(nglob),stat=ier); if(ier /= 0) stop 'error in allocate rmass'
- allocate(rmass_acoustic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate rmass_acoustic'
- allocate(rmass_solid_poroelastic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate'
- allocate(rmass_fluid_poroelastic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate'
+ ! elastic domains
+ if( ELASTIC_SIMULATION ) then
+ ! allocates memory
+ allocate(rmass(nglob),stat=ier); if(ier /= 0) stop 'error in allocate rmass'
+ rmass(:) = 0._CUSTOM_REAL
-! creates mass matrix
- rmass(:) = 0._CUSTOM_REAL
- rmass_acoustic(:) = 0._CUSTOM_REAL
- rmass_solid_poroelastic(:) = 0._CUSTOM_REAL
- rmass_fluid_poroelastic(:) = 0._CUSTOM_REAL
+ ! elastic mass matrix
+ do ispec=1,nspec
+ if( ispec_is_elastic(ispec) ) then
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
- do ispec=1,nspec
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
+ weight = wxgll(i)*wygll(j)*wzgll(k)
+ jacobianl = jacobianstore(i,j,k,ispec)
- weight = wxgll(i)*wygll(j)*wzgll(k)
- jacobianl = jacobianstore(i,j,k,ispec)
+ if(CUSTOM_REAL == SIZE_REAL) then
+ rmass(iglob) = rmass(iglob) + &
+ sngl( dble(jacobianl) * weight * dble(rhostore(i,j,k,ispec)) )
+ else
+ rmass(iglob) = rmass(iglob) + &
+ jacobianl * weight * rhostore(i,j,k,ispec)
+ endif
+ enddo
+ enddo
+ enddo
+ endif
+ enddo ! nspec
+ endif
-! acoustic mass matrix
- if( ispec_is_acoustic(ispec) ) then
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- rmass_acoustic(iglob) = rmass_acoustic(iglob) + &
- sngl( dble(jacobianl) * weight / dble(kappastore(i,j,k,ispec)) )
- else
- rmass_acoustic(iglob) = rmass_acoustic(iglob) + &
- jacobianl * weight / kappastore(i,j,k,ispec)
- endif
- endif
+ ! acoustic domains
+ if( ACOUSTIC_SIMULATION) then
+ ! allocates memory
+ allocate(rmass_acoustic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate rmass_acoustic'
+ rmass_acoustic(:) = 0._CUSTOM_REAL
-! elastic mass matrix
- if( ispec_is_elastic(ispec) ) then
- if(CUSTOM_REAL == SIZE_REAL) then
- rmass(iglob) = rmass(iglob) + &
- sngl( dble(jacobianl) * weight * dble(rhostore(i,j,k,ispec)) )
- else
- rmass(iglob) = rmass(iglob) + &
- jacobianl * weight * rhostore(i,j,k,ispec)
- endif
- endif
+ ! acoustic mass matrix
+ do ispec=1,nspec
+ if( ispec_is_acoustic(ispec) ) then
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
-! poroelastic mass matrices
- if( ispec_is_poroelastic(ispec) ) then
+ weight = wxgll(i)*wygll(j)*wzgll(k)
+ jacobianl = jacobianstore(i,j,k,ispec)
- rho_s = rhoarraystore(1,i,j,k,ispec)
- rho_f = rhoarraystore(2,i,j,k,ispec)
- phi = phistore(i,j,k,ispec)
- tort = tortstore(i,j,k,ispec)
- rho_bar = (1._CUSTOM_REAL-phi)*rho_s + phi*rho_f
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ rmass_acoustic(iglob) = rmass_acoustic(iglob) + &
+ sngl( dble(jacobianl) * weight / dble(kappastore(i,j,k,ispec)) )
+ else
+ rmass_acoustic(iglob) = rmass_acoustic(iglob) + &
+ jacobianl * weight / kappastore(i,j,k,ispec)
+ endif
+ enddo
+ enddo
+ enddo
+ endif
+ enddo ! nspec
+ endif
- if(CUSTOM_REAL == SIZE_REAL) then
- ! for the solid mass matrix
- rmass_solid_poroelastic(iglob) = rmass_solid_poroelastic(iglob) + &
- sngl( dble(jacobianl) * weight * dble(rho_bar - phi*rho_f/tort) )
+ ! poroelastic domains
+ if( POROELASTIC_SIMULATION) then
+ ! allocates memory
+ allocate(rmass_solid_poroelastic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate'
+ allocate(rmass_fluid_poroelastic(nglob),stat=ier); if(ier /= 0) stop 'error in allocate'
+ rmass_solid_poroelastic(:) = 0._CUSTOM_REAL
+ rmass_fluid_poroelastic(:) = 0._CUSTOM_REAL
- ! for the fluid mass matrix
- rmass_fluid_poroelastic(iglob) = rmass_fluid_poroelastic(iglob) + &
- sngl( dble(jacobianl) * weight * dble(rho_bar*rho_f*tort - &
- phi*rho_f*rho_f)/dble(rho_bar*phi) )
- else
- rmass_solid_poroelastic(iglob) = rmass_solid_poroelastic(iglob) + &
- jacobianl * weight * (rho_bar - phi*rho_f/tort)
+ ! poroelastic mass matrices
+ do ispec=1,nspec
+ if( ispec_is_poroelastic(ispec) ) then
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
- rmass_fluid_poroelastic(iglob) = rmass_fluid_poroelastic(iglob) + &
- jacobianl * weight * (rho_bar*rho_f*tort - &
- phi*rho_f*rho_f) / (rho_bar*phi)
- endif
+ weight = wxgll(i)*wygll(j)*wzgll(k)
+ jacobianl = jacobianstore(i,j,k,ispec)
- endif
+ rho_s = rhoarraystore(1,i,j,k,ispec)
+ rho_f = rhoarraystore(2,i,j,k,ispec)
+ phi = phistore(i,j,k,ispec)
+ tort = tortstore(i,j,k,ispec)
+ rho_bar = (1._CUSTOM_REAL-phi)*rho_s + phi*rho_f
+ if(CUSTOM_REAL == SIZE_REAL) then
+ ! for the solid mass matrix
+ rmass_solid_poroelastic(iglob) = rmass_solid_poroelastic(iglob) + &
+ sngl( dble(jacobianl) * weight * dble(rho_bar - phi*rho_f/tort) )
+
+ ! for the fluid mass matrix
+ rmass_fluid_poroelastic(iglob) = rmass_fluid_poroelastic(iglob) + &
+ sngl( dble(jacobianl) * weight * dble(rho_bar*rho_f*tort - &
+ phi*rho_f*rho_f)/dble(rho_bar*phi) )
+ else
+ rmass_solid_poroelastic(iglob) = rmass_solid_poroelastic(iglob) + &
+ jacobianl * weight * (rho_bar - phi*rho_f/tort)
+
+ rmass_fluid_poroelastic(iglob) = rmass_fluid_poroelastic(iglob) + &
+ jacobianl * weight * (rho_bar*rho_f*tort - &
+ phi*rho_f*rho_f) / (rho_bar*phi)
+ endif
+ enddo
+ enddo
enddo
- enddo
- enddo
- enddo ! nspec
+ endif
+ enddo ! nspec
+ endif
+ ! extra C*deltat/2 contribution to the mass matrices on Stacey edges
+ ! for absorbing condition
+ call create_mass_matrices_Stacey(nglob,nspec,ibool)
+
+ ! creates ocean load mass matrix
+ call create_mass_matrices_ocean_load(nglob,nspec,ibool)
+
+
end subroutine create_mass_matrices
!
!-------------------------------------------------------------------------------------------------
!
- subroutine create_mass_matrices_ocean_load(nglob,nspec,ibool,OCEANS,TOPOGRAPHY, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
- NX_TOPO,NY_TOPO,itopo_bathy)
+ subroutine create_mass_matrices_ocean_load(nglob,nspec,ibool)
! returns precomputed mass matrix in rmass array
+ use generate_databases_par,only: &
+ OCEANS,TOPOGRAPHY,UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+ NX_TOPO,NY_TOPO,itopo_bathy,myrank
+
use create_regions_mesh_ext_par
+
implicit none
! number of spectral elements in each block
@@ -143,15 +188,7 @@
! arrays with the mesh global indices
integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
- logical :: OCEANS,TOPOGRAPHY
- ! use integer array to store topography values
- integer :: UTM_PROJECTION_ZONE
- logical :: SUPPRESS_UTM_PROJECTION
-
- integer :: NX_TOPO,NY_TOPO
- integer, dimension(NX_TOPO,NY_TOPO) :: itopo_bathy
-
! local parameters
double precision :: weight
double precision :: elevation
@@ -164,6 +201,10 @@
! creates ocean load mass matrix
if(OCEANS) then
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...creating ocean load mass matrix '
+ endif
+
! adding ocean load mass matrix at ocean bottom
NGLOB_OCEAN = nglob
allocate(rmass_ocean_load(NGLOB_OCEAN),stat=ier)
@@ -244,3 +285,144 @@
endif
end subroutine create_mass_matrices_ocean_load
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine create_mass_matrices_Stacey(nglob,nspec,ibool)
+
+! in the case of stacey boundary conditions, add C*deltat/2 contribution to the mass matrix
+! on Stacey edges for the crust_mantle and outer_core regions but not for the inner_core region
+! thus the mass matrix must be replaced by three mass matrices including the "C" damping matrix
+
+ use generate_databases_par,only: &
+ DT
+
+ use create_regions_mesh_ext_par
+
+ implicit none
+
+ integer :: nglob
+ integer :: nspec
+ integer,dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+ ! local parameters
+ real(kind=CUSTOM_REAL) :: jacobianw
+ real(kind=CUSTOM_REAL) :: deltat,deltatover2
+ real(kind=CUSTOM_REAL) :: tx,ty,tz,sn
+ real(kind=CUSTOM_REAL) :: nx,ny,nz,vn
+ integer :: ispec,iglob,i,j,k,iface,igll,ier
+
+ ! checks if anything to do
+ if( num_abs_boundary_faces > 0 ) then
+ nglob_xy = nglob
+ else
+ nglob_xy = 1
+ endif
+
+ ! elastic domains
+ if( ELASTIC_SIMULATION ) then
+ allocate( rmassx(nglob_xy), &
+ rmassy(nglob_xy), &
+ rmassz(nglob_xy), &
+ stat=ier)
+ if(ier /= 0) stop 'error in allocate 21'
+ rmassx(:) = 0._CUSTOM_REAL
+ rmassy(:) = 0._CUSTOM_REAL
+ rmassz(:) = 0._CUSTOM_REAL
+ endif
+
+ ! acoustic domains
+ if( ACOUSTIC_SIMULATION ) then
+ allocate( rmassz_acoustic(nglob_xy), &
+ stat=ier)
+ if(ier /= 0) stop 'error in allocate 22'
+ rmassz_acoustic(:) = 0._CUSTOM_REAL
+ endif
+
+ ! use the non-dimensional time step to make the mass matrix correction
+ if(CUSTOM_REAL == SIZE_REAL) then
+ deltat = sngl(DT)
+ deltatover2 = sngl(0.5d0*deltat)
+ else
+ deltat = DT
+ deltatover2 = 0.5d0*deltat
+ endif
+
+ ! adds contributions to mass matrix to stabilize stacey conditions
+ do iface=1,num_abs_boundary_faces
+
+ ispec = abs_boundary_ispec(iface)
+
+ ! elastic elements
+ if( ispec_is_elastic(ispec)) then
+ ! reference gll points on boundary face
+ do igll = 1,NGLLSQUARE
+ ! gets local indices for GLL point
+ i = abs_boundary_ijk(1,igll,iface)
+ j = abs_boundary_ijk(2,igll,iface)
+ k = abs_boundary_ijk(3,igll,iface)
+
+ ! gets velocity
+ iglob = ibool(i,j,k,ispec)
+
+ ! gets associated normal
+ nx = abs_boundary_normal(1,igll,iface)
+ ny = abs_boundary_normal(2,igll,iface)
+ nz = abs_boundary_normal(3,igll,iface)
+
+ vn = deltatover2*(nx+ny+nz)
+
+ ! C*deltat/2 contributions
+ tx = rho_vp(i,j,k,ispec)*vn*nx + rho_vs(i,j,k,ispec)*(deltatover2-vn*nx)
+ ty = rho_vp(i,j,k,ispec)*vn*ny + rho_vs(i,j,k,ispec)*(deltatover2-vn*ny)
+ tz = rho_vp(i,j,k,ispec)*vn*nz + rho_vs(i,j,k,ispec)*(deltatover2-vn*nz)
+
+ ! gets associated, weighted jacobian
+ jacobianw = abs_boundary_jacobian2Dw(igll,iface)
+
+ ! assembles mass matrix on global points
+ if(CUSTOM_REAL == SIZE_REAL) then
+ rmassx(iglob) = rmassx(iglob) + sngl(tx*jacobianw)
+ rmassy(iglob) = rmassy(iglob) + sngl(ty*jacobianw)
+ rmassz(iglob) = rmassz(iglob) + sngl(tz*jacobianw)
+ else
+ rmassx(iglob) = rmassx(iglob) + tx*jacobianw
+ rmassy(iglob) = rmassy(iglob) + ty*jacobianw
+ rmassz(iglob) = rmassz(iglob) + tz*jacobianw
+ endif
+ enddo
+ endif ! elastic
+
+ ! acoustic element
+ if( ispec_is_acoustic(ispec) ) then
+
+ ! reference gll points on boundary face
+ do igll = 1,NGLLSQUARE
+ ! gets local indices for GLL point
+ i = abs_boundary_ijk(1,igll,iface)
+ j = abs_boundary_ijk(2,igll,iface)
+ k = abs_boundary_ijk(3,igll,iface)
+
+ ! gets global index
+ iglob=ibool(i,j,k,ispec)
+
+ ! gets associated, weighted jacobian
+ jacobianw = abs_boundary_jacobian2Dw(igll,iface)
+
+ ! C * DT/2 contribution
+ sn = deltatover2/rho_vp(i,j,k,ispec)
+
+ if(CUSTOM_REAL == SIZE_REAL) then
+ rmassz_acoustic(iglob) = rmassz_acoustic(iglob) + sngl(jacobianw*sn)
+ else
+ rmassz_acoustic(iglob) = rmassz_acoustic(iglob) + jacobianw*sn
+ endif
+ enddo
+ endif ! acoustic
+ enddo
+
+ end subroutine create_mass_matrices_Stacey
+
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_regions_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_regions_mesh.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_regions_mesh.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -46,11 +46,10 @@
nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax,&
nodes_ibelm_bottom,nodes_ibelm_top, &
SAVE_MESH_FILES, &
- ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
+ ANISOTROPY,NPROC,OCEANS, &
ATTENUATION,USE_OLSEN_ATTENUATION, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
- NX_TOPO,NY_TOPO,itopo_bathy, &
nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho
+
use create_regions_mesh_ext_par
implicit none
@@ -176,15 +175,6 @@
endif
call create_mass_matrices(nglob_dummy,nspec,ibool)
-! creates ocean load mass matrix
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*) ' ...creating ocean load mass matrix '
- endif
- call create_mass_matrices_ocean_load(nglob_dummy,nspec,ibool,OCEANS,TOPOGRAPHY, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
- NX_TOPO,NY_TOPO,itopo_bathy)
-
! saves the binary mesh files
call sync_all()
if( myrank == 0) then
@@ -232,8 +222,7 @@
ispec_is_elastic,min_resolved_period,prname)
endif
-! cleanup
- if( .not. SAVE_MOHO_MESH ) deallocate(xstore_dummy,ystore_dummy,zstore_dummy)
+ ! cleanup
deallocate(xixstore,xiystore,xizstore,&
etaxstore,etaystore,etazstore,&
gammaxstore,gammaystore,gammazstore)
@@ -242,6 +231,22 @@
deallocate(rho_vpI,rho_vpII,rho_vsI)
deallocate(rhoarraystore,kappaarraystore,etastore,phistore,tortstore,permstore)
+ if( .not. SAVE_MOHO_MESH ) then
+ deallocate(xstore_dummy,ystore_dummy,zstore_dummy)
+ endif
+
+ if( ELASTIC_SIMULATION ) then
+ deallocate(rmass)
+ deallocate(rmassx,rmassy,rmassz)
+ endif
+ if( ACOUSTIC_SIMULATION) then
+ deallocate(rmass_acoustic)
+ deallocate(rmassz_acoustic)
+ endif
+ if( POROELASTIC_SIMULATION) then
+ deallocate(rmass_solid_poroelastic,rmass_fluid_poroelastic)
+ endif
+
end subroutine create_regions_mesh
!
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -320,6 +320,8 @@
write(IMAIN,'(a)',advance='yes') ' external'
case( IMODEL_IPATI )
write(IMAIN,'(a)',advance='yes') ' ipati'
+ case( IMODEL_IPATI_WATER )
+ write(IMAIN,'(a)',advance='yes') ' ipati_water'
end select
write(IMAIN,*)
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases_par.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases_par.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases_par.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -177,6 +177,11 @@
real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass,rmass_acoustic,&
rmass_solid_poroelastic,rmass_fluid_poroelastic
+! mass matrix contributions
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassx,rmassy,rmassz
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassz_acoustic
+ integer :: nglob_xy
+
! ocean load
integer :: NGLOB_OCEAN
real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_ocean_load
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/get_model.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/get_model.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/get_model.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -346,6 +346,7 @@
enddo
! sets simulation domain flags
+ ! all processes will have e.g. acoustic_simulation flag set if any flag is .true. somewhere
call any_all_l( ANY(ispec_is_acoustic), ACOUSTIC_SIMULATION )
call any_all_l( ANY(ispec_is_elastic), ELASTIC_SIMULATION )
call any_all_l( ANY(ispec_is_poroelastic), POROELASTIC_SIMULATION )
@@ -409,7 +410,7 @@
! selects chosen velocity model
select case( IMODEL )
- case( IMODEL_DEFAULT,IMODEL_GLL,IMODEL_IPATI )
+ case( IMODEL_DEFAULT,IMODEL_GLL,IMODEL_IPATI,IMODEL_IPATI_WATER )
! material values determined by mesh properties
call model_default(materials_ext_mesh,nmat_ext_mesh, &
undef_mat_prop,nundefMat_ext_mesh, &
@@ -485,7 +486,9 @@
! reads in material parameters from external binary files
use generate_databases_par,only: IMODEL
+
use create_regions_mesh_ext_par
+
implicit none
! number of spectral elements in each block
@@ -505,9 +508,15 @@
! import the model from files in SPECFEM format
! note that those those files should be saved in LOCAL_PATH
call model_gll(myrank,nspec,LOCAL_PATH)
+
case( IMODEL_IPATI )
! import the model from modified files in SPECFEM format
call model_ipati(myrank,nspec,LOCAL_PATH)
+
+ case( IMODEL_IPATI_WATER )
+ ! import the model from modified files in SPECFEM format
+ call model_ipati_water(myrank,nspec,LOCAL_PATH)
+
end select
end subroutine get_model_binaries
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/memory_eval.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/memory_eval.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/memory_eval.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -88,7 +88,7 @@
memory_size = memory_size + 3.d0*dble(NDIM)*NGLOB_AB*dble(CUSTOM_REAL)
! rmass
- memory_size = memory_size + NGLOB_AB*dble(CUSTOM_REAL)
+ memory_size = memory_size + 3*NGLOB_AB*dble(CUSTOM_REAL)
! rho_vp,rho_vs
memory_size = memory_size + 2.d0*dble(NGLLX)*dble(NGLLY)*dble(NGLLZ)*NSPEC_AB*dble(CUSTOM_REAL)
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/model_1d_socal.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/model_1d_socal.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/model_1d_socal.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -37,7 +37,6 @@
! given a GLL point, returns super-imposed velocity model values
- use generate_databases_par,only: nspec => NSPEC_AB,ibool
use create_regions_mesh_ext_par
implicit none
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/model_ipati.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/model_ipati.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/model_ipati.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -111,3 +111,97 @@
deallocate( rho_read,vp_read,vs_read)
end subroutine model_ipati
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine model_ipati_water(myrank,nspec,LOCAL_PATH)
+
+ use create_regions_mesh_ext_par
+ implicit none
+
+ integer, intent(in) :: myrank,nspec
+ character(len=256) :: LOCAL_PATH
+
+ ! local parameters
+ real, dimension(:,:,:,:),allocatable :: vp_read,vs_read,rho_read
+ integer :: ispec,ier
+ character(len=256) :: prname_lp,filename
+
+ ! -----------------------------------------------------------------------------
+
+ ! note: vp not vs structure is available (as is often the case in exploration seismology),
+ ! scaling factor
+ real, parameter :: SCALING_FACTOR = 1.0/1.8
+
+ ! -----------------------------------------------------------------------------
+
+ ! user output
+ if (myrank==0) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'using external IPATI_WATER model from:',trim(LOCAL_PATH)
+ write(IMAIN,*) 'scaling factor: ',SCALING_FACTOR
+ write(IMAIN,*)
+ endif
+
+ ! processors name
+ write(prname_lp,'(a,i6.6,a)') trim(LOCAL_PATH)//'proc',myrank,'_'
+
+ ! density
+ allocate( rho_read(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array rho_read'
+
+ filename = prname_lp(1:len_trim(prname_lp))//'rho.bin'
+ open(unit=28,file=trim(filename),status='old',action='read',form='unformatted',iostat=ier)
+ if( ier /= 0 ) then
+ print*,'error opening file: ',trim(filename)
+ stop 'error reading rho.bin file'
+ endif
+
+ read(28) rho_read
+ close(28)
+
+ ! vp
+ allocate( vp_read(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array vp_read'
+
+ filename = prname_lp(1:len_trim(prname_lp))//'vp.bin'
+ open(unit=28,file=trim(filename),status='old',action='read',form='unformatted',iostat=ier)
+ if( ier /= 0 ) then
+ print*,'error opening file: ',trim(filename)
+ stop 'error reading vp.bin file'
+ endif
+
+ read(28) vp_read
+ close(28)
+
+ ! vs scaled from vp
+ allocate( vs_read(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array vs_read'
+
+ ! scaling
+ vs_read = vp_read * SCALING_FACTOR
+
+ ! overwrites only elastic elements
+ do ispec=1,nspec
+ ! assumes water layer with acoustic elements are set properly
+ ! only overwrites elastic elements
+ if( ispec_is_elastic(ispec)) then
+ ! isotropic model parameters
+ rhostore(:,:,:,ispec) = rho_read(:,:,:,ispec)
+ kappastore(:,:,:,ispec) = rhostore(:,:,:,ispec) * ( vp_read(:,:,:,ispec) * vp_read(:,:,:,ispec) &
+ - FOUR_THIRDS * vs_read(:,:,:,ispec) * vs_read(:,:,:,ispec) )
+ mustore(:,:,:,ispec) = rhostore(:,:,:,ispec) * vs_read(:,:,:,ispec) * vs_read(:,:,:,ispec)
+ rho_vp(:,:,:,ispec) = rhostore(:,:,:,ispec) * vp_read(:,:,:,ispec)
+ rho_vs(:,:,:,ispec) = rhostore(:,:,:,ispec) * vs_read(:,:,:,ispec)
+ endif
+ enddo
+
+ ! free memory
+ deallocate( rho_read,vp_read,vs_read)
+
+ end subroutine model_ipati_water
+
+
+
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/save_arrays_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/save_arrays_solver.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/save_arrays_solver.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -91,30 +91,23 @@
write(IOUT) ispec_is_poroelastic
! acoustic
-! all processes will have acoustic_simulation set if any flag is .true. somewhere
- call any_all_l( ANY(ispec_is_acoustic), ACOUSTIC_SIMULATION )
if( ACOUSTIC_SIMULATION ) then
write(IOUT) rmass_acoustic
write(IOUT) rhostore
endif
! elastic
- call any_all_l( ANY(ispec_is_elastic), ELASTIC_SIMULATION )
if( ELASTIC_SIMULATION ) then
write(IOUT) rmass
-
if( OCEANS) then
write(IOUT) rmass_ocean_load
endif
-
!pll Stacey
write(IOUT) rho_vp
write(IOUT) rho_vs
-
endif
! poroelastic
- call any_all_l( ANY(ispec_is_poroelastic), POROELASTIC_SIMULATION )
if( POROELASTIC_SIMULATION ) then
write(IOUT) rmass_solid_poroelastic
write(IOUT) rmass_fluid_poroelastic
@@ -136,6 +129,15 @@
write(IOUT) abs_boundary_ijk
write(IOUT) abs_boundary_jacobian2Dw
write(IOUT) abs_boundary_normal
+ ! store mass matrix contributions
+ if(ELASTIC_SIMULATION) then
+ write(IOUT) rmassx
+ write(IOUT) rmassy
+ write(IOUT) rmassz
+ endif
+ if(ACOUSTIC_SIMULATION) then
+ write(IOUT) rmassz_acoustic
+ endif
endif
! free surface
Modified: seismo/3D/SPECFEM3D/trunk/src/shared/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/shared/constants.h.in 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/shared/constants.h.in 2012-09-03 03:54:59 UTC (rev 20674)
@@ -428,3 +428,5 @@
integer, parameter :: IMODEL_SALTON_TROUGH = 8
integer, parameter :: IMODEL_1D_PREM_PB = 9
integer, parameter :: IMODEL_IPATI = 10
+ integer, parameter :: IMODEL_IPATI_WATER = 11
+
Modified: seismo/3D/SPECFEM3D/trunk/src/shared/get_shape2D.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/shared/get_shape2D.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/shared/get_shape2D.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -46,8 +46,6 @@
! location of the nodes of the 2D quadrilateral elements
double precision xi,eta
double precision xi_map,eta_map
- double precision l1xi,l2xi,l3xi,l1eta,l2eta,l3eta
- double precision l1pxi,l2pxi,l3pxi,l1peta,l2peta,l3peta
! for checking the 2D shape functions
double precision sumshape,sumdershapexi,sumdershapeeta
@@ -94,105 +92,146 @@
else
- ! generate the 2D shape functions and their derivatives (9 nodes)
- do i=1,NGLLA
+ ! note: put further initialization for ngnod2d == 9 into subroutine
+ ! to avoid compilation errors in case ngnod2d == 4
+ call get_shape2D_9(NGNOD2D,NDIM2D,shape2D,dershape2D,xigll,yigll,NGLLA,NGLLB)
- xi=xigll(i)
+ endif
- l1xi=HALF*xi*(xi-ONE)
- l2xi=ONE-xi**2
- l3xi=HALF*xi*(xi+ONE)
+! check the 2D shape functions
+ do i=1,NGLLA
+ do j=1,NGLLB
- l1pxi=xi-HALF
- l2pxi=-TWO*xi
- l3pxi=xi+HALF
+ sumshape=ZERO
- do j=1,NGLLB
+ sumdershapexi=ZERO
+ sumdershapeeta=ZERO
- eta=yigll(j)
+ do ia=1,NGNOD2D
+ sumshape=sumshape+shape2D(ia,i,j)
- l1eta=HALF*eta*(eta-ONE)
- l2eta=ONE-eta**2
- l3eta=HALF*eta*(eta+ONE)
+ sumdershapexi=sumdershapexi+dershape2D(1,ia,i,j)
+ sumdershapeeta=sumdershapeeta+dershape2D(2,ia,i,j)
+ enddo
- l1peta=eta-HALF
- l2peta=-TWO*eta
- l3peta=eta+HALF
+! the sum of the shape functions should be 1
+ if(abs(sumshape-ONE)>TINYVAL) call exit_MPI(myrank,'error in 2D shape functions')
-! corner nodes
+! the sum of the derivatives of the shape functions should be 0
+ if(abs(sumdershapexi)>TINYVAL) &
+ call exit_MPI(myrank,'error in xi derivatives of 2D shape function')
- shape2D(1,i,j)=l1xi*l1eta
- shape2D(2,i,j)=l3xi*l1eta
- shape2D(3,i,j)=l3xi*l3eta
- shape2D(4,i,j)=l1xi*l3eta
+ if(abs(sumdershapeeta)>TINYVAL) &
+ call exit_MPI(myrank,'error in eta derivatives of 2D shape function')
- dershape2D(1,1,i,j)=l1pxi*l1eta
- dershape2D(1,2,i,j)=l3pxi*l1eta
- dershape2D(1,3,i,j)=l3pxi*l3eta
- dershape2D(1,4,i,j)=l1pxi*l3eta
+ enddo
+ enddo
- dershape2D(2,1,i,j)=l1xi*l1peta
- dershape2D(2,2,i,j)=l3xi*l1peta
- dershape2D(2,3,i,j)=l3xi*l3peta
- dershape2D(2,4,i,j)=l1xi*l3peta
+ end subroutine get_shape2D
-! midside nodes
+!
+!-------------------------------------------------------------------------------------------------
+!
- shape2D(5,i,j)=l2xi*l1eta
- shape2D(6,i,j)=l3xi*l2eta
- shape2D(7,i,j)=l2xi*l3eta
- shape2D(8,i,j)=l1xi*l2eta
+ subroutine get_shape2D_9(ngnod2d,ndim2d,shape2D,dershape2D,xigll,yigll,NGLLA,NGLLB)
- dershape2D(1,5,i,j)=l2pxi*l1eta
- dershape2D(1,6,i,j)=l3pxi*l2eta
- dershape2D(1,7,i,j)=l2pxi*l3eta
- dershape2D(1,8,i,j)=l1pxi*l2eta
+ implicit none
- dershape2D(2,5,i,j)=l2xi*l1peta
- dershape2D(2,6,i,j)=l3xi*l2peta
- dershape2D(2,7,i,j)=l2xi*l3peta
- dershape2D(2,8,i,j)=l1xi*l2peta
+! generic routine that accepts any polynomial degree in each direction
-! center node
+ integer :: ngnod2d,ndim2d
+ integer :: NGLLA,NGLLB
- shape2D(9,i,j)=l2xi*l2eta
+ double precision xigll(NGLLA)
+ double precision yigll(NGLLB)
- dershape2D(1,9,i,j)=l2pxi*l2eta
- dershape2D(2,9,i,j)=l2xi*l2peta
+! 2D shape functions and their derivatives
+ double precision shape2D(NGNOD2D,NGLLA,NGLLB)
+ double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
- enddo
- enddo
+ integer i,j
- endif
+! location of the nodes of the 2D quadrilateral elements
+ double precision xi,eta
+ double precision l1xi,l2xi,l3xi,l1eta,l2eta,l3eta
+ double precision l1pxi,l2pxi,l3pxi,l1peta,l2peta,l3peta
-! check the 2D shape functions
+ double precision,parameter :: HALF = 0.5d0
+ double precision,parameter :: ONE = 1.0d0
+ double precision,parameter :: TWO = 2.0d0
+
+ ! check that the parameter file is correct
+ if( ngnod2d /= 9) stop 'surface elements should have 9 control nodes'
+
+ ! generate the 2D shape functions and their derivatives (9 nodes)
do i=1,NGLLA
+
+ xi=xigll(i)
+
+ l1xi=HALF*xi*(xi-ONE)
+ l2xi=ONE-xi**2
+ l3xi=HALF*xi*(xi+ONE)
+
+ l1pxi=xi-HALF
+ l2pxi=-TWO*xi
+ l3pxi=xi+HALF
+
do j=1,NGLLB
- sumshape=ZERO
+ eta=yigll(j)
- sumdershapexi=ZERO
- sumdershapeeta=ZERO
+ l1eta=HALF*eta*(eta-ONE)
+ l2eta=ONE-eta**2
+ l3eta=HALF*eta*(eta+ONE)
- do ia=1,NGNOD2D
- sumshape=sumshape+shape2D(ia,i,j)
+ l1peta=eta-HALF
+ l2peta=-TWO*eta
+ l3peta=eta+HALF
- sumdershapexi=sumdershapexi+dershape2D(1,ia,i,j)
- sumdershapeeta=sumdershapeeta+dershape2D(2,ia,i,j)
- enddo
+! corner nodes
-! the sum of the shape functions should be 1
- if(abs(sumshape-ONE)>TINYVAL) call exit_MPI(myrank,'error in 2D shape functions')
+ shape2D(1,i,j)=l1xi*l1eta
+ shape2D(2,i,j)=l3xi*l1eta
+ shape2D(3,i,j)=l3xi*l3eta
+ shape2D(4,i,j)=l1xi*l3eta
-! the sum of the derivatives of the shape functions should be 0
- if(abs(sumdershapexi)>TINYVAL) &
- call exit_MPI(myrank,'error in xi derivatives of 2D shape function')
+ dershape2D(1,1,i,j)=l1pxi*l1eta
+ dershape2D(1,2,i,j)=l3pxi*l1eta
+ dershape2D(1,3,i,j)=l3pxi*l3eta
+ dershape2D(1,4,i,j)=l1pxi*l3eta
- if(abs(sumdershapeeta)>TINYVAL) &
- call exit_MPI(myrank,'error in eta derivatives of 2D shape function')
+ dershape2D(2,1,i,j)=l1xi*l1peta
+ dershape2D(2,2,i,j)=l3xi*l1peta
+ dershape2D(2,3,i,j)=l3xi*l3peta
+ dershape2D(2,4,i,j)=l1xi*l3peta
+! midside nodes
+
+ shape2D(5,i,j)=l2xi*l1eta
+ shape2D(6,i,j)=l3xi*l2eta
+ shape2D(7,i,j)=l2xi*l3eta
+ shape2D(8,i,j)=l1xi*l2eta
+
+ dershape2D(1,5,i,j)=l2pxi*l1eta
+ dershape2D(1,6,i,j)=l3pxi*l2eta
+ dershape2D(1,7,i,j)=l2pxi*l3eta
+ dershape2D(1,8,i,j)=l1pxi*l2eta
+
+ dershape2D(2,5,i,j)=l2xi*l1peta
+ dershape2D(2,6,i,j)=l3xi*l2peta
+ dershape2D(2,7,i,j)=l2xi*l3peta
+ dershape2D(2,8,i,j)=l1xi*l2peta
+
+! center node
+
+ shape2D(9,i,j)=l2xi*l2eta
+
+ dershape2D(1,9,i,j)=l2pxi*l2eta
+ dershape2D(2,9,i,j)=l2xi*l2peta
+
enddo
enddo
- end subroutine get_shape2D
+ end subroutine get_shape2D_9
+
Modified: seismo/3D/SPECFEM3D/trunk/src/shared/hex_nodes.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/shared/hex_nodes.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/shared/hex_nodes.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -72,92 +72,111 @@
if(NGNOD == 27) then
- ! midside nodes (nodes located in the middle of an edge)
+ ! note: put further initialization into subroutine to avoid compilation errors
+ ! in case ngnod == 8
+ call usual_hex_nodes_27(NGNOD,iaddx,iaddy,iaddz)
- iaddx(9) = 1
- iaddy(9) = 0
- iaddz(9) = 0
+ endif
- iaddx(10) = 2
- iaddy(10) = 1
- iaddz(10) = 0
+ end subroutine usual_hex_nodes
- iaddx(11) = 1
- iaddy(11) = 2
- iaddz(11) = 0
+!
+!-------------------------------------------------------------------------------------------------
+!
- iaddx(12) = 0
- iaddy(12) = 1
- iaddz(12) = 0
+ subroutine usual_hex_nodes_27(ngnod,iaddx,iaddy,iaddz)
- iaddx(13) = 0
- iaddy(13) = 0
- iaddz(13) = 1
+ implicit none
- iaddx(14) = 2
- iaddy(14) = 0
- iaddz(14) = 1
+ integer :: ngnod
+ integer,dimension(ngnod) :: iaddx,iaddy,iaddz
- iaddx(15) = 2
- iaddy(15) = 2
- iaddz(15) = 1
+! define the topology of the hexahedral elements
- iaddx(16) = 0
- iaddy(16) = 2
- iaddz(16) = 1
+ ! midside nodes (nodes located in the middle of an edge)
- iaddx(17) = 1
- iaddy(17) = 0
- iaddz(17) = 2
+ iaddx(9) = 1
+ iaddy(9) = 0
+ iaddz(9) = 0
- iaddx(18) = 2
- iaddy(18) = 1
- iaddz(18) = 2
+ iaddx(10) = 2
+ iaddy(10) = 1
+ iaddz(10) = 0
- iaddx(19) = 1
- iaddy(19) = 2
- iaddz(19) = 2
+ iaddx(11) = 1
+ iaddy(11) = 2
+ iaddz(11) = 0
- iaddx(20) = 0
- iaddy(20) = 1
- iaddz(20) = 2
+ iaddx(12) = 0
+ iaddy(12) = 1
+ iaddz(12) = 0
- ! side center nodes (nodes located in the middle of a face)
+ iaddx(13) = 0
+ iaddy(13) = 0
+ iaddz(13) = 1
- iaddx(21) = 1
- iaddy(21) = 1
- iaddz(21) = 0
+ iaddx(14) = 2
+ iaddy(14) = 0
+ iaddz(14) = 1
- iaddx(22) = 1
- iaddy(22) = 0
- iaddz(22) = 1
+ iaddx(15) = 2
+ iaddy(15) = 2
+ iaddz(15) = 1
- iaddx(23) = 2
- iaddy(23) = 1
- iaddz(23) = 1
+ iaddx(16) = 0
+ iaddy(16) = 2
+ iaddz(16) = 1
- iaddx(24) = 1
- iaddy(24) = 2
- iaddz(24) = 1
+ iaddx(17) = 1
+ iaddy(17) = 0
+ iaddz(17) = 2
- iaddx(25) = 0
- iaddy(25) = 1
- iaddz(25) = 1
+ iaddx(18) = 2
+ iaddy(18) = 1
+ iaddz(18) = 2
- iaddx(26) = 1
- iaddy(26) = 1
- iaddz(26) = 2
+ iaddx(19) = 1
+ iaddy(19) = 2
+ iaddz(19) = 2
- ! center node (barycenter of the eight corners)
+ iaddx(20) = 0
+ iaddy(20) = 1
+ iaddz(20) = 2
- iaddx(27) = 1
- iaddy(27) = 1
- iaddz(27) = 1
+ ! side center nodes (nodes located in the middle of a face)
- endif
+ iaddx(21) = 1
+ iaddy(21) = 1
+ iaddz(21) = 0
- end subroutine usual_hex_nodes
+ iaddx(22) = 1
+ iaddy(22) = 0
+ iaddz(22) = 1
+ iaddx(23) = 2
+ iaddy(23) = 1
+ iaddz(23) = 1
+
+ iaddx(24) = 1
+ iaddy(24) = 2
+ iaddz(24) = 1
+
+ iaddx(25) = 0
+ iaddy(25) = 1
+ iaddz(25) = 1
+
+ iaddx(26) = 1
+ iaddy(26) = 1
+ iaddz(26) = 2
+
+ ! center node (barycenter of the eight corners)
+
+ iaddx(27) = 1
+ iaddy(27) = 1
+ iaddz(27) = 1
+
+ end subroutine usual_hex_nodes_27
+
!
!-------------------------------------------------------------------------------------------------
!
Modified: seismo/3D/SPECFEM3D/trunk/src/shared/read_parameter_file.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/shared/read_parameter_file.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/shared/read_parameter_file.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -254,6 +254,8 @@
IMODEL = IMODEL_USER_EXTERNAL
case( 'ipati' )
IMODEL = IMODEL_IPATI
+ case( 'ipati_water' )
+ IMODEL = IMODEL_IPATI_WATER
case( 'gll' )
IMODEL = IMODEL_GLL
case( 'salton_trough')
@@ -270,8 +272,8 @@
end select
! check
- if( IMODEL == IMODEL_IPATI ) then
- if( USE_RICKER_IPATI .eqv. .false. ) stop 'please set USE_RICKER_IPATI to true in shared/constants.h and recompile'
+ if( IMODEL == IMODEL_IPATI .or. IMODEL == IMODEL_IPATI_WATER ) then
+ if( USE_RICKER_IPATI .eqv. .false. ) stop 'please set USE_RICKER_IPATI to .true. in shared/constants.h and recompile'
endif
end subroutine read_parameter_file
Modified: seismo/3D/SPECFEM3D/trunk/src/shared/recompute_jacobian.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/shared/recompute_jacobian.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/shared/recompute_jacobian.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -48,13 +48,6 @@
double precision xgamma,ygamma,zgamma
double precision ra1,ra2,rb1,rb2,rc1,rc2
- double precision l1xi,l2xi,l3xi
- double precision l1eta,l2eta,l3eta
- double precision l1gamma,l2gamma,l3gamma
- double precision l1pxi,l2pxi,l3pxi
- double precision l1peta,l2peta,l3peta
- double precision l1pgamma,l2pgamma,l3pgamma
-
integer ia
! for 8-node element
@@ -120,164 +113,9 @@
else
-! ***
-! *** create the 3D shape functions and the Jacobian for a 27-node element
-! ***
-
- l1xi=HALF*xi*(xi-ONE)
- l2xi=ONE-xi**2
- l3xi=HALF*xi*(xi+ONE)
-
- l1pxi=xi-HALF
- l2pxi=-TWO*xi
- l3pxi=xi+HALF
-
- l1eta=HALF*eta*(eta-ONE)
- l2eta=ONE-eta**2
- l3eta=HALF*eta*(eta+ONE)
-
- l1peta=eta-HALF
- l2peta=-TWO*eta
- l3peta=eta+HALF
-
- l1gamma=HALF*gamma*(gamma-ONE)
- l2gamma=ONE-gamma**2
- l3gamma=HALF*gamma*(gamma+ONE)
-
- l1pgamma=gamma-HALF
- l2pgamma=-TWO*gamma
- l3pgamma=gamma+HALF
-
-! corner nodes
-
- shape3D(1)=l1xi*l1eta*l1gamma
- shape3D(2)=l3xi*l1eta*l1gamma
- shape3D(3)=l3xi*l3eta*l1gamma
- shape3D(4)=l1xi*l3eta*l1gamma
- shape3D(5)=l1xi*l1eta*l3gamma
- shape3D(6)=l3xi*l1eta*l3gamma
- shape3D(7)=l3xi*l3eta*l3gamma
- shape3D(8)=l1xi*l3eta*l3gamma
-
- dershape3D(1,1)=l1pxi*l1eta*l1gamma
- dershape3D(1,2)=l3pxi*l1eta*l1gamma
- dershape3D(1,3)=l3pxi*l3eta*l1gamma
- dershape3D(1,4)=l1pxi*l3eta*l1gamma
- dershape3D(1,5)=l1pxi*l1eta*l3gamma
- dershape3D(1,6)=l3pxi*l1eta*l3gamma
- dershape3D(1,7)=l3pxi*l3eta*l3gamma
- dershape3D(1,8)=l1pxi*l3eta*l3gamma
-
- dershape3D(2,1)=l1xi*l1peta*l1gamma
- dershape3D(2,2)=l3xi*l1peta*l1gamma
- dershape3D(2,3)=l3xi*l3peta*l1gamma
- dershape3D(2,4)=l1xi*l3peta*l1gamma
- dershape3D(2,5)=l1xi*l1peta*l3gamma
- dershape3D(2,6)=l3xi*l1peta*l3gamma
- dershape3D(2,7)=l3xi*l3peta*l3gamma
- dershape3D(2,8)=l1xi*l3peta*l3gamma
-
- dershape3D(3,1)=l1xi*l1eta*l1pgamma
- dershape3D(3,2)=l3xi*l1eta*l1pgamma
- dershape3D(3,3)=l3xi*l3eta*l1pgamma
- dershape3D(3,4)=l1xi*l3eta*l1pgamma
- dershape3D(3,5)=l1xi*l1eta*l3pgamma
- dershape3D(3,6)=l3xi*l1eta*l3pgamma
- dershape3D(3,7)=l3xi*l3eta*l3pgamma
- dershape3D(3,8)=l1xi*l3eta*l3pgamma
-
-! midside nodes
-
- shape3D(9)=l2xi*l1eta*l1gamma
- shape3D(10)=l3xi*l2eta*l1gamma
- shape3D(11)=l2xi*l3eta*l1gamma
- shape3D(12)=l1xi*l2eta*l1gamma
- shape3D(13)=l1xi*l1eta*l2gamma
- shape3D(14)=l3xi*l1eta*l2gamma
- shape3D(15)=l3xi*l3eta*l2gamma
- shape3D(16)=l1xi*l3eta*l2gamma
- shape3D(17)=l2xi*l1eta*l3gamma
- shape3D(18)=l3xi*l2eta*l3gamma
- shape3D(19)=l2xi*l3eta*l3gamma
- shape3D(20)=l1xi*l2eta*l3gamma
-
- dershape3D(1,9)=l2pxi*l1eta*l1gamma
- dershape3D(1,10)=l3pxi*l2eta*l1gamma
- dershape3D(1,11)=l2pxi*l3eta*l1gamma
- dershape3D(1,12)=l1pxi*l2eta*l1gamma
- dershape3D(1,13)=l1pxi*l1eta*l2gamma
- dershape3D(1,14)=l3pxi*l1eta*l2gamma
- dershape3D(1,15)=l3pxi*l3eta*l2gamma
- dershape3D(1,16)=l1pxi*l3eta*l2gamma
- dershape3D(1,17)=l2pxi*l1eta*l3gamma
- dershape3D(1,18)=l3pxi*l2eta*l3gamma
- dershape3D(1,19)=l2pxi*l3eta*l3gamma
- dershape3D(1,20)=l1pxi*l2eta*l3gamma
-
- dershape3D(2,9)=l2xi*l1peta*l1gamma
- dershape3D(2,10)=l3xi*l2peta*l1gamma
- dershape3D(2,11)=l2xi*l3peta*l1gamma
- dershape3D(2,12)=l1xi*l2peta*l1gamma
- dershape3D(2,13)=l1xi*l1peta*l2gamma
- dershape3D(2,14)=l3xi*l1peta*l2gamma
- dershape3D(2,15)=l3xi*l3peta*l2gamma
- dershape3D(2,16)=l1xi*l3peta*l2gamma
- dershape3D(2,17)=l2xi*l1peta*l3gamma
- dershape3D(2,18)=l3xi*l2peta*l3gamma
- dershape3D(2,19)=l2xi*l3peta*l3gamma
- dershape3D(2,20)=l1xi*l2peta*l3gamma
-
- dershape3D(3,9)=l2xi*l1eta*l1pgamma
- dershape3D(3,10)=l3xi*l2eta*l1pgamma
- dershape3D(3,11)=l2xi*l3eta*l1pgamma
- dershape3D(3,12)=l1xi*l2eta*l1pgamma
- dershape3D(3,13)=l1xi*l1eta*l2pgamma
- dershape3D(3,14)=l3xi*l1eta*l2pgamma
- dershape3D(3,15)=l3xi*l3eta*l2pgamma
- dershape3D(3,16)=l1xi*l3eta*l2pgamma
- dershape3D(3,17)=l2xi*l1eta*l3pgamma
- dershape3D(3,18)=l3xi*l2eta*l3pgamma
- dershape3D(3,19)=l2xi*l3eta*l3pgamma
- dershape3D(3,20)=l1xi*l2eta*l3pgamma
-
-! side center nodes
-
- shape3D(21)=l2xi*l2eta*l1gamma
- shape3D(22)=l2xi*l1eta*l2gamma
- shape3D(23)=l3xi*l2eta*l2gamma
- shape3D(24)=l2xi*l3eta*l2gamma
- shape3D(25)=l1xi*l2eta*l2gamma
- shape3D(26)=l2xi*l2eta*l3gamma
-
- dershape3D(1,21)=l2pxi*l2eta*l1gamma
- dershape3D(1,22)=l2pxi*l1eta*l2gamma
- dershape3D(1,23)=l3pxi*l2eta*l2gamma
- dershape3D(1,24)=l2pxi*l3eta*l2gamma
- dershape3D(1,25)=l1pxi*l2eta*l2gamma
- dershape3D(1,26)=l2pxi*l2eta*l3gamma
-
- dershape3D(2,21)=l2xi*l2peta*l1gamma
- dershape3D(2,22)=l2xi*l1peta*l2gamma
- dershape3D(2,23)=l3xi*l2peta*l2gamma
- dershape3D(2,24)=l2xi*l3peta*l2gamma
- dershape3D(2,25)=l1xi*l2peta*l2gamma
- dershape3D(2,26)=l2xi*l2peta*l3gamma
-
- dershape3D(3,21)=l2xi*l2eta*l1pgamma
- dershape3D(3,22)=l2xi*l1eta*l2pgamma
- dershape3D(3,23)=l3xi*l2eta*l2pgamma
- dershape3D(3,24)=l2xi*l3eta*l2pgamma
- dershape3D(3,25)=l1xi*l2eta*l2pgamma
- dershape3D(3,26)=l2xi*l2eta*l3pgamma
-
-! center node
-
- shape3D(27)=l2xi*l2eta*l2gamma
-
- dershape3D(1,27)=l2pxi*l2eta*l2gamma
- dershape3D(2,27)=l2xi*l2peta*l2gamma
- dershape3D(3,27)=l2xi*l2eta*l2pgamma
-
+ ! note: put further setup for ngnod == 27 into subroutine
+ ! to avoid compilation errors in case ngnod == 8
+ call recompute_jacobian_27(NGNOD,NDIM,xi,eta,gamma,shape3D,dershape3D)
endif
! compute coordinates and jacobian matrix
@@ -327,3 +165,196 @@
end subroutine recompute_jacobian
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine recompute_jacobian_27(ngnod,ndim,xi,eta,gamma,shape3D,dershape3D)
+
+ implicit none
+
+ integer :: ngnod,ndim
+
+ double precision :: xi,eta,gamma
+
+! 3D shape functions and their derivatives at receiver
+ double precision,dimension(ngnod) :: shape3D
+ double precision,dimension(ndim,ngnod) :: dershape3D
+
+ ! local parameters
+ double precision l1xi,l2xi,l3xi
+ double precision l1eta,l2eta,l3eta
+ double precision l1gamma,l2gamma,l3gamma
+ double precision l1pxi,l2pxi,l3pxi
+ double precision l1peta,l2peta,l3peta
+ double precision l1pgamma,l2pgamma,l3pgamma
+
+ double precision, parameter :: HALF = 0.5d0
+ double precision, parameter :: ONE = 1.0d0
+ double precision, parameter :: TWO = 2.0d0
+
+! recompute jacobian for any (xi,eta,gamma) point, not necessarily a GLL point
+
+
+! ***
+! *** create the 3D shape functions and the Jacobian for a 27-node element
+! ***
+
+ l1xi=HALF*xi*(xi-ONE)
+ l2xi=ONE-xi**2
+ l3xi=HALF*xi*(xi+ONE)
+
+ l1pxi=xi-HALF
+ l2pxi=-TWO*xi
+ l3pxi=xi+HALF
+
+ l1eta=HALF*eta*(eta-ONE)
+ l2eta=ONE-eta**2
+ l3eta=HALF*eta*(eta+ONE)
+
+ l1peta=eta-HALF
+ l2peta=-TWO*eta
+ l3peta=eta+HALF
+
+ l1gamma=HALF*gamma*(gamma-ONE)
+ l2gamma=ONE-gamma**2
+ l3gamma=HALF*gamma*(gamma+ONE)
+
+ l1pgamma=gamma-HALF
+ l2pgamma=-TWO*gamma
+ l3pgamma=gamma+HALF
+
+! corner nodes
+
+ shape3D(1)=l1xi*l1eta*l1gamma
+ shape3D(2)=l3xi*l1eta*l1gamma
+ shape3D(3)=l3xi*l3eta*l1gamma
+ shape3D(4)=l1xi*l3eta*l1gamma
+ shape3D(5)=l1xi*l1eta*l3gamma
+ shape3D(6)=l3xi*l1eta*l3gamma
+ shape3D(7)=l3xi*l3eta*l3gamma
+ shape3D(8)=l1xi*l3eta*l3gamma
+
+ dershape3D(1,1)=l1pxi*l1eta*l1gamma
+ dershape3D(1,2)=l3pxi*l1eta*l1gamma
+ dershape3D(1,3)=l3pxi*l3eta*l1gamma
+ dershape3D(1,4)=l1pxi*l3eta*l1gamma
+ dershape3D(1,5)=l1pxi*l1eta*l3gamma
+ dershape3D(1,6)=l3pxi*l1eta*l3gamma
+ dershape3D(1,7)=l3pxi*l3eta*l3gamma
+ dershape3D(1,8)=l1pxi*l3eta*l3gamma
+
+ dershape3D(2,1)=l1xi*l1peta*l1gamma
+ dershape3D(2,2)=l3xi*l1peta*l1gamma
+ dershape3D(2,3)=l3xi*l3peta*l1gamma
+ dershape3D(2,4)=l1xi*l3peta*l1gamma
+ dershape3D(2,5)=l1xi*l1peta*l3gamma
+ dershape3D(2,6)=l3xi*l1peta*l3gamma
+ dershape3D(2,7)=l3xi*l3peta*l3gamma
+ dershape3D(2,8)=l1xi*l3peta*l3gamma
+
+ dershape3D(3,1)=l1xi*l1eta*l1pgamma
+ dershape3D(3,2)=l3xi*l1eta*l1pgamma
+ dershape3D(3,3)=l3xi*l3eta*l1pgamma
+ dershape3D(3,4)=l1xi*l3eta*l1pgamma
+ dershape3D(3,5)=l1xi*l1eta*l3pgamma
+ dershape3D(3,6)=l3xi*l1eta*l3pgamma
+ dershape3D(3,7)=l3xi*l3eta*l3pgamma
+ dershape3D(3,8)=l1xi*l3eta*l3pgamma
+
+! midside nodes
+
+ shape3D(9)=l2xi*l1eta*l1gamma
+ shape3D(10)=l3xi*l2eta*l1gamma
+ shape3D(11)=l2xi*l3eta*l1gamma
+ shape3D(12)=l1xi*l2eta*l1gamma
+ shape3D(13)=l1xi*l1eta*l2gamma
+ shape3D(14)=l3xi*l1eta*l2gamma
+ shape3D(15)=l3xi*l3eta*l2gamma
+ shape3D(16)=l1xi*l3eta*l2gamma
+ shape3D(17)=l2xi*l1eta*l3gamma
+ shape3D(18)=l3xi*l2eta*l3gamma
+ shape3D(19)=l2xi*l3eta*l3gamma
+ shape3D(20)=l1xi*l2eta*l3gamma
+
+ dershape3D(1,9)=l2pxi*l1eta*l1gamma
+ dershape3D(1,10)=l3pxi*l2eta*l1gamma
+ dershape3D(1,11)=l2pxi*l3eta*l1gamma
+ dershape3D(1,12)=l1pxi*l2eta*l1gamma
+ dershape3D(1,13)=l1pxi*l1eta*l2gamma
+ dershape3D(1,14)=l3pxi*l1eta*l2gamma
+ dershape3D(1,15)=l3pxi*l3eta*l2gamma
+ dershape3D(1,16)=l1pxi*l3eta*l2gamma
+ dershape3D(1,17)=l2pxi*l1eta*l3gamma
+ dershape3D(1,18)=l3pxi*l2eta*l3gamma
+ dershape3D(1,19)=l2pxi*l3eta*l3gamma
+ dershape3D(1,20)=l1pxi*l2eta*l3gamma
+
+ dershape3D(2,9)=l2xi*l1peta*l1gamma
+ dershape3D(2,10)=l3xi*l2peta*l1gamma
+ dershape3D(2,11)=l2xi*l3peta*l1gamma
+ dershape3D(2,12)=l1xi*l2peta*l1gamma
+ dershape3D(2,13)=l1xi*l1peta*l2gamma
+ dershape3D(2,14)=l3xi*l1peta*l2gamma
+ dershape3D(2,15)=l3xi*l3peta*l2gamma
+ dershape3D(2,16)=l1xi*l3peta*l2gamma
+ dershape3D(2,17)=l2xi*l1peta*l3gamma
+ dershape3D(2,18)=l3xi*l2peta*l3gamma
+ dershape3D(2,19)=l2xi*l3peta*l3gamma
+ dershape3D(2,20)=l1xi*l2peta*l3gamma
+
+ dershape3D(3,9)=l2xi*l1eta*l1pgamma
+ dershape3D(3,10)=l3xi*l2eta*l1pgamma
+ dershape3D(3,11)=l2xi*l3eta*l1pgamma
+ dershape3D(3,12)=l1xi*l2eta*l1pgamma
+ dershape3D(3,13)=l1xi*l1eta*l2pgamma
+ dershape3D(3,14)=l3xi*l1eta*l2pgamma
+ dershape3D(3,15)=l3xi*l3eta*l2pgamma
+ dershape3D(3,16)=l1xi*l3eta*l2pgamma
+ dershape3D(3,17)=l2xi*l1eta*l3pgamma
+ dershape3D(3,18)=l3xi*l2eta*l3pgamma
+ dershape3D(3,19)=l2xi*l3eta*l3pgamma
+ dershape3D(3,20)=l1xi*l2eta*l3pgamma
+
+! side center nodes
+
+ shape3D(21)=l2xi*l2eta*l1gamma
+ shape3D(22)=l2xi*l1eta*l2gamma
+ shape3D(23)=l3xi*l2eta*l2gamma
+ shape3D(24)=l2xi*l3eta*l2gamma
+ shape3D(25)=l1xi*l2eta*l2gamma
+ shape3D(26)=l2xi*l2eta*l3gamma
+
+ dershape3D(1,21)=l2pxi*l2eta*l1gamma
+ dershape3D(1,22)=l2pxi*l1eta*l2gamma
+ dershape3D(1,23)=l3pxi*l2eta*l2gamma
+ dershape3D(1,24)=l2pxi*l3eta*l2gamma
+ dershape3D(1,25)=l1pxi*l2eta*l2gamma
+ dershape3D(1,26)=l2pxi*l2eta*l3gamma
+
+ dershape3D(2,21)=l2xi*l2peta*l1gamma
+ dershape3D(2,22)=l2xi*l1peta*l2gamma
+ dershape3D(2,23)=l3xi*l2peta*l2gamma
+ dershape3D(2,24)=l2xi*l3peta*l2gamma
+ dershape3D(2,25)=l1xi*l2peta*l2gamma
+ dershape3D(2,26)=l2xi*l2peta*l3gamma
+
+ dershape3D(3,21)=l2xi*l2eta*l1pgamma
+ dershape3D(3,22)=l2xi*l1eta*l2pgamma
+ dershape3D(3,23)=l3xi*l2eta*l2pgamma
+ dershape3D(3,24)=l2xi*l3eta*l2pgamma
+ dershape3D(3,25)=l1xi*l2eta*l2pgamma
+ dershape3D(3,26)=l2xi*l2eta*l3pgamma
+
+! center node
+
+ shape3D(27)=l2xi*l2eta*l2gamma
+
+ dershape3D(1,27)=l2pxi*l2eta*l2gamma
+ dershape3D(2,27)=l2xi*l2peta*l2gamma
+ dershape3D(3,27)=l2xi*l2eta*l2pgamma
+
+
+ end subroutine recompute_jacobian_27
+
+
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/PML_init.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/PML_init.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/PML_init.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -127,7 +127,6 @@
subroutine PML_initialize()
use specfem_par,only: NGLOB_AB,NSPEC_AB,myrank, &
- ibool,xstore,ystore,zstore,&
model_speed_max,hdur
use PML_par
use PML_par_acoustic
@@ -303,9 +302,9 @@
! sets ispec occurrences for first element layer in PML region based on absorbing boundary elements
use PML_par
- use specfem_par,only: NSPEC_AB,NGLOB_AB, &
+ use specfem_par,only: NSPEC_AB, &
abs_boundary_ispec,abs_boundary_normal,num_abs_boundary_faces,&
- abs_boundary_ijk,ibool,myrank
+ abs_boundary_ijk,ibool
use constants,only: NDIM,TINYVAL,NGNOD,NGLLX,NGLLY,NGLLZ,NGLLSQUARE
implicit none
! local parameters
@@ -479,14 +478,13 @@
! finds global interface points of PML region to "regular" domain
- use specfem_par,only: ibool,myrank,NGLOB_AB,NSPEC_AB, &
+ use specfem_par,only: ibool,NGLOB_AB,NSPEC_AB, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
my_neighbours_ext_mesh,NPROC
use PML_par
use PML_par_acoustic
use constants,only: NGLLX,NGLLY,NGLLZ
- use specfem_par_acoustic,only: ispec_is_acoustic
implicit none
! local parameters
@@ -848,7 +846,6 @@
use PML_par
use specfem_par,only: NSPEC_AB,NGLOB_AB, &
- abs_boundary_ispec,num_abs_boundary_faces,&
ibool,myrank,&
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_acoustic.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_acoustic.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -139,8 +139,7 @@
! write(*,*) "fortran dt = ", dt
! change dt -> DT
call compute_add_sources_ac_cuda(Mesh_pointer, phase_is_inner, &
- NSOURCES, SIMULATION_TYPE, &
- stf_pre_compute, myrank)
+ NSOURCES,stf_pre_compute, myrank)
endif
else ! .NOT. GPU_MODE
@@ -435,8 +434,7 @@
! only implements SIMTYPE=3
call compute_add_sources_ac_s3_cuda(Mesh_pointer, phase_is_inner, &
- NSOURCES, SIMULATION_TYPE, &
- stf_pre_compute, myrank)
+ NSOURCES,stf_pre_compute, myrank)
endif
else ! .NOT. GPU_MODE
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_elastic.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_elastic.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -39,7 +39,6 @@
use specfem_par,only: PRINT_SOURCE_TIME_FUNCTION,stf_used_total, &
xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,&
station_name,network_name,adj_source_file, &
- LOCAL_PATH,wgllwgll_xy, &
num_free_surface_faces,free_surface_ispec, &
free_surface_ijk,free_surface_jacobian2Dw, &
noise_sourcearray,irec_master_noise, &
@@ -48,10 +47,6 @@
nrec_local,number_receiver_global, &
nsources_local
- use specfem_par_movie,only: &
- store_val_ux_external_mesh,store_val_uy_external_mesh, &
- store_val_uz_external_mesh
-
implicit none
include "constants.h"
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_poroelastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_poroelastic.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_poroelastic.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -41,7 +41,7 @@
use specfem_par,only: PRINT_SOURCE_TIME_FUNCTION,stf_used_total, &
xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,&
- station_name,network_name,adj_source_file,nrec_local,number_receiver_global
+ station_name,network_name,adj_source_file
implicit none
include "constants.h"
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_coupling_elastic_ac.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_coupling_elastic_ac.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_coupling_elastic_ac.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -120,3 +120,110 @@
end subroutine compute_coupling_elastic_ac
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine compute_coupling_ocean(NSPEC_AB,NGLOB_AB, &
+ ibool,rmassx,rmassy,rmassz, &
+ rmass_ocean_load,accel, &
+ free_surface_normal,free_surface_ijk,free_surface_ispec, &
+ num_free_surface_faces,SIMULATION_TYPE, &
+ NGLOB_ADJOINT,b_accel)
+
+! updates acceleration with ocean load term:
+! approximates ocean-bottom continuity of pressure & displacement for longer period waves (> ~20s ),
+! assuming incompressible fluid column above bathymetry ocean bottom
+
+ implicit none
+
+ include 'constants.h'
+
+ integer :: NSPEC_AB,NGLOB_AB
+
+ real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_AB),intent(inout) :: accel
+ real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: rmassx,rmassy,rmassz
+ real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: rmass_ocean_load
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
+
+ ! free surface
+ integer :: num_free_surface_faces
+ real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
+ integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
+ integer :: free_surface_ispec(num_free_surface_faces)
+
+ ! adjoint simulations
+ integer :: SIMULATION_TYPE,NGLOB_ADJOINT
+ real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_ADJOINT):: b_accel
+
+! local parameters
+ real(kind=CUSTOM_REAL) :: nx,ny,nz
+ real(kind=CUSTOM_REAL) :: force_normal_comp
+ integer :: i,j,k,ispec,iglob
+ integer :: igll,iface
+ logical,dimension(NGLOB_AB) :: updated_dof_ocean_load
+ ! adjoint locals
+ real(kind=CUSTOM_REAL) :: b_force_normal_comp
+
+ ! initialize the updates
+ updated_dof_ocean_load(:) = .false.
+
+ ! for surface elements exactly at the top of the model (ocean bottom)
+ do iface = 1,num_free_surface_faces
+
+ ispec = free_surface_ispec(iface)
+ do igll = 1, NGLLSQUARE
+ i = free_surface_ijk(1,igll,iface)
+ j = free_surface_ijk(2,igll,iface)
+ k = free_surface_ijk(3,igll,iface)
+
+ ! get global point number
+ iglob = ibool(i,j,k,ispec)
+
+ ! only update once
+ if(.not. updated_dof_ocean_load(iglob)) then
+
+ ! get normal
+ nx = free_surface_normal(1,igll,iface)
+ ny = free_surface_normal(2,igll,iface)
+ nz = free_surface_normal(3,igll,iface)
+
+ ! make updated component of right-hand side
+ ! we divide by rmass() which is 1 / M
+ ! we use the total force which includes the Coriolis term above
+ force_normal_comp = accel(1,iglob)*nx / rmassx(iglob) &
+ + accel(2,iglob)*ny / rmassy(iglob) &
+ + accel(3,iglob)*nz / rmassz(iglob)
+
+ accel(1,iglob) = accel(1,iglob) &
+ + (rmass_ocean_load(iglob) - rmassx(iglob)) * force_normal_comp * nx
+ accel(2,iglob) = accel(2,iglob) &
+ + (rmass_ocean_load(iglob) - rmassy(iglob)) * force_normal_comp * ny
+ accel(3,iglob) = accel(3,iglob) &
+ + (rmass_ocean_load(iglob) - rmassz(iglob)) * force_normal_comp * nz
+
+ ! adjoint simulations
+ if (SIMULATION_TYPE == 3) then
+ b_force_normal_comp = b_accel(1,iglob)*nx / rmassx(iglob) &
+ + b_accel(2,iglob)*ny / rmassy(iglob) &
+ + b_accel(3,iglob)*nz / rmassz(iglob)
+
+ b_accel(1,iglob) = b_accel(1,iglob) &
+ + (rmass_ocean_load(iglob) - rmassx(iglob)) * b_force_normal_comp * nx
+ b_accel(2,iglob) = b_accel(2,iglob) &
+ + (rmass_ocean_load(iglob) - rmassy(iglob)) * b_force_normal_comp * ny
+ b_accel(3,iglob) = b_accel(3,iglob) &
+ + (rmass_ocean_load(iglob) - rmassz(iglob)) * b_force_normal_comp * nz
+ endif !adjoint
+
+ ! done with this point
+ updated_dof_ocean_load(iglob) = .true.
+
+ endif
+
+ enddo ! igll
+ enddo ! iface
+
+ end subroutine compute_coupling_ocean
+
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -81,7 +81,7 @@
num_free_surface_faces,ispec_is_acoustic)
else
! on GPU
- call acoustic_enforce_free_surf_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
+ call acoustic_enforce_free_surf_cuda(Mesh_pointer,ABSORB_FREE_SURFACE)
endif
if(ABSORB_USE_PML .and. ABSORBING_CONDITIONS) then
@@ -148,8 +148,7 @@
! on GPU
! includes code for SIMULATION_TYPE==3
call compute_forces_acoustic_cuda(Mesh_pointer, iphase, &
- nspec_outer_acoustic, nspec_inner_acoustic, &
- SIMULATION_TYPE)
+ nspec_outer_acoustic, nspec_inner_acoustic)
endif
if(ABSORB_USE_PML .and. ABSORBING_CONDITIONS) then
@@ -253,7 +252,7 @@
else
! on GPU
call compute_coupling_ac_el_cuda(Mesh_pointer,phase_is_inner, &
- num_coupling_ac_el_faces,SIMULATION_TYPE)
+ num_coupling_ac_el_faces)
endif ! GPU_MODE
endif
endif
@@ -401,7 +400,7 @@
b_potential_dot_dot_acoustic(:) = b_potential_dot_dot_acoustic(:) * rmass_acoustic(:)
else
! on GPU
- call kernel_3_a_acoustic_cuda(Mesh_pointer,NGLOB_AB,SIMULATION_TYPE)
+ call kernel_3_a_acoustic_cuda(Mesh_pointer,NGLOB_AB)
endif
@@ -448,7 +447,7 @@
b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) + b_deltatover2*b_potential_dot_dot_acoustic(:)
else
! on GPU
- call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,SIMULATION_TYPE,b_deltatover2)
+ call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,b_deltatover2)
endif
! updates potential_dot_acoustic and potential_dot_dot_acoustic inside PML region for plotting seismograms/movies
@@ -497,7 +496,7 @@
num_free_surface_faces,ispec_is_acoustic)
else
! on GPU
- call acoustic_enforce_free_surf_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
+ call acoustic_enforce_free_surf_cuda(Mesh_pointer,ABSORB_FREE_SURFACE)
endif
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -124,7 +124,6 @@
call compute_forces_elastic_cuda(Mesh_pointer, iphase, &
nspec_outer_elastic, &
nspec_inner_elastic, &
- SIMULATION_TYPE, &
COMPUTE_AND_STORE_STRAIN,ATTENUATION,ANISOTROPY)
if(phase_is_inner .eqv. .true.) then
@@ -203,7 +202,7 @@
else
! on GPU
call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, &
- num_coupling_ac_el_faces,SIMULATION_TYPE)
+ num_coupling_ac_el_faces)
endif ! GPU_MODE
endif ! num_coupling_ac_el_faces
endif
@@ -329,30 +328,31 @@
! multiplies with inverse of mass matrix (note: rmass has been inverted already)
if(.NOT. GPU_MODE) then
- accel(1,:) = accel(1,:)*rmass(:)
- accel(2,:) = accel(2,:)*rmass(:)
- accel(3,:) = accel(3,:)*rmass(:)
+ accel(1,:) = accel(1,:)*rmassx(:)
+ accel(2,:) = accel(2,:)*rmassy(:)
+ accel(3,:) = accel(3,:)*rmassz(:)
! adjoint simulations
if (SIMULATION_TYPE == 3) then
- b_accel(1,:) = b_accel(1,:)*rmass(:)
- b_accel(2,:) = b_accel(2,:)*rmass(:)
- b_accel(3,:) = b_accel(3,:)*rmass(:)
+ b_accel(1,:) = b_accel(1,:)*rmassx(:)
+ b_accel(2,:) = b_accel(2,:)*rmassy(:)
+ b_accel(3,:) = b_accel(3,:)*rmassz(:)
endif !adjoint
else ! GPU_MODE == 1
- call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS)
+ call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,b_deltatover2,OCEANS)
endif
! updates acceleration with ocean load term
if(OCEANS) then
if( .NOT. GPU_MODE ) then
- call elastic_ocean_load(NSPEC_AB,NGLOB_AB, &
- ibool,rmass,rmass_ocean_load,accel, &
- free_surface_normal,free_surface_ijk,free_surface_ispec, &
- num_free_surface_faces,SIMULATION_TYPE, &
- NGLOB_ADJOINT,b_accel)
+ call compute_coupling_ocean(NSPEC_AB,NGLOB_AB, &
+ ibool,rmassx,rmassy,rmassz, &
+ rmass_ocean_load,accel, &
+ free_surface_normal,free_surface_ijk,free_surface_ispec, &
+ num_free_surface_faces,SIMULATION_TYPE, &
+ NGLOB_ADJOINT,b_accel)
else
! on GPU
- call elastic_ocean_load_cuda(Mesh_pointer,SIMULATION_TYPE)
+ call compute_coupling_ocean_cuda(Mesh_pointer)
endif
endif
@@ -378,7 +378,7 @@
! adjoint simulations
if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
else ! GPU_MODE == 1
- if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2)
+ if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,b_deltatover2)
endif
@@ -389,108 +389,6 @@
!-------------------------------------------------------------------------------------------------
!
-subroutine elastic_ocean_load(NSPEC_AB,NGLOB_AB, &
- ibool,rmass,rmass_ocean_load,accel, &
- free_surface_normal,free_surface_ijk,free_surface_ispec, &
- num_free_surface_faces,SIMULATION_TYPE, &
- NGLOB_ADJOINT,b_accel)
-
-! updates acceleration with ocean load term:
-! approximates ocean-bottom continuity of pressure & displacement for longer period waves (> ~20s ),
-! assuming incompressible fluid column above bathymetry ocean bottom
-
- implicit none
-
- include 'constants.h'
-
- integer :: NSPEC_AB,NGLOB_AB
-
- real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_AB),intent(inout) :: accel
- real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: rmass,rmass_ocean_load
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
-
- ! free surface
- integer :: num_free_surface_faces
- real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
- integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
- integer :: free_surface_ispec(num_free_surface_faces)
-
- ! adjoint simulations
- integer :: SIMULATION_TYPE,NGLOB_ADJOINT
- real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_ADJOINT):: b_accel
-
-! local parameters
- real(kind=CUSTOM_REAL) :: nx,ny,nz
- real(kind=CUSTOM_REAL) :: additional_term,force_normal_comp
- integer :: i,j,k,ispec,iglob
- integer :: igll,iface
- logical,dimension(NGLOB_AB) :: updated_dof_ocean_load
- ! adjoint locals
- real(kind=CUSTOM_REAL) :: b_additional_term,b_force_normal_comp
-
- ! initialize the updates
- updated_dof_ocean_load(:) = .false.
-
- ! for surface elements exactly at the top of the model (ocean bottom)
- do iface = 1,num_free_surface_faces
-
- ispec = free_surface_ispec(iface)
- do igll = 1, NGLLSQUARE
- i = free_surface_ijk(1,igll,iface)
- j = free_surface_ijk(2,igll,iface)
- k = free_surface_ijk(3,igll,iface)
-
- ! get global point number
- iglob = ibool(i,j,k,ispec)
-
- ! only update once
- if(.not. updated_dof_ocean_load(iglob)) then
-
- ! get normal
- nx = free_surface_normal(1,igll,iface)
- ny = free_surface_normal(2,igll,iface)
- nz = free_surface_normal(3,igll,iface)
-
- ! make updated component of right-hand side
- ! we divide by rmass() which is 1 / M
- ! we use the total force which includes the Coriolis term above
- force_normal_comp = ( accel(1,iglob)*nx + &
- accel(2,iglob)*ny + &
- accel(3,iglob)*nz ) / rmass(iglob)
-
- additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * force_normal_comp
-
- accel(1,iglob) = accel(1,iglob) + additional_term * nx
- accel(2,iglob) = accel(2,iglob) + additional_term * ny
- accel(3,iglob) = accel(3,iglob) + additional_term * nz
-
- ! adjoint simulations
- if (SIMULATION_TYPE == 3) then
- b_force_normal_comp = ( b_accel(1,iglob)*nx + &
- b_accel(2,iglob)*ny + &
- b_accel(3,iglob)*nz) / rmass(iglob)
- b_additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * b_force_normal_comp
-
- b_accel(1,iglob) = b_accel(1,iglob) + b_additional_term * nx
- b_accel(2,iglob) = b_accel(2,iglob) + b_additional_term * ny
- b_accel(3,iglob) = b_accel(3,iglob) + b_additional_term * nz
- endif !adjoint
-
- ! done with this point
- updated_dof_ocean_load(iglob) = .true.
-
- endif
-
- enddo ! igll
- enddo ! iface
-
-end subroutine elastic_ocean_load
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
! distributes routines according to chosen NGLLX in constants.h
!daniel: note -- i put it here rather than in compute_forces_elastic_Dev.f90 because compiler complains that:
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_acoustic.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_acoustic.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -147,7 +147,7 @@
! GPU_MODE == .true.
if( num_abs_boundary_faces > 0 ) &
call compute_stacey_acoustic_cuda(Mesh_pointer, phase_is_inner, &
- SIMULATION_TYPE,SAVE_FORWARD,b_absorb_potential)
+ SAVE_FORWARD,b_absorb_potential)
endif
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_elastic.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_elastic.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -166,7 +166,7 @@
! GPU_MODE == .true.
if( num_abs_boundary_faces > 0 ) &
call compute_stacey_elastic_cuda(Mesh_pointer,phase_is_inner, &
- SIMULATION_TYPE,SAVE_FORWARD,b_absorb_field)
+ SAVE_FORWARD,b_absorb_field)
endif
! adjoint simulations: stores absorbed wavefield part
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/create_color_image.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/create_color_image.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/create_color_image.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -103,7 +103,7 @@
use specfem_par,only: NGLOB_AB,NSPEC_AB,ibool,xstore,ystore,zstore,&
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,my_neighbours_ext_mesh, &
- ibool_interfaces_ext_mesh,prname
+ ibool_interfaces_ext_mesh !,prname
use constants,only: HUGEVAL,NGLLX,NGLLY,NGLLZ
implicit none
! local parameters
@@ -816,7 +816,7 @@
subroutine get_iglob_vp(iglob,ispec,vp)
use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,FOUR_THIRDS
- use specfem_par,only: mustore,kappastore,ibool,myrank,NSPEC_AB
+ use specfem_par,only: mustore,kappastore,ibool,myrank
use specfem_par_acoustic,only: ACOUSTIC_SIMULATION,rhostore
use specfem_par_elastic,only: ELASTIC_SIMULATION,rho_vp
implicit none
@@ -857,7 +857,7 @@
rhostore,ispec_is_acoustic, &
b_potential_acoustic,b_potential_dot_acoustic
use specfem_par_elastic,only: ELASTIC_SIMULATION,displ,veloc, &
- ispec_is_elastic,b_displ,b_veloc
+ ispec_is_elastic ! ,b_displ,b_veloc
use specfem_par,only: NSPEC_AB,NGLOB_AB,hprime_xx,hprime_yy,hprime_zz, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
ibool,SIMULATION_TYPE,GRAVITY
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/finalize_simulation.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/finalize_simulation.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -140,6 +140,18 @@
endif
endif
+ ! frees dynamically allocated memory
+
+ ! mass matrices
+ if( ELASTIC_SIMULATION ) then
+ deallocate(rmassx)
+ deallocate(rmassy)
+ deallocate(rmassz)
+ endif
+ if( ACOUSTIC_SIMULATION ) then
+ deallocate(rmass_acoustic)
+ endif
+
! close the main output file
if(myrank == 0) then
write(IMAIN,*)
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/initialize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/initialize_simulation.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/initialize_simulation.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -116,6 +116,8 @@
write(IMAIN,'(a)',advance='yes') ' external'
case( IMODEL_IPATI )
write(IMAIN,'(a)',advance='yes') ' ipati'
+ case( IMODEL_IPATI_WATER )
+ write(IMAIN,'(a)',advance='yes') ' ipati_water'
end select
write(IMAIN,*)
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -416,8 +416,8 @@
else
! on GPU
call it_update_displacement_ac_cuda(Mesh_pointer, NGLOB_AB, &
- deltat, deltatsqover2, deltatover2, &
- SIMULATION_TYPE, b_deltat, b_deltatsqover2, b_deltatover2)
+ deltat, deltatsqover2, deltatover2, &
+ b_deltat, b_deltatsqover2, b_deltatover2)
endif
! time marching potentials
@@ -457,7 +457,7 @@
! on GPU
! Includes SIM_TYPE 1 & 3 (for noise tomography)
call it_update_displacement_cuda(Mesh_pointer, size(displ), deltat, deltatsqover2,&
- deltatover2, SIMULATION_TYPE, b_deltat, b_deltatsqover2, b_deltatover2)
+ deltatover2, b_deltat, b_deltatsqover2, b_deltatover2)
endif
endif
@@ -821,7 +821,7 @@
! frees allocated memory on GPU
call prepare_cleanup_device(Mesh_pointer, &
- SIMULATION_TYPE,SAVE_FORWARD, &
+ SAVE_FORWARD, &
ACOUSTIC_SIMULATION,ELASTIC_SIMULATION, &
ABSORBING_CONDITIONS,NOISE_TOMOGRAPHY,COMPUTE_AND_STORE_STRAIN, &
ATTENUATION,ANISOTROPY,OCEANS, &
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/model_update.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/model_update.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/model_update.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -827,47 +827,47 @@
rmass_new(:) = 0._CUSTOM_REAL
+ ! note: this does not update the absorbing boundary contributions to the mass matrix
+ ! elastic mass matrix
do ispec=1,nspec
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
+ if( ispec_is_elastic(ispec) ) then
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
- weight = wxgll(i)*wygll(j)*wzgll(k)
- jacobianl = jacobianstore(i,j,k,ispec)
+ weight = wxgll(i)*wygll(j)*wzgll(k)
+ jacobianl = jacobianstore(i,j,k,ispec)
- if( myrank == 0) then
- print*, 'weight', weight
- print*, 'jacobianl', jacobianl
- endif
+ if( myrank == 0) then
+ print*, 'weight', weight
+ print*, 'jacobianl', jacobianl
+ endif
-! elastic mass matrix
- if( ispec_is_elastic(ispec) ) then
if(CUSTOM_REAL == SIZE_REAL) then
rmass_new(iglob) = rmass_new(iglob) + &
- sngl( dble(jacobianl) * weight * dble(rhostore_new(i,j,k,ispec)) )
+ sngl( dble(jacobianl) * weight * dble(rhostore_new(i,j,k,ispec)) )
else
- rmass_new(iglob) = rmass_new(iglob) + &
- jacobianl * weight * rhostore_new(i,j,k,ispec)
+ rmass_new(iglob) = rmass_new(iglob) + &
+ jacobianl * weight * rhostore_new(i,j,k,ispec)
endif
- endif
+ enddo
enddo
enddo
- enddo
+ endif
enddo ! nspec
call sync_all()
-
+ ! dummy allocations, arrays are not needed since the update here only works for elastic models
allocate(rmass_acoustic_new(NGLOB))
allocate(rmass_solid_poroelastic_new(NGLOB))
allocate(rmass_fluid_poroelastic_new(NGLOB))
rmass_acoustic_new = 0._CUSTOM_REAL
rmass_solid_poroelastic_new = 0._CUSTOM_REAL
rmass_fluid_poroelastic_new = 0._CUSTOM_REAL
-
-
+
!-------- attenuation -------
! read the proc*attenuation.vtk for the old model in LOCAL_PATH and store qmu_attenuation_store
@@ -979,8 +979,6 @@
abs_boundary_ijk,abs_boundary_ispec,num_abs_boundary_faces, &
free_surface_normal,free_surface_jacobian2Dw, &
free_surface_ijk,free_surface_ispec,num_free_surface_faces, &
- coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
- coupling_ac_el_ijk,coupling_ac_el_ispec,num_coupling_ac_el_faces, &
num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
max_nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
prname_new,SAVE_MESH_FILES,ANISOTROPY,NSPEC_ANISO, &
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -219,6 +219,14 @@
! the mass matrix needs to be assembled with MPI here once and for all
if(ACOUSTIC_SIMULATION) then
+
+ ! adds contributions
+ if( ABSORBING_CONDITIONS ) then
+ rmass_acoustic(:) = rmass_acoustic(:) + rmassz_acoustic(:)
+ ! not needed anymore
+ deallocate(rmassz_acoustic)
+ endif
+
call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_acoustic, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
@@ -231,21 +239,52 @@
endif
if(ELASTIC_SIMULATION) then
- call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass, &
+
+ ! switches to three-component mass matrix
+ if( ABSORBING_CONDITIONS ) then
+ ! adds boundary contributions
+ rmassx(:) = rmass(:) + rmassx(:)
+ rmassy(:) = rmass(:) + rmassy(:)
+ rmassz(:) = rmass(:) + rmassz(:)
+ else
+ rmassx(:) = rmass(:)
+ rmassy(:) = rmass(:)
+ rmassz(:) = rmass(:)
+ endif
+ ! not needed anymore
+ deallocate(rmass)
+
+ ! assemble mass matrix
+ call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmassx, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
my_neighbours_ext_mesh)
+ call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmassy, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh)
+ call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmassz, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh)
! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
- where(rmass <= 0._CUSTOM_REAL) rmass = 1._CUSTOM_REAL
- rmass(:) = 1._CUSTOM_REAL / rmass(:)
+ where(rmassx <= 0._CUSTOM_REAL) rmassx = 1._CUSTOM_REAL
+ where(rmassy <= 0._CUSTOM_REAL) rmassy = 1._CUSTOM_REAL
+ where(rmassz <= 0._CUSTOM_REAL) rmassz = 1._CUSTOM_REAL
+ rmassx(:) = 1._CUSTOM_REAL / rmassx(:)
+ rmassy(:) = 1._CUSTOM_REAL / rmassy(:)
+ rmassz(:) = 1._CUSTOM_REAL / rmassz(:)
+ ! ocean load
if(OCEANS ) then
- if( minval(rmass_ocean_load(:)) <= 0._CUSTOM_REAL) &
- call exit_MPI(myrank,'negative ocean load mass matrix term')
- rmass_ocean_load(:) = 1. / rmass_ocean_load(:)
+ call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_ocean_load, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh)
+ where(rmass_ocean_load <= 0._CUSTOM_REAL) rmass_ocean_load = 1._CUSTOM_REAL
+ rmass_ocean_load(:) = 1._CUSTOM_REAL / rmass_ocean_load(:)
endif
-
endif
if(POROELASTIC_SIMULATION) then
@@ -1077,7 +1116,7 @@
ispec_is_acoustic, &
NOISE_TOMOGRAPHY,num_free_surface_faces, &
free_surface_ispec,free_surface_ijk, &
- ABSORBING_CONDITIONS,b_reclen_potential,b_absorb_potential, &
+ b_reclen_potential,b_absorb_potential, &
ELASTIC_SIMULATION, num_coupling_ac_el_faces, &
coupling_ac_el_ispec,coupling_ac_el_ijk, &
coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
@@ -1086,19 +1125,19 @@
if( SIMULATION_TYPE == 3 ) &
call prepare_fields_acoustic_adj_dev(Mesh_pointer, &
- SIMULATION_TYPE, &
- APPROXIMATE_HESS_KL)
+ APPROXIMATE_HESS_KL)
endif
! prepares fields on GPU for elastic simulations
if( ELASTIC_SIMULATION ) then
call prepare_fields_elastic_device(Mesh_pointer, NDIM*NGLOB_AB, &
- rmass,rho_vp,rho_vs, &
+ rmassx,rmassy,rmassz, &
+ rho_vp,rho_vs, &
num_phase_ispec_elastic,phase_ispec_inner_elastic, &
ispec_is_elastic, &
- ABSORBING_CONDITIONS,b_absorb_field,b_reclen_field, &
- SIMULATION_TYPE,SAVE_FORWARD, &
+ b_absorb_field,b_reclen_field, &
+ SAVE_FORWARD, &
COMPUTE_AND_STORE_STRAIN, &
epsilondev_xx,epsilondev_yy,epsilondev_xy, &
epsilondev_xz,epsilondev_yz, &
@@ -1122,7 +1161,6 @@
if( SIMULATION_TYPE == 3 ) &
call prepare_fields_elastic_adj_dev(Mesh_pointer, NDIM*NGLOB_AB, &
- SIMULATION_TYPE, &
COMPUTE_AND_STORE_STRAIN, &
epsilon_trace_over_3, &
b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
@@ -1155,7 +1193,7 @@
free_surface_ispec, &
free_surface_ijk, &
num_free_surface_faces, &
- SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
+ NOISE_TOMOGRAPHY, &
NSTEP,noise_sourcearray, &
normal_x_noise,normal_y_noise,normal_z_noise, &
mask_noise,free_surface_jacobian2Dw)
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/read_mesh_databases.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/read_mesh_databases.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -95,6 +95,12 @@
! mass matrix, density
allocate(rmass_acoustic(NGLOB_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array rmass_acoustic'
+
+ ! initializes mass matrix contribution
+ allocate(rmassz_acoustic(NGLOB_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array rmassz_acoustic'
+ rmassz_acoustic(:) = 0._CUSTOM_REAL
+
allocate(rhostore(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array rhostore'
@@ -120,8 +126,20 @@
allocate(accel_adj_coupling(NDIM,NGLOB_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array accel_adj_coupling'
endif
+
+ ! allocates mass matrix
allocate(rmass(NGLOB_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array rmass'
+ ! initializes mass matrix contributions
+ allocate(rmassx(NGLOB_AB), &
+ rmassy(NGLOB_AB), &
+ rmassz(NGLOB_AB), &
+ stat=ier)
+ if( ier /= 0 ) stop 'error allocating array rmassx,rmassy,rmassz'
+ rmassx(:) = 0._CUSTOM_REAL
+ rmassy(:) = 0._CUSTOM_REAL
+ rmassz(:) = 0._CUSTOM_REAL
+
allocate(rho_vp(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array rho_vp'
allocate(rho_vs(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
@@ -312,6 +330,15 @@
read(27) abs_boundary_ijk
read(27) abs_boundary_jacobian2Dw
read(27) abs_boundary_normal
+ ! reads mass matrix contributions on boundaries
+ if( ELASTIC_SIMULATION ) then
+ read(27) rmassx
+ read(27) rmassy
+ read(27) rmassz
+ endif
+ if( ACOUSTIC_SIMULATION ) then
+ read(27) rmassz_acoustic
+ endif
endif
! free surface
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/save_external_bin_m_up.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/save_external_bin_m_up.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/save_external_bin_m_up.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -45,9 +45,6 @@
free_surface_normal,free_surface_jacobian2Dw, &
free_surface_ijk,free_surface_ispec, &
num_free_surface_faces, &
- coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
- coupling_ac_el_ijk,coupling_ac_el_ispec, &
- num_coupling_ac_el_faces, &
num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
max_nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
prname,SAVE_MESH_FILES, &
@@ -58,6 +55,24 @@
c55store,c56store,c66store, &
ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
+ use specfem_par,only: &
+ ispec_is_inner
+
+ use specfem_par_elastic,only: &
+ rmassx,rmassy,rmassz, &
+ nspec_inner_elastic,nspec_outer_elastic,num_phase_ispec_elastic,phase_ispec_inner_elastic, &
+ num_colors_outer_elastic,num_colors_inner_elastic,num_elem_colors_elastic
+
+ use specfem_par_acoustic,only: &
+ rmassz_acoustic,num_coupling_ac_po_faces, &
+ num_coupling_ac_el_faces,coupling_ac_el_ijk,coupling_ac_el_ispec, &
+ nspec_inner_acoustic,nspec_outer_acoustic,num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
+ num_colors_outer_acoustic,num_colors_inner_acoustic,num_elem_colors_acoustic
+
+ use specfem_par_poroelastic,only: &
+ num_coupling_el_po_faces, &
+ nspec_inner_poroelastic,nspec_outer_poroelastic,num_phase_ispec_poroelastic,phase_ispec_inner_poroelastic
+
implicit none
include "constants.h"
@@ -76,6 +91,7 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rhostore,kappastore,mustore
real(kind=CUSTOM_REAL), dimension(nglob) :: rmass,rmass_acoustic, &
rmass_solid_poroelastic,rmass_fluid_poroelastic
+
! ocean load
logical :: OCEANS
integer :: NGLOB_OCEAN
@@ -99,13 +115,6 @@
integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
integer :: free_surface_ispec(num_free_surface_faces)
-! acoustic-elastic coupling surface
- integer :: num_coupling_ac_el_faces
- real(kind=CUSTOM_REAL) :: coupling_ac_el_normal(NDIM,NGLLSQUARE,num_coupling_ac_el_faces)
- real(kind=CUSTOM_REAL) :: coupling_ac_el_jacobian2Dw(NGLLSQUARE,num_coupling_ac_el_faces)
- integer :: coupling_ac_el_ijk(3,NGLLSQUARE,num_coupling_ac_el_faces)
- integer :: coupling_ac_el_ispec(num_coupling_ac_el_faces)
-
! MPI interfaces
integer :: num_interfaces_ext_mesh
integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
@@ -192,7 +201,6 @@
!pll Stacey
write(IOUT) rho_vp
write(IOUT) rho_vs
-
endif
! poroelastic
@@ -200,39 +208,65 @@
if( POROELASTIC_SIMULATION ) then
write(IOUT) rmass_solid_poroelastic
write(IOUT) rmass_fluid_poroelastic
+ stop 'model update with poroelastic domains not supported yet'
endif
! absorbing boundary surface
write(IOUT) num_abs_boundary_faces
- write(IOUT) abs_boundary_ispec
- write(IOUT) abs_boundary_ijk
- write(IOUT) abs_boundary_jacobian2Dw
- write(IOUT) abs_boundary_normal
-
+ if(num_abs_boundary_faces > 0 ) then
+ write(IOUT) abs_boundary_ispec
+ write(IOUT) abs_boundary_ijk
+ write(IOUT) abs_boundary_jacobian2Dw
+ write(IOUT) abs_boundary_normal
+ ! store mass matrix contributions
+ if(ELASTIC_SIMULATION) then
+ write(IOUT) rmassx
+ write(IOUT) rmassy
+ write(IOUT) rmassz
+ endif
+ if(ACOUSTIC_SIMULATION) then
+ write(IOUT) rmassz_acoustic
+ endif
+ endif
+
! free surface
write(IOUT) num_free_surface_faces
- write(IOUT) free_surface_ispec
- write(IOUT) free_surface_ijk
- write(IOUT) free_surface_jacobian2Dw
- write(IOUT) free_surface_normal
+ if( num_free_surface_faces > 0 ) then
+ write(IOUT) free_surface_ispec
+ write(IOUT) free_surface_ijk
+ write(IOUT) free_surface_jacobian2Dw
+ write(IOUT) free_surface_normal
+ endif
! acoustic-elastic coupling surface
write(IOUT) num_coupling_ac_el_faces
- write(IOUT) coupling_ac_el_ispec
- write(IOUT) coupling_ac_el_ijk
- write(IOUT) coupling_ac_el_jacobian2Dw
- write(IOUT) coupling_ac_el_normal
+ if( num_coupling_ac_el_faces > 0 ) then
+ stop 'coupling ac_po not updated yet'
+ endif
+! acoustic-poroelastic coupling surface
+ write(IOUT) num_coupling_ac_po_faces
+ if( num_coupling_ac_po_faces > 0 ) then
+ stop 'coupling ac_po not updated yet'
+ endif
+
+! elastic-poroelastic coupling surface
+ write(IOUT) num_coupling_el_po_faces
+ if( num_coupling_el_po_faces > 0 ) then
+ stop 'coupling ac_po not updated yet'
+ endif
+
!MPI interfaces
write(IOUT) num_interfaces_ext_mesh
- write(IOUT) max_nibool_interfaces_ext_mesh !magnoni
- write(IOUT) my_neighbours_ext_mesh
- write(IOUT) nibool_interfaces_ext_mesh !magnoni
- write(IOUT) ibool_interfaces_ext_mesh !magnoni
+ if( num_interfaces_ext_mesh > 0 ) then
+ write(IOUT) max_nibool_interfaces_ext_mesh
+ write(IOUT) my_neighbours_ext_mesh
+ write(IOUT) nibool_interfaces_ext_mesh
+ write(IOUT) ibool_interfaces_ext_mesh !magnoni
+ endif
-
! anisotropy
- if( ANISOTROPY ) then
+ if( ELASTIC_SIMULATION .and. ANISOTROPY ) then
write(IOUT) c11store
write(IOUT) c12store
write(IOUT) c13store
@@ -256,6 +290,39 @@
write(IOUT) c66store
endif
+! inner/outer elements
+ write(IOUT) ispec_is_inner
+
+ if( ACOUSTIC_SIMULATION ) then
+ write(IOUT) nspec_inner_acoustic,nspec_outer_acoustic
+ write(IOUT) num_phase_ispec_acoustic
+ if(num_phase_ispec_acoustic > 0 ) write(IOUT) phase_ispec_inner_acoustic
+ endif
+
+ if( ELASTIC_SIMULATION ) then
+ write(IOUT) nspec_inner_elastic,nspec_outer_elastic
+ write(IOUT) num_phase_ispec_elastic
+ if(num_phase_ispec_elastic > 0 ) write(IOUT) phase_ispec_inner_elastic
+ endif
+
+ if( POROELASTIC_SIMULATION ) then
+ write(IOUT) nspec_inner_poroelastic,nspec_outer_poroelastic
+ write(IOUT) num_phase_ispec_poroelastic
+ if(num_phase_ispec_poroelastic > 0 ) write(IOUT) phase_ispec_inner_poroelastic
+ endif
+
+ ! mesh coloring
+ if( USE_MESH_COLORING_GPU ) then
+ if( ACOUSTIC_SIMULATION ) then
+ write(IOUT) num_colors_outer_acoustic,num_colors_inner_acoustic
+ write(IOUT) num_elem_colors_acoustic
+ endif
+ if( ELASTIC_SIMULATION ) then
+ write(IOUT) num_colors_outer_elastic,num_colors_inner_elastic
+ write(IOUT) num_elem_colors_elastic
+ endif
+ endif
+
close(IOUT)
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -297,6 +297,7 @@
real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass
! Stacey
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassx,rmassy,rmassz
real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rho_vp,rho_vs
! anisotropic
@@ -381,6 +382,7 @@
! mass matrix
real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_acoustic
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassz_acoustic
! acoustic-elastic coupling surface
real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: coupling_ac_el_normal
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/write_output_SU.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/write_output_SU.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/write_output_SU.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -38,17 +38,12 @@
character(len=256) procname,final_LOCAL_PATH
integer :: irec_local,irec
- ! headers
- integer,parameter :: nheader=240 ! 240 bytes
-!! DK DK integer(kind=2) :: i2head(nheader/2) ! 2-byte-integer
- integer(kind=4) :: i4head(nheader/4) ! 4-byte-integer
-!! DK DK equivalence (i2head,i4head) ! share the same 240-byte-memory
-
double precision, allocatable, dimension(:) :: x_found,y_found,z_found
double precision :: x_found_source,y_found_source,z_found_source
real(kind=4),dimension(:),allocatable :: rtmpseis
- integer :: ier
+ real :: dx
+ integer :: i,ier
allocate(x_found(nrec),y_found(nrec),z_found(nrec),stat=ier)
if( ier /= 0 ) stop 'error allocating array x_found y_found z_found'
@@ -83,87 +78,141 @@
! write seismograms (dx)
open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dx_SU' ,&
- form='unformatted', access='direct', recl=240+4*NSTEP, iostat=ier)
+ status='unknown', access='direct', recl=4, iostat=ier)
+
if( ier /= 0 ) stop 'error opening ***_dx_SU file'
+ ! receiver interval
+ if( nrec > 1) then
+ dx = SNGL(x_found(2)-x_found(1))
+ else
+ dx = 0.0
+ endif
+
+
do irec_local = 1,nrec_local
- irec = number_receiver_global(irec_local)
- i4head(1) =irec
- i4head(11) =z_found(irec)
- i4head(13) =z_found_source
- i4head(19) =x_found_source !utm_x_source(1)
- i4head(20) =y_found_source !utm_y_source(1)
- i4head(21) =x_found(irec) !stutm_x(irec)
- i4head(22) =y_found(irec) !stutm_y(irec)
-!! DK DK i2head(58) =NSTEP
-!! DK DK i2head(59) =DT*1.0d6
-!! DK DK almost all modern processors are little-endian, thus we do not need an equivalence statement any more
-!! DK DK (some compilers give a warning message for the equivalent statement that we used to have above)
- i4head(58/2) = NSTEP + 65536*int(DT*1.0d6)
+ irec = number_receiver_global(irec_local)
- ! convert to real 4-byte
- rtmpseis(1:NSTEP) = seismograms_d(1,irec_local,1:NSTEP)
+ ! write section header
+ call write_SU_header(irec_local,irec,nrec,NSTEP,DT,dx, &
+ x_found(irec),y_found(irec),z_found(irec),x_found_source,y_found_source,z_found_source)
- ! write record
- write(IOUT_SU,rec=irec_local) i4head, rtmpseis
+ ! convert trace to real 4-byte
+ if( CUSTOM_REAL == SIZE_REAL) then
+ rtmpseis(1:NSTEP) = seismograms_d(1,irec_local,1:NSTEP)
+ else
+ rtmpseis(1:NSTEP) = sngl(seismograms_d(1,irec_local,1:NSTEP))
+ endif
+ do i=1,NSTEP
+ write(IOUT_SU,rec=irec_local*60+(irec_local-1)*NSTEP+i) rtmpseis(i)
+ enddo
+
enddo
close(IOUT_SU)
! write seismograms (dy)
open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dy_SU' ,&
- form='unformatted', access='direct', recl=240+4*NSTEP, iostat=ier)
+ status='unknown', access='direct', recl=4, iostat=ier)
+
if( ier /= 0 ) stop 'error opening ***_dy_SU file'
do irec_local = 1,nrec_local
- irec = number_receiver_global(irec_local)
- i4head(1) =irec
- i4head(11) =z_found(irec)
- i4head(13) =z_found_source
- i4head(19) =x_found_source !utm_x_source(1)
- i4head(20) =y_found_source !utm_y_source(1)
- i4head(21) =x_found(irec) !stutm_x(irec)
- i4head(22) =y_found(irec) !stutm_y(irec)
-!! DK DK i2head(58) =NSTEP
-!! DK DK i2head(59) =DT*1.0d6
-!! DK DK almost all modern processors are little-endian, thus we do not need an equivalence statement any more
-!! DK DK (some compilers give a warning message for the equivalent statement that we used to have above)
- i4head(58/2) = NSTEP + 65536*int(DT*1.0d6)
+ irec = number_receiver_global(irec_local)
- ! convert to real 4-byte
- rtmpseis(1:NSTEP) = seismograms_d(2,irec_local,1:NSTEP)
+ ! write section header
+ call write_SU_header(irec_local,irec,nrec,NSTEP,DT,dx, &
+ x_found(irec),y_found(irec),z_found(irec),x_found_source,y_found_source,z_found_source)
- ! write record
- write(IOUT_SU,rec=irec_local) i4head, rtmpseis
+ ! convert trace to real 4-byte
+ if( CUSTOM_REAL == SIZE_REAL) then
+ rtmpseis(1:NSTEP) = seismograms_d(2,irec_local,1:NSTEP)
+ else
+ rtmpseis(1:NSTEP) = sngl(seismograms_d(2,irec_local,1:NSTEP))
+ endif
+ do i=1,NSTEP
+ write(IOUT_SU,rec=irec_local*60+(irec_local-1)*NSTEP+i) rtmpseis(i)
+ enddo
enddo
close(IOUT_SU)
! write seismograms (dz)
open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dz_SU' ,&
- form='unformatted', access='direct', recl=240+4*NSTEP, iostat=ier)
+ status='unknown', access='direct', recl=4, iostat=ier)
+
if( ier /= 0 ) stop 'error opening ***_dz_SU file'
do irec_local = 1,nrec_local
- irec = number_receiver_global(irec_local)
- i4head(1) =irec
- i4head(11) =z_found(irec)
- i4head(13) =z_found_source
- i4head(19) =x_found_source !utm_x_source(1)
- i4head(20) =y_found_source !utm_y_source(1)
- i4head(21) =x_found(irec) !stutm_x(irec)
- i4head(22) =y_found(irec) !stutm_y(irec)
-!! DK DK i2head(58) =NSTEP
-!! DK DK i2head(59) =DT*1.0d6
-!! DK DK almost all modern processors are little-endian, thus we do not need an equivalence statement any more
-!! DK DK (some compilers give a warning message for the equivalent statement that we used to have above)
- i4head(58/2) = NSTEP + 65536*int(DT*1.0d6)
+ irec = number_receiver_global(irec_local)
- ! convert to real 4-byte
- rtmpseis(1:NSTEP) = seismograms_d(3,irec_local,1:NSTEP)
+ ! write section header
+ call write_SU_header(irec_local,irec,nrec,NSTEP,DT,dx, &
+ x_found(irec),y_found(irec),z_found(irec),x_found_source,y_found_source,z_found_source)
- ! write record
- write(IOUT_SU,rec=irec_local) i4head, rtmpseis
+ ! convert trace to real 4-byte
+ if( CUSTOM_REAL == SIZE_REAL) then
+ rtmpseis(1:NSTEP) = seismograms_d(3,irec_local,1:NSTEP)
+ else
+ rtmpseis(1:NSTEP) = sngl(seismograms_d(3,irec_local,1:NSTEP))
+ endif
+ do i=1,NSTEP
+ write(IOUT_SU,rec=irec_local*60+(irec_local-1)*NSTEP+i) rtmpseis(i)
+ enddo
+
enddo
close(IOUT_SU)
end subroutine write_output_SU
+!
+!------------------------------------------------------------------------------------------------------
+!
+
+ subroutine write_SU_header(irec_local,irec,nrec,NSTEP,DT,dx, &
+ x_found,y_found,z_found,x_found_source,y_found_source,z_found_source)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: irec_local,irec,nrec
+ integer :: NSTEP
+ double precision :: x_found,y_found,z_found
+ double precision :: x_found_source,y_found_source,z_found_source
+ double precision :: DT
+ real :: dx
+
+ ! local parameters
+ integer(kind=2) :: header2(2)
+
+ ! write SU headers (refer to Seismic Unix for details)
+ write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+1) irec ! receiver ID
+ write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+10) NINT(x_found-x_found_source) ! offset
+ write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+11) NINT(z_found)
+
+ write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+13) NINT(z_found_source) ! source location
+ write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+19) NINT(x_found_source) ! source location
+ write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+20) NINT(y_found_source) ! source location
+
+ write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+21) NINT(x_found) ! receiver location xr
+ write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+22) NINT(y_found) ! receiver location zr
+
+ if (nrec>1) write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+48) dx ! receiver interval
+
+ ! time steps
+ header2(1)=0 ! dummy
+ header2(2)=NSTEP
+ write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+29) header2
+
+ ! time increment
+ if( NINT(DT*1.0d6) < 65536 ) then
+ header2(1)=NINT(DT*1.0d6) ! deltat (unit: 10^{-6} second)
+ else if( NINT(DT*1.0d3) < 65536 ) then
+ header2(1)=NINT(DT*1.0d3) ! deltat (unit: 10^{-3} second)
+ else
+ header2(1)=NINT(DT) ! deltat (unit: 10^{0} second)
+ endif
+ header2(2)=0 ! dummy
+ write(IOUT_SU,rec=(irec_local-1)*60+(irec_local-1)*NSTEP+30) header2
+
+ end subroutine write_SU_header
+
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/write_seismograms.f90 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/write_seismograms.f90 2012-09-03 03:54:59 UTC (rev 20674)
@@ -55,11 +55,10 @@
if( ACOUSTIC_SIMULATION ) then
! only copy corresponding elements to CPU host
! timing: Elapsed time: 5.230904e-04
- call transfer_station_ac_from_device( &
- potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
+ call transfer_station_ac_from_device(potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
Mesh_pointer,number_receiver_global, &
- ispec_selected_rec,ispec_selected_source,ibool,SIMULATION_TYPE)
+ ispec_selected_rec,ispec_selected_source,ibool)
! alternative: transfers whole fields
! timing: Elapsed time: 4.138947e-03
@@ -71,7 +70,6 @@
if( ELASTIC_SIMULATION ) then
if(USE_CUDA_SEISMOGRAMS) then
call transfer_seismograms_el_from_d(nrec_local,Mesh_pointer, &
- SIMULATION_TYPE,&
seismograms_d,seismograms_v,seismograms_a,&
it)
else
@@ -79,7 +77,7 @@
b_displ,b_veloc,b_accel, &
Mesh_pointer,number_receiver_global, &
ispec_selected_rec,ispec_selected_source, &
- ibool,SIMULATION_TYPE)
+ ibool)
endif
! alternative: transfers whole fields
! call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
Modified: seismo/3D/SPECFEM3D/trunk/utils/update_headers_change_word_f90.pl
===================================================================
--- seismo/3D/SPECFEM3D/trunk/utils/update_headers_change_word_f90.pl 2012-09-03 02:09:56 UTC (rev 20673)
+++ seismo/3D/SPECFEM3D/trunk/utils/update_headers_change_word_f90.pl 2012-09-03 03:54:59 UTC (rev 20674)
@@ -16,7 +16,7 @@
# in the code (which is dangerous, but really easier to program...)
#
- at objects = `ls ../src/*/*.f90 ../src/*/*.F90 ../src/*/*.c ../src/*/*.cu ../src/*/*.h.in ../src/*/*.h`;
+ at objects = `ls src/*/*.f90 src/*/*.F90 src/*/*.c src/*/*.cu src/*/*.h.in src/*/*.h`;
foreach $name (@objects) {
chop $name;
More information about the CIG-COMMITS
mailing list