[cig-commits] r19129 - in seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src: cuda decompose_mesh_SCOTCH generate_databases shared specfem3D
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Sat Oct 29 19:25:30 PDT 2011
Author: danielpeter
Date: 2011-10-29 19:25:28 -0700 (Sat, 29 Oct 2011)
New Revision: 19129
Modified:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/decompose_mesh_SCOTCH.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/part_decompose_mesh_SCOTCH.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/program_decompose_mesh_SCOTCH.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/calc_jacobian.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_global.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/memory_eval.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_external_values.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_tomography.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_arrays_source.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/convolve_source_timefunction.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_name_database.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_serial_name_database.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/define_derivation_matrices.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/exit_mpi.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_attenuation_model.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_cmt.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_jacobian_boundaries.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape2D.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape3D.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/gll_library.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/hex_nodes.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/lagrange_poly.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_topo_bathy_file.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_value_parameters.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/recompute_jacobian.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_header_file.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/smooth_vol_data.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/utm_geo.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/comp_source_time_function.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_receivers.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_source.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/save_adjoint_kernels.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_GLL_points.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
Log:
updates preparation routines; kernels tested w/ absorbing boundaries; version used for HP node benchmark
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -45,17 +45,17 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(check_max_norm_displ_gpu,
CHECK_MAX_NORM_DISPL_GPU)(int* size, float* displ,long* Mesh_pointer_f,int* announceID) {
TRACE("check_max_norm_displ_gpu");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
cudaMemcpy(displ, mp->d_displ,*size*sizeof(float),cudaMemcpyDeviceToHost);
float maxnorm=0;
-
+
for(int i=0;i<*size;i++) {
maxnorm = MAX(maxnorm,fabsf(displ[i]));
}
@@ -64,7 +64,7 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(check_max_norm_vector,
CHECK_MAX_NORM_VECTOR)(int* size, float* vector1, int* announceID) {
@@ -85,14 +85,14 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(check_max_norm_displ,
CHECK_MAX_NORM_DISPL)(int* size, float* displ, int* announceID) {
TRACE("check_max_norm_displ");
float maxnorm=0;
-
+
for(int i=0;i<*size;i++) {
maxnorm = MAX(maxnorm,fabsf(displ[i]));
}
@@ -101,22 +101,22 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(check_max_norm_b_displ_gpu,
CHECK_MAX_NORM_B_DISPL_GPU)(int* size, float* b_displ,long* Mesh_pointer_f,int* announceID) {
TRACE("check_max_norm_b_displ_gpu");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
float* b_accel = (float*)malloc(*size*sizeof(float));
-
+
cudaMemcpy(b_displ, mp->d_b_displ,*size*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(float),cudaMemcpyDeviceToHost);
float maxnorm=0;
float maxnorm_accel=0;
-
+
for(int i=0;i<*size;i++) {
maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
maxnorm_accel = MAX(maxnorm,fabsf(b_accel[i]));
@@ -128,18 +128,18 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(check_max_norm_b_accel_gpu,
CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, float* b_accel,long* Mesh_pointer_f,int* announceID) {
TRACE("check_max_norm_b_accel_gpu");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(float),cudaMemcpyDeviceToHost);
float maxnorm=0;
-
+
for(int i=0;i<*size;i++) {
maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
}
@@ -148,18 +148,18 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(check_max_norm_b_veloc_gpu,
CHECK_MAX_NORM_B_VELOC_GPU)(int* size, float* b_veloc,long* Mesh_pointer_f,int* announceID) {
TRACE("check_max_norm_b_veloc_gpu");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
cudaMemcpy(b_veloc, mp->d_b_veloc,*size*sizeof(float),cudaMemcpyDeviceToHost);
float maxnorm=0;
-
+
for(int i=0;i<*size;i++) {
maxnorm = MAX(maxnorm,fabsf(b_veloc[i]));
}
@@ -168,14 +168,14 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(check_max_norm_b_displ,
CHECK_MAX_NORM_B_DISPL)(int* size, float* b_displ,int* announceID) {
TRACE("check_max_norm_b_displ");
-
+
float maxnorm=0;
-
+
for(int i=0;i<*size;i++) {
maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
}
@@ -184,14 +184,14 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(check_max_norm_b_accel,
CHECK_MAX_NORM_B_ACCEL)(int* size, float* b_accel,int* announceID) {
TRACE("check_max_norm_b_accel");
-
+
float maxnorm=0;
-
+
for(int i=0;i<*size;i++) {
maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
}
@@ -200,7 +200,7 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(check_error_vectors,
CHECK_ERROR_VECTORS)(int* sizef, float* vector1,float* vector2) {
@@ -213,9 +213,9 @@
double temp;
double maxerr=0;
int maxerrorloc;
-
+
for(int i=0;i<size;++i) {
- temp = vector1[i]-vector2[i];
+ temp = vector1[i]-vector2[i];
diff2 += temp*temp;
sum += vector1[i]*vector1[i];
if(maxerr < fabsf(temp)) {
@@ -224,17 +224,18 @@
}
}
- printf("rel error = %f, maxerr = %e @ %d\n",diff2/sum,maxerr,maxerrorloc);
+ printf("rel error = %f, maxerr = %e @ %d\n",diff2/sum,maxerr,maxerrorloc);
int myrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
- if(myrank==0) {
+ if(myrank == 0) {
for(int i=maxerrorloc;i>maxerrorloc-5;i--) {
printf("[%d]: %e vs. %e\n",i,vector1[i],vector2[i]);
}
}
-
+
}
+
/* ----------------------------------------------------------------------------------------------- */
// Auxiliary functions
@@ -244,9 +245,9 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(get_max_accel,
- GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {
+ GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {
TRACE("get_max_accel");
@@ -272,11 +273,11 @@
/* ----------------------------------------------------------------------------------------------- */
__global__ void get_maximum_kernel(float* array, int size, float* d_max){
-
- /* simplest version: uses only 1 thread
+
+ /* simplest version: uses only 1 thread
float max;
max = 0;
- // finds maximum value in array
+ // finds maximum value in array
if( size > 0 ){
max = abs(array[0]);
for( int i=1; i < size; i++){
@@ -285,62 +286,62 @@
}
*d_max = max;
*/
-
+
// reduction example:
__shared__ float sdata[256] ;
-
+
// load shared mem
unsigned int tid = threadIdx.x;
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
-
+
// loads absolute values into shared memory
sdata[tid] = (i < size) ? fabs(array[i]) : 0.0 ;
-
+
__syncthreads();
-
+
// do reduction in shared mem
- for(unsigned int s=blockDim.x/2; s>0; s>>=1)
+ for(unsigned int s=blockDim.x/2; s>0; s>>=1)
{
if (tid < s){
- // summation:
+ // summation:
//sdata[tid] += sdata[tid + s];
- // maximum:
+ // maximum:
if( sdata[tid] < sdata[tid + s] ) sdata[tid] = sdata[tid + s];
}
__syncthreads();
}
-
+
// write result for this block to global mem
- if (tid == 0) d_max[blockIdx.x] = sdata[0];
-
+ if (tid == 0) d_max[blockIdx.x] = sdata[0];
+
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(get_norm_acoustic_from_device_cuda,
- GET_NORM_ACOUSTIC_FROM_DEVICE_CUDA)(float* norm,
+void FC_FUNC_(get_norm_acoustic_from_device,
+ GET_NORM_ACOUSTIC_FROM_DEVICE)(float* norm,
long* Mesh_pointer_f,
int* SIMULATION_TYPE) {
-
-TRACE("get_norm_acoustic_from_device_cuda");
+
+TRACE("get_norm_acoustic_from_device");
//double start_time = get_time();
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
float max;
float *d_max;
-
-
-
+
+
+
max = 0;
-
+
/* way 1 : timing Elapsed time: 8.464813e-03
float* h_array;
h_array = (float*)calloc(mp->NGLOB_AB,sizeof(float));
-
+
print_CUDA_error_if_any(cudaMemcpy(h_array,mp->d_potential_dot_dot_acoustic,
sizeof(float)*(mp->NGLOB_AB),cudaMemcpyDeviceToHost),131);
-
+
// finds maximum value in array
max = h_array[0];
for( int i=1; i < mp->NGLOB_AB; i++){
@@ -348,72 +349,72 @@
}
free(h_array);
*/
-
+
/* way 2: timing Elapsed time: 8.818102e-02
// launch simple kernel
cudaMalloc((void**)&d_max,sizeof(float));
-
+
dim3 grid(1,1);
dim3 threads(1,1,1);
-
+
get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
mp->NGLOB_AB,
- d_max);
+ d_max);
print_CUDA_error_if_any(cudaMemcpy(&max,d_max, sizeof(float), cudaMemcpyDeviceToHost),222);
-
+
cudaFree(d_max);
*/
-
+
// way 2 b: timing Elapsed time: 1.236916e-03
// launch simple reduction kernel
float* h_max;
int blocksize = 256;
-
+
int num_blocks_x = ceil(mp->NGLOB_AB/blocksize);
//printf("num_blocks_x %i \n",num_blocks_x);
-
- h_max = (float*) calloc(num_blocks_x,sizeof(float));
+
+ h_max = (float*) calloc(num_blocks_x,sizeof(float));
cudaMalloc((void**)&d_max,num_blocks_x*sizeof(float));
-
+
dim3 grid(num_blocks_x,1);
dim3 threads(blocksize,1,1);
-
- if(*SIMULATION_TYPE == 1 ){
+
+ if(*SIMULATION_TYPE == 1 ){
get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
mp->NGLOB_AB,
- d_max);
+ d_max);
}
-
+
if(*SIMULATION_TYPE == 3 ){
get_maximum_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
mp->NGLOB_AB,
- d_max);
- }
-
+ d_max);
+ }
+
print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(float),cudaMemcpyDeviceToHost),222);
-
+
// determines max for all blocks
max = h_max[0];
for(int i=1;i<num_blocks_x;i++) {
if( max < h_max[i]) max = h_max[i];
}
-
+
cudaFree(d_max);
free(h_max);
-
+
/* way 3: doesn't work properly...
cublasStatus status;
-
- // Initialize CUBLAS
- status = cublasInit();
+
+ // Initialize CUBLAS
+ status = cublasInit();
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! CUBLAS initialization error\n");
exit(1);
}
-
+
// cublas function: cublasIsamax
- // finds the smallest index of the maximum magnitude element of single
- // precision vector x
+ // finds the smallest index of the maximum magnitude element of single
+ // precision vector x
int incr = 1;
int imax = 0;
imax = cublasIsamax(mp->NGLOB_AB,(float*)mp->d_potential_dot_dot_acoustic, incr);
@@ -421,29 +422,29 @@
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! CUBLAS error in cublasIsamax\n");
exit(1);
- }
-
+ }
+
print_CUDA_error_if_any(cudaMemcpy(&max,&(mp->d_potential_dot_dot_acoustic[imax]), sizeof(float), cudaMemcpyDeviceToHost),222);
-
+
printf("maximum %i %i %f \n",mp->NGLOB_AB,imax,max);
-
- // Shutdown
+
+ // Shutdown
status = cublasShutdown();
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! shutdown error (A)\n");
exit(1);
}
-
+
*/
-
+
// return result
- *norm = max;
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ *norm = max;
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
- exit_on_cuda_error("after get_norm_acoustic_from_device_cuda");
-#endif
+ exit_on_cuda_error("after get_norm_acoustic_from_device");
+#endif
}
/* ----------------------------------------------------------------------------------------------- */
@@ -453,99 +454,99 @@
/* ----------------------------------------------------------------------------------------------- */
__global__ void get_maximum_vector_kernel(float* array, int size, float* d_max){
-
+
// reduction example:
__shared__ float sdata[256] ;
-
+
// load shared mem
unsigned int tid = threadIdx.x;
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
-
- // loads values into shared memory: assume array is a vector array
+
+ // loads values into shared memory: assume array is a vector array
sdata[tid] = (i < size) ? sqrt(array[i*3]*array[i*3]
+ array[i*3+1]*array[i*3+1]
+ array[i*3+2]*array[i*3+2]) : 0.0 ;
-
+
__syncthreads();
-
+
// do reduction in shared mem
- for(unsigned int s=blockDim.x/2; s>0; s>>=1)
+ for(unsigned int s=blockDim.x/2; s>0; s>>=1)
{
if (tid < s){
- // summation:
+ // summation:
//sdata[tid] += sdata[tid + s];
- // maximum:
+ // maximum:
if( sdata[tid] < sdata[tid + s] ) sdata[tid] = sdata[tid + s];
}
__syncthreads();
}
-
+
// write result for this block to global mem
- if (tid == 0) d_max[blockIdx.x] = sdata[0];
-
+ if (tid == 0) d_max[blockIdx.x] = sdata[0];
+
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(get_norm_elastic_from_device_cuda,
- GET_NORM_ELASTIC_FROM_DEVICE_CUDA)(float* norm,
+void FC_FUNC_(get_norm_elastic_from_device,
+ GET_NORM_ELASTIC_FROM_DEVICE)(float* norm,
long* Mesh_pointer_f,
int* SIMULATION_TYPE) {
-
- TRACE("get_norm_elastic_from_device_cuda");
+
+ TRACE("get_norm_elastic_from_device");
//double start_time = get_time();
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
float max;
float *d_max;
-
+
max = 0;
-
+
// launch simple reduction kernel
float* h_max;
int blocksize = 256;
-
+
int num_blocks_x = ceil(mp->NGLOB_AB/blocksize);
//printf("num_blocks_x %i \n",num_blocks_x);
-
- h_max = (float*) calloc(num_blocks_x,sizeof(float));
+
+ h_max = (float*) calloc(num_blocks_x,sizeof(float));
cudaMalloc((void**)&d_max,num_blocks_x*sizeof(float));
-
+
dim3 grid(num_blocks_x,1);
dim3 threads(blocksize,1,1);
-
- if(*SIMULATION_TYPE == 1 ){
+
+ if(*SIMULATION_TYPE == 1 ){
get_maximum_vector_kernel<<<grid,threads>>>(mp->d_displ,
mp->NGLOB_AB,
- d_max);
+ d_max);
}
-
+
if(*SIMULATION_TYPE == 3 ){
get_maximum_vector_kernel<<<grid,threads>>>(mp->d_b_displ,
mp->NGLOB_AB,
- d_max);
- }
-
+ d_max);
+ }
+
print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(float),cudaMemcpyDeviceToHost),222);
-
+
// determines max for all blocks
max = h_max[0];
for(int i=1;i<num_blocks_x;i++) {
if( max < h_max[i]) max = h_max[i];
}
-
+
cudaFree(d_max);
free(h_max);
-
+
// return result
- *norm = max;
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ *norm = max;
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
- exit_on_cuda_error("after get_norm_elastic_from_device_cuda");
-#endif
+ exit_on_cuda_error("after get_norm_elastic_from_device");
+#endif
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -48,133 +48,166 @@
// crashes if the CMTSOLUTION does not match the mesh properly
-__global__ void compute_add_sources_kernel(float* accel, int* ibool, int* ispec_is_inner, int phase_is_inner, float* sourcearrays, double* stf_pre_compute,int myrank, int* islice_selected_source, int* ispec_selected_source, int* ispec_is_elastic, int NSOURCES,float* d_debug) {
+__global__ void compute_add_sources_kernel(float* accel,
+ int* ibool,
+ int* ispec_is_inner,
+ int phase_is_inner,
+ float* sourcearrays,
+ double* stf_pre_compute,
+ int myrank,
+ int* islice_selected_source,
+ int* ispec_selected_source,
+ int* ispec_is_elastic,
+ int NSOURCES,
+ float* d_debug) {
int i = threadIdx.x;
int j = threadIdx.y;
int k = threadIdx.z;
-
+
int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx
int ispec;
int iglob;
- double stf;
+ float stf;
if(isource < NSOURCES) { // when NSOURCES > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
-
+
if(myrank == islice_selected_source[isource]) {
ispec = ispec_selected_source[isource]-1;
- if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] == 1) {
-
- stf = stf_pre_compute[isource];
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] ) {
+
+ stf = (float) stf_pre_compute[isource];
+
+ //if(i==0 && j==0 && k==0) printf("add sources kernel: stf = %e\n",stf);
+
iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
atomicAdd(&accel[iglob*3],
sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf);
atomicAdd(&accel[iglob*3+1],
sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 1, i,j,k)]*stf);
- // if((iglob*3+2 == 304598)) {
- // atomicAdd(&d_debug[0],1.0f);
- // d_debug[1] = accel[iglob*3+2];
- // d_debug[2] = sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)];
- // d_debug[3] = stf;
- // }
- // d_debug[4] = 42.0f;
+ // if((iglob*3+2 == 304598)) {
+ // atomicAdd(&d_debug[0],1.0f);
+ // d_debug[1] = accel[iglob*3+2];
+ // d_debug[2] = sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)];
+ // d_debug[3] = stf;
+ // }
+ // d_debug[4] = 42.0f;
atomicAdd(&accel[iglob*3+2],
- sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)]*stf);
+ sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)]*stf);
}
}
}
-
+
}
+
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(add_sourcearrays_adjoint_cuda,
- ADD_SOURCEARRAYS_ADJOINT_CUDA)(long* Mesh_pointer,
- int* USE_FORCE_POINT_SOURCE,
- double* h_stf_pre_compute,int* NSOURCES,
- int* phase_is_inner,int* myrank) {
-TRACE("add_sourcearrays_adjoint_cuda");
-// EPIK_TRACER("add_sourcearrays_adjoint_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- if(*USE_FORCE_POINT_SOURCE) {
- printf("USE FORCE POINT SOURCE not implemented for GPU_MODE");
- MPI_Abort(MPI_COMM_WORLD, 1);
- }
+extern "C"
+void FC_FUNC_(compute_add_sources_el_cuda,
+ COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
+ //int* NSPEC_ABf, int* NGLOB_ABf,
+ int* phase_is_innerf,
+ int* NSOURCESf,
+ //int* itf, float* dtf, float* t0f,
+ //int* SIMULATION_TYPEf,int* NSTEPf,
+ //int* NOISE_TOMOGRAPHYf,
+ //int* USE_FORCE_POINT_SOURCEf,
+ double* h_stf_pre_compute,
+ int* myrankf) {
- print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,(*NSOURCES)*sizeof(double),cudaMemcpyHostToDevice),18);
+TRACE("compute_add_sources_el_cuda");
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("noise_read_add_surface_movie_cuda_kernel");
-#endif
-
- int num_blocks_x = *NSOURCES;
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // check if anything to do
+ if( mp->nsources_local == 0 ) return;
+
+ //int NSPEC_AB = *NSPEC_ABf;
+ //int NGLOB_AB = *NGLOB_ABf;
+ int phase_is_inner = *phase_is_innerf;
+ //int it = *itf;
+ //float dt = *dtf;
+ //float t0 = *t0f;
+ //int SIMULATION_TYPE = *SIMULATION_TYPEf;
+ //int NSTEP = *NSTEPf;
+ //int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
+ int NSOURCES = *NSOURCESf;
+ //int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
+ int myrank = *myrankf;
+
+ float* d_debug;
+
+ int num_blocks_x = NSOURCES;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
+ //double* d_stf_pre_compute;
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+ NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(5,5,5);
+ // (float* accel, int* ibool, int* ispec_is_inner, int phase_is_inner,
+ // float* sourcearrays, double* stf_pre_compute,int myrank,
+ // int* islice_selected_source, int* ispec_selected_source,
+ // int* ispec_is_elastic, int NSOURCES)
- float* d_debug;
- // float* h_debug = (float*)calloc(128,sizeof(float));
- // cudaMalloc((void**)&d_debug,128*sizeof(float));
- // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-
- compute_add_sources_kernel<<<grid,threads>>>(mp->d_b_accel,mp->d_ibool,
- mp->d_ispec_is_inner, *phase_is_inner,
- mp->d_sourcearrays,
+ //daniel
+ //printf("add sources : nsources_local = %d\n",mp->nsources_local);
+ //printf("add sources : stf = %e\n",h_stf_pre_compute[0]);
+
+
+ compute_add_sources_kernel<<<grid,threads>>>(mp->d_accel,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase_is_inner,
+ mp->d_sourcearrays,
mp->d_stf_pre_compute,
- *myrank,
- mp->d_islice_selected_source,mp->d_ispec_selected_source,
- mp->d_ispec_is_elastic,
- *NSOURCES,
+ myrank,
+ mp->d_islice_selected_source,
+ mp->d_ispec_selected_source,
+ mp->d_ispec_is_elastic,
+ NSOURCES,
d_debug);
- // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- // for(int i=0;i<10;i++) {
- // printf("debug[%d] = %e \n",i,h_debug[i]);
- // }
-
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("add_sourcearrays_adjoint_cuda");
+ exit_on_cuda_error("compute_add_sources_kernel");
#endif
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(compute_add_sources_elastic_cuda,
- COMPUTE_ADD_SOURCES_ELASTIC_CUDA)(long* Mesh_pointer_f,
- int* NSPEC_ABf, int* NGLOB_ABf,
- int* phase_is_innerf,int* NSOURCESf,
- int* itf, float* dtf, float* t0f,
- int* SIMULATION_TYPEf,int* NSTEPf,
- int* NOISE_TOMOGRAPHYf,
- int* USE_FORCE_POINT_SOURCEf,
- double* h_stf_pre_compute, int* myrankf) {
+extern "C"
+void FC_FUNC_(compute_add_sources_el_s3_cuda,
+ COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
+ int* USE_FORCE_POINT_SOURCE,
+ double* h_stf_pre_compute,
+ int* NSOURCESf,
+ int* phase_is_inner,int* myrank) {
+ TRACE("compute_add_sources_el_s3_cuda");
+ // EPIK_TRACER("compute_add_sources_el_s3_cuda");
-TRACE("compute_add_sources_elastic_cuda");
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- //int NSPEC_AB = *NSPEC_ABf;
- //int NGLOB_AB = *NGLOB_ABf;
- int phase_is_inner = *phase_is_innerf;
- //int it = *itf;
- //float dt = *dtf;
- //float t0 = *t0f;
- //int SIMULATION_TYPE = *SIMULATION_TYPEf;
- //int NSTEP = *NSTEPf;
- //int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
int NSOURCES = *NSOURCESf;
- //int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
- int myrank = *myrankf;
- float* d_debug;
+ if(*USE_FORCE_POINT_SOURCE) {
+ printf("USE FORCE POINT SOURCE not implemented for GPU_MODE");
+ MPI_Abort(MPI_COMM_WORLD, 1);
+ }
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+ NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_add_sources_el_s3_cuda");
+#endif
+
int num_blocks_x = NSOURCES;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
@@ -182,27 +215,44 @@
num_blocks_y = num_blocks_y*2;
}
- //double* d_stf_pre_compute;
- print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
-
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(5,5,5);
- // (float* accel, int* ibool, int* ispec_is_inner, int phase_is_inner, float* sourcearrays, double* stf_pre_compute,int myrank, int* islice_selected_source, int* ispec_selected_source, int* ispec_is_elastic, int NSOURCES)
-
-
-
- compute_add_sources_kernel<<<grid,threads>>>(mp->d_accel,mp->d_ibool, mp->d_ispec_is_inner, phase_is_inner, mp->d_sourcearrays, mp->d_stf_pre_compute,myrank, mp->d_islice_selected_source,mp->d_ispec_selected_source,mp->d_ispec_is_elastic, NSOURCES,d_debug);
+ float* d_debug;
+ // float* h_debug = (float*)calloc(128,sizeof(float));
+ // cudaMalloc((void**)&d_debug,128*sizeof(float));
+ // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
+
+ compute_add_sources_kernel<<<grid,threads>>>(mp->d_b_accel,mp->d_ibool,
+ mp->d_ispec_is_inner, *phase_is_inner,
+ mp->d_sourcearrays,
+ mp->d_stf_pre_compute,
+ *myrank,
+ mp->d_islice_selected_source,mp->d_ispec_selected_source,
+ mp->d_ispec_is_elastic,
+ NSOURCES,
+ d_debug);
+
+ // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ // for(int i=0;i<10;i++) {
+ // printf("debug[%d] = %e \n",i,h_debug[i]);
+ // }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_add_sources_kernel");
+ exit_on_cuda_error("compute_add_sources_el_s3_cuda");
#endif
}
/* ----------------------------------------------------------------------------------------------- */
-__global__ void add_source_master_rec_noise_cuda_kernel(int* ibool, int* ispec_selected_rec,
- int irec_master_noise,
- real* accel, real* noise_sourcearray,
+// NOISE sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void add_source_master_rec_noise_cuda_kernel(int* ibool, int* ispec_selected_rec,
+ int irec_master_noise,
+ realw* accel,
+ realw* noise_sourcearray,
int it) {
int tx = threadIdx.x;
int iglob = ibool[tx + 125*(ispec_selected_rec[irec_master_noise-1]-1)]-1;
@@ -211,24 +261,24 @@
// accel[3*iglob] += noise_sourcearray[3*tx + 3*125*it];
// accel[1+3*iglob] += noise_sourcearray[1+3*tx + 3*125*it];
// accel[2+3*iglob] += noise_sourcearray[2+3*tx + 3*125*it];
-
+
atomicAdd(&accel[iglob*3],noise_sourcearray[3*tx + 3*125*it]);
atomicAdd(&accel[iglob*3+1],noise_sourcearray[1+3*tx + 3*125*it]);
atomicAdd(&accel[iglob*3+2],noise_sourcearray[2+3*tx + 3*125*it]);
-
+
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(add_source_master_rec_noise_cuda,
- ADD_SOURCE_MASTER_REC_NOISE_CUDA)(long* Mesh_pointer_f,
- int* myrank_f,
- int* it_f,
- int* irec_master_noise_f,
+extern "C"
+void FC_FUNC_(add_source_master_rec_noise_cu,
+ ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
+ int* myrank_f,
+ int* it_f,
+ int* irec_master_noise_f,
int* islice_selected_rec) {
-TRACE("add_source_master_rec_noise_cuda");
+TRACE("add_source_master_rec_noise_cu");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
@@ -239,8 +289,8 @@
dim3 threads(125,1,1);
if(myrank == islice_selected_rec[irec_master_noise-1]) {
add_source_master_rec_noise_cuda_kernel<<<grid,threads>>>(mp->d_ibool, mp->d_ispec_selected_rec,
- irec_master_noise, mp->d_accel,
- mp->d_noise_sourcearray, it);
+ irec_master_noise, mp->d_accel,
+ mp->d_noise_sourcearray, it);
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("add_source_master_rec_noise_cuda_kernel");
@@ -250,57 +300,61 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void add_sources_SIM_TYPE_2_OR_3_kernel(float* accel, int nrec,
- float* adj_sourcearrays,
- int* ibool,
- int* ispec_is_inner,
- int* ispec_selected_rec,
- int phase_is_inner,
- int* islice_selected_rec,
- int* pre_computed_irec,
- int nadj_rec_local,
- int NTSTEP_BETWEEN_ADJSRC,
- int myrank,
- int* debugi,
- float* debugf) {
+// ADJOINT sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void add_sources_SIM_TYPE_2_OR_3_kernel(float* accel, int nrec,
+ float* adj_sourcearrays,
+ int* ibool,
+ int* ispec_is_inner,
+ int* ispec_selected_rec,
+ int phase_is_inner,
+ int* islice_selected_rec,
+ int* pre_computed_irec,
+ int nadj_rec_local,
+ int NTSTEP_BETWEEN_ADJSRC,
+ int myrank,
+ int* debugi,
+ float* debugf) {
int irec_local = blockIdx.x + gridDim.x*blockIdx.y;
if(irec_local<nadj_rec_local) { // when nrec > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
int irec = pre_computed_irec[irec_local];
-
+
int ispec_selected = ispec_selected_rec[irec]-1;
if(ispec_is_inner[ispec_selected] == phase_is_inner) {
int i = threadIdx.x;
int j = threadIdx.y;
int k = threadIdx.z;
int iglob = ibool[i+5*(j+5*(k+5*ispec_selected))]-1;
-
+
// atomic operations are absolutely necessary for correctness!
atomicAdd(&(accel[0+3*iglob]),adj_sourcearrays[INDEX5(5,5,5,3,
- i,j,k,
- 0,
- irec_local)]);
-
+ i,j,k,
+ 0,
+ irec_local)]);
+
atomicAdd(&accel[1+3*iglob], adj_sourcearrays[INDEX5(5,5,5,3,
- i,j,k,
- 1,
- irec_local)]);
-
+ i,j,k,
+ 1,
+ irec_local)]);
+
atomicAdd(&accel[2+3*iglob],adj_sourcearrays[INDEX5(5,5,5,3,
- i,j,k,
- 2,
- irec_local)]);
+ i,j,k,
+ 2,
+ irec_local)]);
}
-
+
}
-
+
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(add_sources_sim_type_2_or_3,
- ADD_SOURCES_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
+ ADD_SOURCES_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
float* h_adj_sourcearrays,
int* size_adj_sourcearrays, int* ispec_is_inner,
int* phase_is_inner, int* ispec_selected_rec,
@@ -312,11 +366,11 @@
TRACE("add_sources_sim_type_2_or_3");
if(*nadj_rec_local > 0) {
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
int rank;
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
-
+
// make sure grid dimension is less than 65535 in x dimension
int num_blocks_x = *nadj_rec_local;
int num_blocks_y = 1;
@@ -326,112 +380,110 @@
}
dim3 grid(num_blocks_x,num_blocks_y,1);
dim3 threads(5,5,5);
-
+
float* d_adj_sourcearrays;
print_CUDA_error_if_any(cudaMalloc((void**)&d_adj_sourcearrays,
- (*nadj_rec_local)*3*125*sizeof(float)),1);
+ (*nadj_rec_local)*3*125*sizeof(float)),1);
float* h_adj_sourcearrays_slice = (float*)malloc((*nadj_rec_local)*3*125*sizeof(float));
int* h_pre_computed_irec = new int[*nadj_rec_local];
int* d_pre_computed_irec;
cudaMalloc((void**)&d_pre_computed_irec,(*nadj_rec_local)*sizeof(int));
-
+
// build slice of adj_sourcearrays because full array is *very* large.
int irec_local = 0;
for(int irec = 0;irec<*nrec;irec++) {
if(*myrank == h_islice_selected_rec[irec]) {
- irec_local++;
- h_pre_computed_irec[irec_local-1] = irec;
- if(ispec_is_inner[ispec_selected_rec[irec]-1] == *phase_is_inner) {
- for(int k=0;k<5;k++) {
- for(int j=0;j<5;j++) {
- for(int i=0;i<5;i++) {
+ irec_local++;
+ h_pre_computed_irec[irec_local-1] = irec;
+ if(ispec_is_inner[ispec_selected_rec[irec]-1] == *phase_is_inner) {
+ for(int k=0;k<5;k++) {
+ for(int j=0;j<5;j++) {
+ for(int i=0;i<5;i++) {
- h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
- i,j,k,0,
- irec_local-1)]
- = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,
- *time_index-1,
- 0,i,j,k)];
-
- h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
- i,j,k,1,
- irec_local-1)]
- = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,
- *time_index-1,
- 1,i,j,k)];
-
- h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
- i,j,k,2,
- irec_local-1)]
- = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,
- *time_index-1,
- 2,i,j,k)];
-
-
- }
- }
- }
- }
+ h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+ i,j,k,0,
+ irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,
+ *time_index-1,
+ 0,i,j,k)];
+
+ h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+ i,j,k,1,
+ irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,
+ *time_index-1,
+ 1,i,j,k)];
+
+ h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+ i,j,k,2,
+ irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,
+ *time_index-1,
+ 2,i,j,k)];
+ }
+ }
+ }
+ }
}
}
// printf("irec_local vs. *nadj_rec_local -> %d vs. %d\n",irec_local,*nadj_rec_local);
// for(int ispec=0;ispec<(*nadj_rec_local);ispec++) {
// for(int i=0;i<5;i++)
// for(int j=0;j<5;j++)
- // for(int k=0;k<5;k++) {
- // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,0,ispec)] =
- // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
- // ispec,
- // *time_index-1,
- // 0,
- // i,j,k)];
- // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,1,ispec)] =
- // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
- // ispec,
- // *time_index-1,
- // 1,
- // i,j,k)];
- // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,2,ispec)] =
- // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_ADJSRC,3,5,5,
- // ispec,
- // *time_index-1,
- // 2,
- // i,j,k)];
- // }
-
+ // for(int k=0;k<5;k++) {
+ // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,0,ispec)] =
+ // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
+ // ispec,
+ // *time_index-1,
+ // 0,
+ // i,j,k)];
+ // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,1,ispec)] =
+ // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
+ // ispec,
+ // *time_index-1,
+ // 1,
+ // i,j,k)];
+ // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,2,ispec)] =
+ // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_ADJSRC,3,5,5,
+ // ispec,
+ // *time_index-1,
+ // 2,
+ // i,j,k)];
+ // }
+
// }
-
+
cudaMemcpy(d_adj_sourcearrays, h_adj_sourcearrays_slice,(*nadj_rec_local)*3*125*sizeof(float),
- cudaMemcpyHostToDevice);
-
+ cudaMemcpyHostToDevice);
+
// the irec_local variable needs to be precomputed (as
// h_pre_comp..), because normally it is in the loop updating accel,
// and due to how it's incremented, it cannot be parallelized
-
+
// int irec_local=0;
// for(int irec=0;irec<*nrec;irec++) {
// if(*myrank == h_islice_selected_rec[irec]) {
// h_pre_computed_irec_local_index[irec] = irec_local;
// irec_local++;
// if(irec_local==1) {
- // // printf("%d:first useful irec==%d\n",rank,irec);
+ // // printf("%d:first useful irec==%d\n",rank,irec);
// }
// }
// else h_pre_computed_irec_local_index[irec] = 0;
// }
cudaMemcpy(d_pre_computed_irec,h_pre_computed_irec,
- (*nadj_rec_local)*sizeof(int),cudaMemcpyHostToDevice);
+ (*nadj_rec_local)*sizeof(int),cudaMemcpyHostToDevice);
// pause_for_debugger(1);
int* d_debugi, *h_debugi;
float* d_debugf, *h_debugf;
@@ -441,33 +493,33 @@
h_debugf = (float*)calloc(num_blocks_x,sizeof(float));
cudaMalloc((void**)&d_debugf,num_blocks_x*sizeof(float));
cudaMemcpy(d_debugf,h_debugf,num_blocks_x*sizeof(float),cudaMemcpyHostToDevice);
-
+
add_sources_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_accel, *nrec,
- d_adj_sourcearrays, mp->d_ibool,
- mp->d_ispec_is_inner,
- mp->d_ispec_selected_rec,
- *phase_is_inner,
- mp->d_islice_selected_rec,
- d_pre_computed_irec,
- *nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- *myrank,
- d_debugi,d_debugf);
+ d_adj_sourcearrays, mp->d_ibool,
+ mp->d_ispec_is_inner,
+ mp->d_ispec_selected_rec,
+ *phase_is_inner,
+ mp->d_islice_selected_rec,
+ d_pre_computed_irec,
+ *nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ *myrank,
+ d_debugi,d_debugf);
cudaMemcpy(h_debugi,d_debugi,num_blocks_x*sizeof(int),cudaMemcpyDeviceToHost);
cudaMemcpy(h_debugf,d_debugf,num_blocks_x*sizeof(float),cudaMemcpyDeviceToHost);
-
+
// printf("%d: pre_com0:%d\n",rank,h_pre_computed_irec_local_index[0]);
// printf("%d: pre_com1:%d\n",rank,h_pre_computed_irec_local_index[1]);
// printf("%d: pre_com2:%d\n",rank,h_pre_computed_irec_local_index[2]);
// for(int i=156;i<(156+30);i++) {
// if(rank==0) printf("%d:debug[%d] = i/f = %d / %e\n",rank,i,h_debugi[i],h_debugf[i]);
// }
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
// MPI_Barrier(MPI_COMM_WORLD);
- exit_on_cuda_error("add_sources_SIM_TYPE_2_OR_3_kernel");
-
+ exit_on_cuda_error("add_sources_SIM_TYPE_2_OR_3_kernel");
+
// printf("Proc %d exiting with successful kernel\n",rank);
// exit(1);
#endif
@@ -483,82 +535,86 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_add_sources_acoustic_kernel(float* potential_dot_dot_acoustic,
- int* ibool,
- int* ispec_is_inner,
- int phase_is_inner,
- float* sourcearrays,
+__global__ void compute_add_sources_acoustic_kernel(float* potential_dot_dot_acoustic,
+ int* ibool,
+ int* ispec_is_inner,
+ int phase_is_inner,
+ float* sourcearrays,
double* stf_pre_compute,
- int myrank,
- int* islice_selected_source,
- int* ispec_selected_source,
- int* ispec_is_acoustic,
+ int myrank,
+ int* islice_selected_source,
+ int* ispec_selected_source,
+ int* ispec_is_acoustic,
float* kappastore,
int NSOURCES) {
int i = threadIdx.x;
int j = threadIdx.y;
int k = threadIdx.z;
-
+
int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx
-
+
int ispec;
int iglob;
- double stf;
+ float stf;
float kappal;
-
+
if( isource < NSOURCES ){
-
+
//if(myrank == 0 && i== 0 && j == 0 && k == 0) printf("source isource = %i \n",isource);
-
+
if(myrank == islice_selected_source[isource]) {
-
+
ispec = ispec_selected_source[isource]-1;
-
- if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] == 1) {
-
- stf = stf_pre_compute[isource];
+
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] ) {
+
+ stf = (float) stf_pre_compute[isource];
iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
-
+
//printf("source ispec = %i %i %e %e \n",ispec,iglob,stf,kappal);
//printf("source arr = %e %i %i %i %i %i\n", -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal,i,j,k,iglob,ispec);
-
+
atomicAdd(&potential_dot_dot_acoustic[iglob],
-sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal);
-
+
// potential_dot_dot_acoustic[iglob] +=
- // -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal;
-
+ // -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal;
+
//printf("potential = %e %i %i %i %i %i\n", potential_dot_dot_acoustic[iglob],i,j,k,iglob,ispec);
-
-
+
+
}
}
- }
+ }
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(compute_add_sources_acoustic_cuda,
- COMPUTE_ADD_SOURCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+extern "C"
+void FC_FUNC_(compute_add_sources_ac_cuda,
+ COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
- int* NSOURCESf,
+ int* NSOURCESf,
int* SIMULATION_TYPEf,
- int* USE_FORCE_POINT_SOURCEf,
- double* h_stf_pre_compute,
+ int* USE_FORCE_POINT_SOURCEf,
+ double* h_stf_pre_compute,
int* myrankf) {
-
-TRACE("compute_add_sources_acoustic_cuda");
-
+
+TRACE("compute_add_sources_ac_cuda");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // check if anything to do
+ if( mp->nsources_local == 0 ) return;
+
int phase_is_inner = *phase_is_innerf;
//int SIMULATION_TYPE = *SIMULATION_TYPEf;
int NSOURCES = *NSOURCESf;
//int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
- int myrank = *myrankf;
-
+ int myrank = *myrankf;
+
int num_blocks_x = NSOURCES;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
@@ -566,51 +622,56 @@
num_blocks_y = num_blocks_y*2;
}
- // copies pre-computed source time factors onto GPU
- print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
-
+ // copies pre-computed source time factors onto GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+ NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(5,5,5);
-
+
compute_add_sources_acoustic_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
- mp->d_ibool,
- mp->d_ispec_is_inner,
- phase_is_inner,
- mp->d_sourcearrays,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase_is_inner,
+ mp->d_sourcearrays,
mp->d_stf_pre_compute,
- myrank,
+ myrank,
mp->d_islice_selected_source,
mp->d_ispec_selected_source,
- mp->d_ispec_is_acoustic,
+ mp->d_ispec_is_acoustic,
mp->d_kappastore,
NSOURCES);
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_add_sources_acoustic_cuda");
-#endif
+ exit_on_cuda_error("compute_add_sources_ac_cuda");
+#endif
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(compute_add_sources_acoustic_sim3_cuda,
- COMPUTE_ADD_SOURCES_ACOUSTIC_SIM3_CUDA)(long* Mesh_pointer_f,
+extern "C"
+void FC_FUNC_(compute_add_sources_ac_s3_cuda,
+ COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
- int* NSOURCESf,
+ int* NSOURCESf,
int* SIMULATION_TYPEf,
- int* USE_FORCE_POINT_SOURCEf,
- double* h_stf_pre_compute,
+ int* USE_FORCE_POINT_SOURCEf,
+ double* h_stf_pre_compute,
int* myrankf) {
-
-TRACE("compute_add_sources_acoustic_sim3_cuda");
-
+
+TRACE("compute_add_sources_ac_s3_cuda");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // check if anything to do
+ if( mp->nsources_local == 0 ) return;
+
int phase_is_inner = *phase_is_innerf;
//int SIMULATION_TYPE = *SIMULATION_TYPEf;
int NSOURCES = *NSOURCESf;
//int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
- int myrank = *myrankf;
-
+ int myrank = *myrankf;
+
int num_blocks_x = NSOURCES;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
@@ -618,28 +679,29 @@
num_blocks_y = num_blocks_y*2;
}
- // copies source time factors onto GPU
- print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
-
+ // copies source time factors onto GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+ NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(5,5,5);
-
+
compute_add_sources_acoustic_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
- mp->d_ibool,
- mp->d_ispec_is_inner,
- phase_is_inner,
- mp->d_sourcearrays,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase_is_inner,
+ mp->d_sourcearrays,
mp->d_stf_pre_compute,
- myrank,
+ myrank,
mp->d_islice_selected_source,
mp->d_ispec_selected_source,
- mp->d_ispec_is_acoustic,
+ mp->d_ispec_is_acoustic,
mp->d_kappastore,
NSOURCES);
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_add_sources_acoustic_sim3_cuda");
-#endif
+ exit_on_cuda_error("compute_add_sources_ac_s3_cuda");
+#endif
}
@@ -649,7 +711,7 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void add_sources_acoustic_SIM_TYPE_2_OR_3_kernel(float* potential_dot_dot_acoustic,
+__global__ void add_sources_acoustic_SIM_TYPE_2_OR_3_kernel(float* potential_dot_dot_acoustic,
int nrec,
int pre_computed_index,
float* adj_sourcearrays,
@@ -664,51 +726,51 @@
int nadj_rec_local,
int NTSTEP_BETWEEN_ADJSRC,
int myrank) {
-
+
int irec = blockIdx.x + gridDim.x*blockIdx.y;
-
+
//float kappal;
int i,j,k,iglob,ispec;
-
+
// because of grid shape, irec can be too big
- if(irec<nrec) {
-
+ if(irec<nrec) {
+
// adds source only if this proc carries the sources
if( myrank == islice_selected_rec[irec] ){
-
+
// adds acoustic source
- ispec = ispec_selected_rec[irec]-1;
- if( ispec_is_acoustic[ispec] == 1 ){
-
+ ispec = ispec_selected_rec[irec]-1;
+ if( ispec_is_acoustic[ispec] ){
+
// checks if element is in phase_is_inner run
if(ispec_is_inner[ispec] == phase_is_inner) {
i = threadIdx.x;
j = threadIdx.y;
k = threadIdx.z;
iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-
+
//kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
-
+
//potential_dot_dot_acoustic[iglob] += adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
// pre_computed_irec_local_index[irec],
// pre_computed_index,
// 0,
- // i,j,k)]/kappal;
+ // i,j,k)]/kappal;
- // beware, for acoustic medium, a pressure source would be taking the negative
+ // beware, for acoustic medium, a pressure source would be taking the negative
// and divide by Kappa of the fluid;
// this would have to be done when constructing the adjoint source.
//
// note: we take the first component of the adj_sourcearrays
// the idea is to have e.g. a pressure source, where all 3 components would be the same
-
+
atomicAdd(&potential_dot_dot_acoustic[iglob],
+adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
pre_computed_irec_local_index[irec],pre_computed_index-1,
- 0,i,j,k)] // / kappal
+ 0,i,j,k)] // / kappal
);
- }
- }
+ }
+ }
}
}
}
@@ -716,23 +778,23 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(add_sources_acoustic_sim_type_2_or_3_cuda,
- ADD_SOURCES_ACOUSTIC_SIM_TYPE_2_OR_3_CUDA)(long* Mesh_pointer,
+extern "C"
+void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda,
+ ADD_SOURCES_AC_SIM_2_OR_3_CUDA)(long* Mesh_pointer,
float* h_adj_sourcearrays,
- int* size_adj_sourcearrays,
+ int* size_adj_sourcearrays,
int* phase_is_inner,
- int* myrank,
- int* nrec,
+ int* myrank,
+ int* nrec,
int* pre_computed_index,
int* h_islice_selected_rec,
int* nadj_rec_local,
int* NTSTEP_BETWEEN_ADJSRC) {
-
-TRACE("add_sources_acoustic_sim_type_2_or_3_cuda");
-
+
+TRACE("add_sources_ac_sim_2_or_3_cuda");
+
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-
+
// make sure grid dimension is less than 65535 in x dimension
int num_blocks_x = *nrec;
int num_blocks_y = 1;
@@ -742,23 +804,23 @@
}
dim3 grid(num_blocks_x,num_blocks_y,1);
dim3 threads(5,5,5);
-
+
// copies source arrays
- // daniel: todo workaround -- adj_sourcearrays can be very big, but here only at
+ // daniel: todo workaround -- adj_sourcearrays can be very big, but here only at
// specific time it (pre_computed_irec_local_index) is actually needed...
float* d_adj_sourcearrays;
print_CUDA_error_if_any(cudaMalloc((void**)&d_adj_sourcearrays,
(*size_adj_sourcearrays)*sizeof(float)),731);
print_CUDA_error_if_any(cudaMemcpy(d_adj_sourcearrays, h_adj_sourcearrays,
(*size_adj_sourcearrays)*sizeof(float),cudaMemcpyHostToDevice),732);
-
+
//int* h_pre_computed_irec_local_index = new int[*nadj_rec_local];
int* h_pre_computed_irec_local_index = (int*) calloc(*nrec,sizeof(int));
-
+
int* d_pre_computed_irec_local_index;
print_CUDA_error_if_any(cudaMalloc((void**)&d_pre_computed_irec_local_index,
(*nrec)*sizeof(int)),741);
-
+
// the irec_local variable needs to be precomputed (as
// h_pre_comp..), because normally it is in the loop updating accel,
// and due to how it's incremented, it cannot be parallized
@@ -766,39 +828,39 @@
for(int irec=0;irec<*nrec;irec++) {
if(*myrank == h_islice_selected_rec[irec]) {
h_pre_computed_irec_local_index[irec] = irec_local;
- irec_local++;
+ irec_local++;
}
else h_pre_computed_irec_local_index[irec] = 0;
}
-
+
//daniel
//printf("irec local: rank=%d irec_local=%d nadj_rec_local=%d nrec=%d \n",*myrank,irec_local,*nadj_rec_local,*nrec);
-
+
print_CUDA_error_if_any(cudaMemcpy(d_pre_computed_irec_local_index,h_pre_computed_irec_local_index,
(*nrec)*sizeof(int),cudaMemcpyHostToDevice),742);
-
- add_sources_acoustic_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
- *nrec,
+
+ add_sources_acoustic_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ *nrec,
*pre_computed_index,
- d_adj_sourcearrays,
+ d_adj_sourcearrays,
mp->d_ibool,
mp->d_ispec_is_inner,
mp->d_ispec_is_acoustic,
mp->d_kappastore,
mp->d_ispec_selected_rec,
- *phase_is_inner,
+ *phase_is_inner,
mp->d_islice_selected_rec,
d_pre_computed_irec_local_index,
*nadj_rec_local,
*NTSTEP_BETWEEN_ADJSRC,
*myrank);
-
+
//delete h_pre_computed_irec_local_index;
free(h_pre_computed_irec_local_index);
cudaFree(d_adj_sourcearrays);
- cudaFree(d_pre_computed_irec_local_index);
-
+ cudaFree(d_pre_computed_irec_local_index);
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("add_sources_acoustic_SIM_TYPE_2_OR_3_kernel");
-#endif
+ exit_on_cuda_error("add_sources_acoustic_SIM_TYPE_2_OR_3_kernel");
+#endif
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -44,46 +44,46 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_coupling_acoustic_el_kernel(float* displ,
- float* potential_dot_dot_acoustic,
+__global__ void compute_coupling_acoustic_el_kernel(float* displ,
+ float* potential_dot_dot_acoustic,
int num_coupling_ac_el_faces,
int* coupling_ac_el_ispec,
- int* coupling_ac_el_ijk,
+ int* coupling_ac_el_ijk,
float* coupling_ac_el_normal,
float* coupling_ac_el_jacobian2Dw,
int* ibool,
- int* ispec_is_inner,
+ int* ispec_is_inner,
int phase_is_inner) {
-
- int igll = threadIdx.x;
- int iface = blockIdx.x + gridDim.x*blockIdx.y;
-
+
+ int igll = threadIdx.x;
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
int i,j,k,iglob,ispec;
realw displ_x,displ_y,displ_z,displ_n;
realw nx,ny,nz;
realw jacobianw;
-
+
if( iface < num_coupling_ac_el_faces){
-
- // don't compute points outside NGLLSQUARE==NGLL2==25
+
+ // don't compute points outside NGLLSQUARE==NGLL2==25
// way 2: no further check needed since blocksize = 25
- // if(igll<NGLL2) {
-
+ // if(igll<NGLL2) {
+
// "-1" from index values to convert from Fortran-> C indexing
ispec = coupling_ac_el_ispec[iface]-1;
-
+
if(ispec_is_inner[ispec] == phase_is_inner ) {
-
+
i = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1;
j = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
k = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]- 1;
-
+
// elastic displacement on global point
displ_x = displ[iglob*3] ; // (1,iglob)
displ_y = displ[iglob*3+1] ; // (2,iglob)
displ_z = displ[iglob*3+2] ; // (3,iglob)
-
+
// gets associated normal on GLL point
nx = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; // (1,igll,iface)
ny = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; // (2,igll,iface)
@@ -92,17 +92,17 @@
// calculates displacement component along normal
// (normal points outwards of acoustic element)
displ_n = displ_x*nx + displ_y*ny + displ_z*nz;
-
-
- // gets associated, weighted jacobian
- jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
-
+
+
+ // gets associated, weighted jacobian
+ jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
//daniel
//if( igll == 0 ) printf("gpu: %i %i %i %i %i %e \n",i,j,k,ispec,iglob,jacobianw);
// continuity of pressure and normal displacement on global point
-
+
// note: newark time scheme together with definition of scalar potential:
// pressure = - chi_dot_dot
// requires that this coupling term uses the updated displacement at time step [t+delta_t],
@@ -111,73 +111,73 @@
// it also means you have to calculate and update this here first before
// calculating the coupling on the elastic side for the acceleration...
atomicAdd(&potential_dot_dot_acoustic[iglob],+ jacobianw*displ_n);
-
+
}
- // }
+ // }
}
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(compute_coupling_acoustic_el_cuda,
- COMPUTE_COUPLING_ACOUSTIC_EL_CUDA)(
- long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* num_coupling_ac_el_facesf,
+extern "C"
+void FC_FUNC_(compute_coupling_ac_el_cuda,
+ COMPUTE_COUPLING_AC_EL_CUDA)(
+ long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_coupling_ac_el_facesf,
int* SIMULATION_TYPEf) {
- TRACE("compute_coupling_acoustic_el_cuda");
+ TRACE("compute_coupling_ac_el_cuda");
//double start_time = get_time();
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
int phase_is_inner = *phase_is_innerf;
int num_coupling_ac_el_faces = *num_coupling_ac_el_facesf;
int SIMULATION_TYPE = *SIMULATION_TYPEf;
-
+
// way 1: exact blocksize to match NGLLSQUARE
- int blocksize = 25;
-
+ int blocksize = 25;
+
int num_blocks_x = num_coupling_ac_el_faces;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
//daniel
// printf("gpu: %i %i %i \n",num_coupling_ac_el_faces,SIMULATION_TYPE,phase_is_inner);
-
+
compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_displ,
mp->d_potential_dot_dot_acoustic,
num_coupling_ac_el_faces,
- mp->d_coupling_ac_el_ispec,
- mp->d_coupling_ac_el_ijk,
+ mp->d_coupling_ac_el_ispec,
+ mp->d_coupling_ac_el_ijk,
mp->d_coupling_ac_el_normal,
- mp->d_coupling_ac_el_jacobian2Dw,
- mp->d_ibool,
- mp->d_ispec_is_inner,
+ mp->d_coupling_ac_el_jacobian2Dw,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
phase_is_inner);
-
+
// adjoint simulations
- if (SIMULATION_TYPE == 3 ){
+ if (SIMULATION_TYPE == 3 ){
compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_b_displ,
mp->d_b_potential_dot_dot_acoustic,
num_coupling_ac_el_faces,
- mp->d_coupling_ac_el_ispec,
- mp->d_coupling_ac_el_ijk,
+ mp->d_coupling_ac_el_ispec,
+ mp->d_coupling_ac_el_ijk,
mp->d_coupling_ac_el_normal,
- mp->d_coupling_ac_el_jacobian2Dw,
- mp->d_ibool,
- mp->d_ispec_is_inner,
+ mp->d_coupling_ac_el_jacobian2Dw,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
phase_is_inner);
-
+
}
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
exit_on_cuda_error("compute_coupling_acoustic_el_kernel");
@@ -191,56 +191,56 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_coupling_elastic_ac_kernel(float* potential_dot_dot_acoustic,
- float* accel,
+__global__ void compute_coupling_elastic_ac_kernel(float* potential_dot_dot_acoustic,
+ float* accel,
int num_coupling_ac_el_faces,
int* coupling_ac_el_ispec,
- int* coupling_ac_el_ijk,
+ int* coupling_ac_el_ijk,
float* coupling_ac_el_normal,
float* coupling_ac_el_jacobian2Dw,
int* ibool,
- int* ispec_is_inner,
+ int* ispec_is_inner,
int phase_is_inner) {
-
- int igll = threadIdx.x;
- int iface = blockIdx.x + gridDim.x*blockIdx.y;
-
+
+ int igll = threadIdx.x;
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
int i,j,k,iglob,ispec;
realw pressure;
realw nx,ny,nz;
realw jacobianw;
-
+
if( iface < num_coupling_ac_el_faces){
-
- // don't compute points outside NGLLSQUARE==NGLL2==25
+
+ // don't compute points outside NGLLSQUARE==NGLL2==25
// way 2: no further check needed since blocksize = 25
- // if(igll<NGLL2) {
-
+ // if(igll<NGLL2) {
+
// "-1" from index values to convert from Fortran-> C indexing
ispec = coupling_ac_el_ispec[iface]-1;
-
+
if(ispec_is_inner[ispec] == phase_is_inner ) {
-
+
i = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1;
j = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
k = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]- 1;
-
+
// acoustic pressure on global point
pressure = - potential_dot_dot_acoustic[iglob];
-
+
// gets associated normal on GLL point
nx = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; // (1,igll,iface)
ny = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; // (2,igll,iface)
nz = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,2,igll,iface)]; // (3,igll,iface)
-
- // gets associated, weighted jacobian
- jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
-
+
+ // gets associated, weighted jacobian
+ jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
//daniel
//if( igll == 0 ) printf("gpu: %i %i %i %i %i %e \n",i,j,k,ispec,iglob,jacobianw);
-
-
+
+
// continuity of displacement and pressure on global point
//
// note: newark time scheme together with definition of scalar potential:
@@ -254,73 +254,73 @@
atomicAdd(&accel[iglob*3+1],+ jacobianw*ny*pressure);
atomicAdd(&accel[iglob*3+2],+ jacobianw*nz*pressure);
}
- // }
+ // }
}
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(compute_coupling_elastic_ac_cuda,
- COMPUTE_COUPLING_ELASTIC_AC_CUDA)(
- long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* num_coupling_ac_el_facesf,
+extern "C"
+void FC_FUNC_(compute_coupling_el_ac_cuda,
+ COMPUTE_COUPLING_EL_AC_CUDA)(
+ long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_coupling_ac_el_facesf,
int* SIMULATION_TYPEf) {
- TRACE("compute_coupling_elastic_ac_cuda");
+ TRACE("compute_coupling_el_ac_cuda");
//double start_time = get_time();
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
int phase_is_inner = *phase_is_innerf;
int num_coupling_ac_el_faces = *num_coupling_ac_el_facesf;
int SIMULATION_TYPE = *SIMULATION_TYPEf;
-
+
// way 1: exact blocksize to match NGLLSQUARE
- int blocksize = 25;
-
+ int blocksize = 25;
+
int num_blocks_x = num_coupling_ac_el_faces;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
//daniel
// printf("gpu: %i %i %i \n",num_coupling_ac_el_faces,SIMULATION_TYPE,phase_is_inner);
-
-
+
+
compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
mp->d_accel,
num_coupling_ac_el_faces,
- mp->d_coupling_ac_el_ispec,
- mp->d_coupling_ac_el_ijk,
+ mp->d_coupling_ac_el_ispec,
+ mp->d_coupling_ac_el_ijk,
mp->d_coupling_ac_el_normal,
- mp->d_coupling_ac_el_jacobian2Dw,
- mp->d_ibool,
- mp->d_ispec_is_inner,
+ mp->d_coupling_ac_el_jacobian2Dw,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
phase_is_inner);
-
+
// adjoint simulations
- if (SIMULATION_TYPE == 3 ){
+ if (SIMULATION_TYPE == 3 ){
compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
mp->d_b_accel,
num_coupling_ac_el_faces,
- mp->d_coupling_ac_el_ispec,
- mp->d_coupling_ac_el_ijk,
+ mp->d_coupling_ac_el_ispec,
+ mp->d_coupling_ac_el_ijk,
mp->d_coupling_ac_el_normal,
- mp->d_coupling_ac_el_jacobian2Dw,
- mp->d_ibool,
- mp->d_ispec_is_inner,
+ mp->d_coupling_ac_el_jacobian2Dw,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
phase_is_inner);
-
+
}
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
- exit_on_cuda_error("compute_coupling_elastic_ac_cuda");
+ exit_on_cuda_error("compute_coupling_el_ac_cuda");
#endif
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -42,22 +42,22 @@
// prepares a device array with with all inter-element edge-nodes -- this
// is followed by a memcpy and MPI operations
-__global__ void prepare_boundary_potential_on_device(float* d_potential_dot_dot_acoustic,
+__global__ void prepare_boundary_potential_on_device(float* d_potential_dot_dot_acoustic,
float* d_send_potential_dot_dot_buffer,
- int num_interfaces_ext_mesh,
+ int num_interfaces_ext_mesh,
int max_nibool_interfaces_ext_mesh,
int* d_nibool_interfaces_ext_mesh,
int* d_ibool_interfaces_ext_mesh) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
- int iinterface=0;
-
+ int iinterface=0;
+
for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
d_send_potential_dot_dot_buffer[(id + max_nibool_interfaces_ext_mesh*iinterface)] =
d_potential_dot_dot_acoustic[(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
}
- }
+ }
}
@@ -66,22 +66,24 @@
// prepares and transfers the inter-element edge-nodes to the host to be MPI'd
extern "C"
-void FC_FUNC_(transfer_boundary_potential_from_device,
- TRANSFER_BOUNDARY_POTENTIAL_FROM_DEVICE)(
- int* size,
- long* Mesh_pointer_f,
- float* potential_dot_dot_acoustic,
+void FC_FUNC_(transfer_boun_pot_from_device,
+ TRANSFER_BOUN_POT_FROM_DEVICE)(
+ int* size,
+ long* Mesh_pointer_f,
+ float* potential_dot_dot_acoustic,
float* send_potential_dot_dot_buffer,
- int* num_interfaces_ext_mesh,
+ int* num_interfaces_ext_mesh,
int* max_nibool_interfaces_ext_mesh,
- int* nibool_interfaces_ext_mesh,
+ int* nibool_interfaces_ext_mesh,
int* ibool_interfaces_ext_mesh,
int* FORWARD_OR_ADJOINT){
-TRACE("transfer_boundary_potential_from_device");
+TRACE("transfer_boun_pot_from_device");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
+ if( *num_interfaces_ext_mesh == 0 ) return;
+
int blocksize = 256;
int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
@@ -90,11 +92,11 @@
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
- if(*FORWARD_OR_ADJOINT == 1) {
+
+ if(*FORWARD_OR_ADJOINT == 1) {
prepare_boundary_potential_on_device<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
mp->d_send_potential_dot_dot_buffer,
*num_interfaces_ext_mesh,
@@ -108,17 +110,12 @@
*num_interfaces_ext_mesh,
*max_nibool_interfaces_ext_mesh,
mp->d_nibool_interfaces_ext_mesh,
- mp->d_ibool_interfaces_ext_mesh);
+ mp->d_ibool_interfaces_ext_mesh);
}
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_boundary_kernel");
-#endif
-
cudaMemcpy(send_potential_dot_dot_buffer,mp->d_send_potential_dot_dot_buffer,
- *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real),cudaMemcpyDeviceToHost);
-
+ *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(realw),cudaMemcpyDeviceToHost);
+
// finish timing of kernel+memcpy
// cudaEventRecord( stop, 0 );
// cudaEventSynchronize( stop );
@@ -126,22 +123,24 @@
// cudaEventDestroy( start );
// cudaEventDestroy( stop );
// printf("boundary xfer d->h Time: %f ms\n",time);
-
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_boundary_kernel");
+#endif
}
/* ----------------------------------------------------------------------------------------------- */
-__global__ void assemble_boundary_potential_on_device(float* d_potential_dot_dot_acoustic,
+__global__ void assemble_boundary_potential_on_device(float* d_potential_dot_dot_acoustic,
float* d_send_potential_dot_dot_buffer,
- int num_interfaces_ext_mesh,
+ int num_interfaces_ext_mesh,
int max_nibool_interfaces_ext_mesh,
int* d_nibool_interfaces_ext_mesh,
int* d_ibool_interfaces_ext_mesh) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
- int iinterface=0;
+ int iinterface=0;
for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
@@ -149,7 +148,7 @@
// for testing atomic operations against not atomic operations (0.1ms vs. 0.04 ms)
// d_potential_dot_dot_acoustic[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)] +=
// d_send_potential_dot_dot_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)];
-
+
atomicAdd(&d_potential_dot_dot_acoustic[(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)],
d_send_potential_dot_dot_buffer[(id + max_nibool_interfaces_ext_mesh*iinterface)]);
}
@@ -166,34 +165,34 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(transfer_and_assemble_potential_to_device,
- TRANSFER_AND_ASSEMBLE_POTENTIAL_TO_DEVICE)(
- long* Mesh_pointer,
- real* potential_dot_dot_acoustic,
- real* buffer_recv_scalar_ext_mesh,
- int* num_interfaces_ext_mesh,
+extern "C"
+void FC_FUNC_(transfer_asmbl_pot_to_device,
+ TRANSFER_ASMBL_POT_TO_DEVICE)(
+ long* Mesh_pointer,
+ realw* potential_dot_dot_acoustic,
+ realw* buffer_recv_scalar_ext_mesh,
+ int* num_interfaces_ext_mesh,
int* max_nibool_interfaces_ext_mesh,
- int* nibool_interfaces_ext_mesh,
+ int* nibool_interfaces_ext_mesh,
int* ibool_interfaces_ext_mesh,
int* FORWARD_OR_ADJOINT) {
-TRACE("transfer_and_assemble_potential_to_device");
+TRACE("transfer_asmbl_pot_to_device");
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
//double start_time = get_time();
- // cudaEvent_t start, stop;
- // float time;
- // cudaEventCreate(&start);
- // cudaEventCreate(&stop);
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
// cudaEventRecord( start, 0 );
- // copies buffer onto GPU
- cudaMemcpy(mp->d_send_potential_dot_dot_buffer, buffer_recv_scalar_ext_mesh,
- *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real), cudaMemcpyHostToDevice);
+ // copies buffer onto GPU
+ cudaMemcpy(mp->d_send_potential_dot_dot_buffer, buffer_recv_scalar_ext_mesh,
+ *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(realw), cudaMemcpyHostToDevice);
// assembles on GPU
- int blocksize = 256;
+ int blocksize = 256;
int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
@@ -205,25 +204,25 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- if(*FORWARD_OR_ADJOINT == 1) {
+ if(*FORWARD_OR_ADJOINT == 1) {
//assemble forward field
- assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
mp->d_send_potential_dot_dot_buffer,
*num_interfaces_ext_mesh,
*max_nibool_interfaces_ext_mesh,
mp->d_nibool_interfaces_ext_mesh,
mp->d_ibool_interfaces_ext_mesh);
}
- else if(*FORWARD_OR_ADJOINT == 3) {
+ else if(*FORWARD_OR_ADJOINT == 3) {
//assemble reconstructed/backward field
- assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
mp->d_send_potential_dot_dot_buffer,
*num_interfaces_ext_mesh,
*max_nibool_interfaces_ext_mesh,
mp->d_nibool_interfaces_ext_mesh,
- mp->d_ibool_interfaces_ext_mesh);
+ mp->d_ibool_interfaces_ext_mesh);
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
// cudaEventRecord( stop, 0 );
// cudaEventSynchronize( stop );
@@ -233,8 +232,8 @@
// printf("Boundary Assemble Kernel Execution Time: %f ms\n",time);
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
- exit_on_cuda_error("transfer_and_assemble_potential_to_device");
-#endif
+ exit_on_cuda_error("transfer_asmbl_pot_to_device");
+#endif
}
@@ -242,14 +241,14 @@
void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, int SIMULATION_TYPE);
-__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_acoustic,
+__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_acoustic,
int num_phase_ispec_acoustic, int d_iphase,
- float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
- float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
- float* d_gammax, float* d_gammay, float* d_gammaz,
- float* hprime_xx, float* hprime_yy, float* hprime_zz,
- float* hprimewgll_xx, float* hprimewgll_yy, float* hprimewgll_zz,
- float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
+ float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
+ float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
+ float* d_gammax, float* d_gammay, float* d_gammaz,
+ float* hprime_xx, float* hprime_yy, float* hprime_zz,
+ float* hprimewgll_xx, float* hprimewgll_yy, float* hprimewgll_zz,
+ float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
float* d_rhostore);
@@ -269,29 +268,31 @@
TRACE("compute_forces_acoustic_cuda");
//double start_time = get_time();
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
int num_elements;
-
+
if( *iphase == 1 )
num_elements = *nspec_outer_acoustic;
else
- num_elements = *nspec_inner_acoustic;
+ num_elements = *nspec_inner_acoustic;
- //int myrank;
+ if( num_elements == 0 ) return;
+
+ //int myrank;
/* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
/* if(myrank==0) { */
-
- Kernel_2_acoustic(num_elements, mp, *iphase, *SIMULATION_TYPE);
-
- cudaThreadSynchronize();
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ Kernel_2_acoustic(num_elements, mp, *iphase, *SIMULATION_TYPE);
+
+ //cudaThreadSynchronize();
+
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
/* MPI_Barrier(MPI_COMM_WORLD); */
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
-#endif
+//#endif
}
@@ -303,14 +304,14 @@
void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, int SIMULATION_TYPE)
{
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("before acoustic kernel Kernel 2");
#endif
-
+
/* if the grid can handle the number of blocks, we let it be 1D */
/* grid_2_x = nb_elem_color; */
- /* nb_elem_color is just how many blocks we are computing now */
+ /* nb_elem_color is just how many blocks we are computing now */
int num_blocks_x = nb_blocks_to_compute;
int num_blocks_y = 1;
@@ -318,49 +319,53 @@
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
int threads_2 = 128;//BLOCK_SIZE_K2;
- dim3 grid_2(num_blocks_x,num_blocks_y);
+ dim3 grid_2(num_blocks_x,num_blocks_y);
-
+
// Cuda timing
- // cudaEvent_t start, stop;
- // float time;
- // cudaEventCreate(&start);
- // cudaEventCreate(&stop);
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
// cudaEventRecord( start, 0 );
-
- Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
- mp->d_phase_ispec_inner_acoustic, mp->num_phase_ispec_acoustic, d_iphase,
- mp->d_potential_acoustic, mp->d_potential_dot_dot_acoustic,
- mp->d_xix, mp->d_xiy, mp->d_xiz,
- mp->d_etax, mp->d_etay, mp->d_etaz,
- mp->d_gammax, mp->d_gammay, mp->d_gammaz,
- mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
- mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
- mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
- mp->d_rhostore);
+ Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
+ mp->NGLOB_AB, mp->d_ibool,
+ mp->d_phase_ispec_inner_acoustic,
+ mp->num_phase_ispec_acoustic, d_iphase,
+ mp->d_potential_acoustic, mp->d_potential_dot_dot_acoustic,
+ mp->d_xix, mp->d_xiy, mp->d_xiz,
+ mp->d_etax, mp->d_etay, mp->d_etaz,
+ mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+ mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
+ mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
+ mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+ mp->d_rhostore);
+
if(SIMULATION_TYPE == 3) {
- Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
- mp->d_phase_ispec_inner_acoustic,mp->num_phase_ispec_acoustic, d_iphase,
- mp->d_b_potential_acoustic, mp->d_b_potential_dot_dot_acoustic,
- mp->d_xix, mp->d_xiy, mp->d_xiz,
- mp->d_etax, mp->d_etay, mp->d_etaz,
- mp->d_gammax, mp->d_gammay, mp->d_gammaz,
- mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
- mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
- mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
- mp->d_rhostore);
+ Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
+ mp->NGLOB_AB, mp->d_ibool,
+ mp->d_phase_ispec_inner_acoustic,
+ mp->num_phase_ispec_acoustic, d_iphase,
+ mp->d_b_potential_acoustic, mp->d_b_potential_dot_dot_acoustic,
+ mp->d_xix, mp->d_xiy, mp->d_xiz,
+ mp->d_etax, mp->d_etay, mp->d_etaz,
+ mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+ mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
+ mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
+ mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+ mp->d_rhostore);
}
-
+
// cudaEventRecord( stop, 0 );
// cudaEventSynchronize( stop );
// cudaEventElapsedTime( &time, start, stop );
// cudaEventDestroy( start );
// cudaEventDestroy( stop );
// printf("Kernel2 Execution Time: %f ms\n",time);
-
+
/* cudaThreadSynchronize(); */
/* TRACE("Kernel 2 finished"); */
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -377,26 +382,26 @@
__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
- int* d_phase_ispec_inner_acoustic,
+ int* d_phase_ispec_inner_acoustic,
int num_phase_ispec_acoustic, int d_iphase,
- float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
- float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
- float* d_gammax, float* d_gammay, float* d_gammaz,
- float* hprime_xx, float* hprime_yy, float* hprime_zz,
+ float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
+ float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
+ float* d_gammax, float* d_gammay, float* d_gammaz,
+ float* hprime_xx, float* hprime_yy, float* hprime_zz,
float* hprimewgll_xx, float* hprimewgll_yy, float* hprimewgll_zz,
float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
float* d_rhostore){
-
+
int bx = blockIdx.y*gridDim.x+blockIdx.x;
int tx = threadIdx.x;
const int NGLL3 = 125;
const int NGLL3_ALIGN = 128;
-
+
int K = (tx/NGLL2);
int J = ((tx-K*NGLL2)/NGLLX);
int I = (tx-K*NGLL2-J*NGLLX);
-
+
int active,offset,offset1,offset2,offset3;
int iglob = 0;
int working_element;
@@ -409,33 +414,33 @@
int l;
float hp1,hp2,hp3;
#endif
-
+
__shared__ reald s_dummy_loc[NGLL3];
-
+
__shared__ reald s_temp1[NGLL3];
__shared__ reald s_temp2[NGLL3];
__shared__ reald s_temp3[NGLL3];
-
+
// use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
// because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
-
+
// copy from global memory to shared memory
// each thread writes one of the NGLL^3 = 125 data points
if (active) {
// iphase-1 and working_element-1 for Fortran->C array conventions
working_element = d_phase_ispec_inner_acoustic[bx + num_phase_ispec_acoustic*(d_iphase-1)]-1;
- // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
+ // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
iglob = d_ibool[working_element*125 + tx]-1;
-
+
#ifdef USE_TEXTURES
s_dummy_loc[tx] = tex1Dfetch(tex_potential_acoustic, iglob);
#else
- // changing iglob indexing to match fortran row changes fast style
+ // changing iglob indexing to match fortran row changes fast style
s_dummy_loc[tx] = d_potential_acoustic[iglob];
#endif
- }
-
+ }
+
// synchronize all the threads (one thread for each of the NGLL grid points of the
// current spectral element) because we need the whole element to be ready in order
// to be able to compute the matrix products along cut planes of the 3D element below
@@ -446,10 +451,10 @@
if (active) {
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-// if(iglob == 0 )printf("kernel 2: iglob %i hprime_xx %f %f %f \n",iglob,hprime_xx[0],hprime_xx[1],hprime_xx[2]);
+// if(iglob == 0 )printf("kernel 2: iglob %i hprime_xx %f %f %f \n",iglob,hprime_xx[0],hprime_xx[1],hprime_xx[2]);
#endif
-
+
#ifndef MANUALLY_UNROLLED_LOOPS
temp1l = 0.f;
@@ -461,7 +466,7 @@
hp1 = hprime_xx[l*NGLLX+I];
offset1 = K*NGLL2+J*NGLLX+l;
temp1l += s_dummy_loc[offset1]*hp1;
-
+
// daniel: not more assumes that hprime_xx = hprime_yy = hprime_zz
hp2 = hprime_yy[l*NGLLX+J];
offset2 = K*NGLL2+l*NGLLX+I;
@@ -477,8 +482,8 @@
+ s_dummy_loc[K*NGLL2+J*NGLLX+1]*hprime_xx[NGLLX+I]
+ s_dummy_loc[K*NGLL2+J*NGLLX+2]*hprime_xx[2*NGLLX+I]
+ s_dummy_loc[K*NGLL2+J*NGLLX+3]*hprime_xx[3*NGLLX+I]
- + s_dummy_loc[K*NGLL2+J*NGLLX+4]*hprime_xx[4*NGLLX+I];
-
+ + s_dummy_loc[K*NGLL2+J*NGLLX+4]*hprime_xx[4*NGLLX+I];
+
temp2l = s_dummy_loc[K*NGLL2+I]*hprime_yy[J]
+ s_dummy_loc[K*NGLL2+NGLLX+I]*hprime_yy[NGLLX+J]
+ s_dummy_loc[K*NGLL2+2*NGLLX+I]*hprime_yy[2*NGLLX+J]
@@ -511,15 +516,15 @@
+xizl*(etaxl*gammayl-etayl*gammaxl));
// derivatives of potential
- dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l;
+ dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l;
dpotentialdyl = xiyl*temp1l + etayl*temp2l + gammayl*temp3l;
dpotentialdzl = xizl*temp1l + etazl*temp2l + gammazl*temp3l;
-
+
// density (reciproc)
rho_invl = 1.f / d_rhostore[offset];
// form the dot product with the test vector
- s_temp1[tx] = jacobianl * rho_invl * (dpotentialdxl*xixl + dpotentialdyl*xiyl + dpotentialdzl*xizl);
+ s_temp1[tx] = jacobianl * rho_invl * (dpotentialdxl*xixl + dpotentialdyl*xiyl + dpotentialdzl*xizl);
s_temp2[tx] = jacobianl * rho_invl * (dpotentialdxl*etaxl + dpotentialdyl*etayl + dpotentialdzl*etazl);
s_temp3[tx] = jacobianl * rho_invl * (dpotentialdxl*gammaxl + dpotentialdyl*gammayl + dpotentialdzl*gammazl);
}
@@ -541,7 +546,7 @@
fac1 = hprimewgll_xx[I*NGLLX+l];
offset1 = K*NGLL2+J*NGLLX+l;
temp1l += s_temp1[offset1]*fac1;
-
+
//daniel: not more assumes hprimewgll_xx = hprimewgll_yy = hprimewgll_zz
fac2 = hprimewgll_yy[J*NGLLX+l];
offset2 = K*NGLL2+l*NGLLX+I;
@@ -558,8 +563,8 @@
+ s_temp1[K*NGLL2+J*NGLLX+2]*hprimewgll_xx[I*NGLLX+2]
+ s_temp1[K*NGLL2+J*NGLLX+3]*hprimewgll_xx[I*NGLLX+3]
+ s_temp1[K*NGLL2+J*NGLLX+4]*hprimewgll_xx[I*NGLLX+4];
-
+
temp2l = s_temp2[K*NGLL2+I]*hprimewgll_yy[J*NGLLX]
+ s_temp2[K*NGLL2+NGLLX+I]*hprimewgll_yy[J*NGLLX+1]
+ s_temp2[K*NGLL2+2*NGLLX+I]*hprimewgll_yy[J*NGLLX+2]
@@ -581,16 +586,16 @@
fac3 = wgllwgll_xy[J*NGLLX+I];
#ifdef USE_TEXTURES
- d_potential_dot_dot_acoustic[iglob] = tex1Dfetch(tex_potential_dot_dot_acoustic, iglob)
+ d_potential_dot_dot_acoustic[iglob] = tex1Dfetch(tex_potential_dot_dot_acoustic, iglob)
- (fac1*temp1l + fac2*temp2l + fac3*temp3l);
#else
- /* OLD version that uses coloring to get around race condition. About 1.6x faster */
+ /* OLD version that uses coloring to get around race condition. About 1.6x faster */
// d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
// d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
- // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
-
- atomicAdd(&d_potential_dot_dot_acoustic[iglob],-(fac1*temp1l + fac2*temp2l + fac3*temp3l));
-
+ // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+
+ atomicAdd(&d_potential_dot_dot_acoustic[iglob],-(fac1*temp1l + fac2*temp2l + fac3*temp3l));
+
#endif
}
@@ -607,31 +612,31 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void kernel_3_a_acoustic_cuda_device(float* potential_dot_dot_acoustic,
+__global__ void kernel_3_a_acoustic_cuda_device(float* potential_dot_dot_acoustic,
int size,
float* rmass_acoustic) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
+
/* because of block and grid sizing problems, there is a small */
/* amount of buffer at the end of the calculation */
- if(id < size) {
+ if(id < size) {
// multiplies pressure with the inverse of the mass matrix
- potential_dot_dot_acoustic[id] = potential_dot_dot_acoustic[id]*rmass_acoustic[id];
+ potential_dot_dot_acoustic[id] = potential_dot_dot_acoustic[id]*rmass_acoustic[id];
}
}
/* ----------------------------------------------------------------------------------------------- */
__global__ void kernel_3_b_acoustic_cuda_device(float* potential_dot_acoustic,
- float* potential_dot_dot_acoustic,
+ float* potential_dot_dot_acoustic,
int size,
- real deltatover2,
+ realw deltatover2,
float* rmass_acoustic) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
+
/* because of block and grid sizing problems, there is a small */
/* amount of buffer at the end of the calculation */
- if(id < size) {
+ if(id < size) {
// Newmark time scheme: corrector term
potential_dot_acoustic[id] = potential_dot_acoustic[id] + deltatover2*potential_dot_dot_acoustic[id];
}
@@ -642,7 +647,7 @@
extern "C"
void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
long* Mesh_pointer,
- int* size_F,
+ int* size_F,
int* SIMULATION_TYPE) {
TRACE("kernel_3_a_acoustic_cuda");
@@ -660,17 +665,17 @@
}
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
- kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_dot_acoustic,
- size,
+
+ kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_dot_acoustic,
+ size,
mp->d_rmass_acoustic);
if(*SIMULATION_TYPE == 3) {
- kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_dot_acoustic,
size,
mp->d_rmass_acoustic);
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
exit_on_cuda_error("after kernel 3 a");
@@ -682,18 +687,18 @@
extern "C"
void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
long* Mesh_pointer,
- int* size_F,
- float* deltatover2_F,
- int* SIMULATION_TYPE,
+ int* size_F,
+ float* deltatover2_F,
+ int* SIMULATION_TYPE,
float* b_deltatover2_F) {
TRACE("kernel_3_b_acoustic_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
int size = *size_F;
- real deltatover2 = *deltatover2_F;
- real b_deltatover2 = *b_deltatover2_F;
-
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+
int blocksize=128;
int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
@@ -704,19 +709,19 @@
}
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
- kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_acoustic,
- mp->d_potential_dot_dot_acoustic,
- size, deltatover2,
+
+ kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_acoustic,
+ mp->d_potential_dot_dot_acoustic,
+ size, deltatover2,
mp->d_rmass_acoustic);
-
+
if(*SIMULATION_TYPE == 3) {
- kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_acoustic,
- mp->d_b_potential_dot_dot_acoustic,
+ kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_acoustic,
+ mp->d_b_potential_dot_dot_acoustic,
size, b_deltatover2,
mp->d_rmass_acoustic);
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
exit_on_cuda_error("after kernel 3 b");
@@ -733,68 +738,71 @@
__global__ void enforce_free_surface_cuda_kernel(
- float* potential_acoustic,
+ float* potential_acoustic,
float* potential_dot_acoustic,
- float* potential_dot_dot_acoustic,
+ float* potential_dot_dot_acoustic,
int num_free_surface_faces,
int* free_surface_ispec,
int* free_surface_ijk,
int* ibool,
int* ispec_is_acoustic) {
// gets spectral element face id
- int iface = blockIdx.x + gridDim.x*blockIdx.y;
-
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
// for all faces on free surface
if( iface < num_free_surface_faces ){
-
+
int ispec = free_surface_ispec[iface]-1;
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
// if( iface > 648-1 ){printf("device iface: %i \n",iface);}
//#endif
-
+
// checks if element is in acoustic domain
- if( ispec_is_acoustic[ispec] == 1 ){
-
+ if( ispec_is_acoustic[ispec] ){
+
// gets global point index
- int igll = threadIdx.x + threadIdx.y*blockDim.x;
+ int igll = threadIdx.x + threadIdx.y*blockDim.x;
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
// if( igll > 25-1 ){printf("device igll: %i \n",igll);}
//#endif
-
+
int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface)
int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-
+
int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
-
+
// sets potentials to zero at free surface
potential_acoustic[iglob] = 0;
potential_dot_acoustic[iglob] = 0;
- potential_dot_dot_acoustic[iglob] = 0;
+ potential_dot_dot_acoustic[iglob] = 0;
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
// if( ispec == 160 && igll < 25 ){printf("device: %i %i %i %i %i \n",igll,i,j,k,iglob);}
//#endif
}
- }
+ }
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(acoustic_enforce_free_surface_cuda,
- ACOUSTIC_ENFORCE_FREE_SURFACE_CUDA)(long* Mesh_pointer_f,
+void FC_FUNC_(acoustic_enforce_free_surf_cuda,
+ ACOUSTIC_ENFORCE_FREE_SURF_CUDA)(long* Mesh_pointer_f,
int* SIMULATION_TYPE,
int* ABSORB_FREE_SURFACE) {
-
-TRACE("acoustic_enforce_free_surface_cuda");
+TRACE("acoustic_enforce_free_surf_cuda");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
// checks if anything to do
if( *ABSORB_FREE_SURFACE == 0 ){
+
+ // does not absorb free surface, thus we enforce the potential to be zero at surface
+
// block sizes
int num_blocks_x = mp->num_free_surface_faces;
int num_blocks_y = 1;
@@ -808,45 +816,45 @@
//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
// debugging
//int* d_debug;
- //printf("acoustic_enforce_free_surface_cuda ...\n");
+ //printf("acoustic_enforce_free_surf_cuda ...\n");
//print_CUDA_error_if_any(cudaMalloc((void**)&d_debug,128*sizeof(int)),999);
//int* h_debug;
//h_debug = (int*) calloc(128,sizeof(int));
//for(int i=0;i<128;i++){h_debug[i] = 0;}
//cudaMemcpy(d_debug,h_debug,128*sizeof(int),cudaMemcpyHostToDevice);
-
- //printf("acoustic_enforce_free_surface_cuda start...\n");
+
+ //printf("acoustic_enforce_free_surf_cuda start...\n");
//doesnt' work...: printf("free_surface_ispec: %i %i %i \n",mp->d_free_surface_ispec[0],mp->d_free_surface_ispec[1],mp->d_free_surface_ispec[2]);
//printf("free_surface_ispec: %i \n",mp->num_free_surface_faces);
//cudaThreadSynchronize();
//#endif
-
+
// sets potentials to zero at free surface
- enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_potential_acoustic,
- mp->d_potential_dot_acoustic,
+ enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_potential_acoustic,
+ mp->d_potential_dot_acoustic,
mp->d_potential_dot_dot_acoustic,
mp->num_free_surface_faces,
- mp->d_free_surface_ispec,
+ mp->d_free_surface_ispec,
mp->d_free_surface_ijk,
mp->d_ibool,
mp->d_ispec_is_acoustic);
// for backward/reconstructed potentials
if(*SIMULATION_TYPE == 3) {
- enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic,
- mp->d_b_potential_dot_acoustic,
+ enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic,
+ mp->d_b_potential_dot_acoustic,
mp->d_b_potential_dot_dot_acoustic,
mp->num_free_surface_faces,
- mp->d_free_surface_ispec,
+ mp->d_free_surface_ispec,
mp->d_free_surface_ijk,
mp->d_ibool,
- mp->d_ispec_is_acoustic);
-
+ mp->d_ispec_is_acoustic);
+
}
//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//cudaThreadSynchronize();
- //cudaMemcpy(h_debug,d_debug,128*sizeof(int),cudaMemcpyDeviceToHost);
+ //cudaMemcpy(h_debug,d_debug,128*sizeof(int),cudaMemcpyDeviceToHost);
//for(int i=0;i<25;i++) {printf("ispec d_debug = %d \n",h_debug[i]);}
//cudaFree(d_debug);
//free(h_debug);
@@ -854,9 +862,9 @@
//#endif
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("enforce_free_surface_cuda");
-#endif
+#endif
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -52,17 +52,17 @@
void Kernel_2(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
- int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE,int ATTENUATION);
+ int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE,int ATTENUATION);
-//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic,
+//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic,
// int num_phase_ispec_elastic, int d_iphase, int* d_ibool);
__global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,
- float* d_displ, float* d_accel,
- float* d_xix, float* d_xiy, float* d_xiz,
- float* d_etax, float* d_etay, float* d_etaz,
- float* d_gammax, float* d_gammay, float* d_gammaz,
+ float* d_displ, float* d_accel,
+ float* d_xix, float* d_xiy, float* d_xiz,
+ float* d_etax, float* d_etay, float* d_etaz,
+ float* d_gammax, float* d_gammay, float* d_gammaz,
float* d_kappav, float* d_muv,
float* d_debug,
int COMPUTE_AND_STORE_STRAIN,
@@ -70,7 +70,7 @@
float* epsilondev_xz,float* epsilondev_yz,float* epsilon_trace_over_3,
int SIMULATION_TYPE,
int ATTENUATION,int NSPEC,
- float* one_minus_sum_beta,float* factor_common,
+ float* one_minus_sum_beta,float* factor_common,
float* R_xx,float* R_yy,float* R_xy,float* R_xz,float* R_yz,
float* alphaval,float* betaval,float* gammaval);
@@ -80,46 +80,48 @@
// prepares a device array with with all inter-element edge-nodes -- this
// is followed by a memcpy and MPI operations
__global__ void prepare_boundary_accel_on_device(float* d_accel, float* d_send_accel_buffer,
- int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
- int* d_nibool_interfaces_ext_mesh,
- int* d_ibool_interfaces_ext_mesh) {
+ int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
//int bx = blockIdx.y*gridDim.x+blockIdx.x;
//int tx = threadIdx.x;
- int iinterface=0;
-
+ int iinterface=0;
+
for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)] =
- d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
+ d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1] =
- d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1];
+ d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1];
d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2] =
- d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2];
+ d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2];
}
- }
+ }
}
/* ----------------------------------------------------------------------------------------------- */
// prepares and transfers the inter-element edge-nodes to the host to be MPI'd
-extern "C"
-void FC_FUNC_(transfer_boundary_accel_from_device,
- TRANSFER_BOUNDARY_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, float* accel,
+// (elements on boundary)
+extern "C"
+void FC_FUNC_(transfer_boun_accel_from_device,
+ TRANSFER_BOUN_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, float* accel,
float* send_accel_buffer,
int* num_interfaces_ext_mesh,
int* max_nibool_interfaces_ext_mesh,
int* nibool_interfaces_ext_mesh,
int* ibool_interfaces_ext_mesh,
int* FORWARD_OR_ADJOINT){
-TRACE("transfer_boundary_accel_from_device");
+TRACE("transfer_boun_accel_from_device");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
+ if( *num_interfaces_ext_mesh == 0 ) return;
+
int blocksize = 256;
-
int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
@@ -127,39 +129,35 @@
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
//timing for memory xfer
- // cudaEvent_t start, stop;
- // float time;
- // cudaEventCreate(&start);
- // cudaEventCreate(&stop);
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
// cudaEventRecord( start, 0 );
if(*FORWARD_OR_ADJOINT == 1) {
- prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel,mp->d_send_accel_buffer,
- *num_interfaces_ext_mesh,
- *max_nibool_interfaces_ext_mesh,
- mp->d_nibool_interfaces_ext_mesh,
- mp->d_ibool_interfaces_ext_mesh);
+ prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel,mp->d_send_accel_buffer,
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
}
else if(*FORWARD_OR_ADJOINT == 3) {
prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel,mp->d_send_accel_buffer,
- *num_interfaces_ext_mesh,
- *max_nibool_interfaces_ext_mesh,
- mp->d_nibool_interfaces_ext_mesh,
- mp->d_ibool_interfaces_ext_mesh);
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
}
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("transfer_boundary_accel_from_device");
-#endif
-
+
cudaMemcpy(send_accel_buffer,mp->d_send_accel_buffer,
- 3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real),cudaMemcpyDeviceToHost);
-
+ 3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(realw),cudaMemcpyDeviceToHost);
+
// finish timing of kernel+memcpy
// cudaEventRecord( stop, 0 );
// cudaEventSynchronize( stop );
@@ -167,20 +165,22 @@
// cudaEventDestroy( start );
// cudaEventDestroy( stop );
// printf("boundary xfer d->h Time: %f ms\n",time);
-
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_boun_accel_from_device");
+#endif
}
/* ----------------------------------------------------------------------------------------------- */
__global__ void assemble_boundary_accel_on_device(float* d_accel, float* d_send_accel_buffer,
- int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
- int* d_nibool_interfaces_ext_mesh,
- int* d_ibool_interfaces_ext_mesh) {
+ int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
//int bx = blockIdx.y*gridDim.x+blockIdx.x;
//int tx = threadIdx.x;
- int iinterface=0;
+ int iinterface=0;
for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
@@ -192,14 +192,14 @@
// d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1];
// d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2] +=
// d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2];
-
-
+
+
atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)],
- d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)]);
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)]);
atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1],
- d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1]);
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1]);
atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2],
- d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2]);
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2]);
}
}
// ! This step is done via previous function transfer_and_assemble...
@@ -214,20 +214,22 @@
/* ----------------------------------------------------------------------------------------------- */
// FORWARD_OR_ADJOINT == 1 for accel, and == 3 for b_accel
-extern "C"
-void FC_FUNC_(transfer_and_assemble_accel_to_device,
- TRANSFER_AND_ASSEMBLE_ACCEL_TO_DEVICE)(long* Mesh_pointer, real* accel,
- real* buffer_recv_vector_ext_mesh,
+extern "C"
+void FC_FUNC_(transfer_asmbl_accel_to_device,
+ TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer, realw* accel,
+ realw* buffer_recv_vector_ext_mesh,
int* num_interfaces_ext_mesh,
int* max_nibool_interfaces_ext_mesh,
int* nibool_interfaces_ext_mesh,
- int* ibool_interfaces_ext_mesh,int* FORWARD_OR_ADJOINT) {
-TRACE("transfer_and_assemble_accel_to_device");
+ int* ibool_interfaces_ext_mesh,
+ int* FORWARD_OR_ADJOINT) {
+TRACE("transfer_asmbl_accel_to_device");
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-
- cudaMemcpy(mp->d_send_accel_buffer, buffer_recv_vector_ext_mesh, 3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real), cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_send_accel_buffer, buffer_recv_vector_ext_mesh,
+ 3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(realw), cudaMemcpyHostToDevice);
+
int blocksize = 256;
int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
int num_blocks_x = size_padded/blocksize;
@@ -240,24 +242,24 @@
//double start_time = get_time();
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- // cudaEvent_t start, stop;
- // float time;
- // cudaEventCreate(&start);
- // cudaEventCreate(&stop);
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
// cudaEventRecord( start, 0 );
if(*FORWARD_OR_ADJOINT == 1) { //assemble forward accel
assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel, mp->d_send_accel_buffer,
- *num_interfaces_ext_mesh,
- *max_nibool_interfaces_ext_mesh,
- mp->d_nibool_interfaces_ext_mesh,
- mp->d_ibool_interfaces_ext_mesh);
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
}
else if(*FORWARD_OR_ADJOINT == 3) { //assemble adjoint accel
assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel, mp->d_send_accel_buffer,
- *num_interfaces_ext_mesh,
- *max_nibool_interfaces_ext_mesh,
- mp->d_nibool_interfaces_ext_mesh,
- mp->d_ibool_interfaces_ext_mesh);
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
}
// cudaEventRecord( stop, 0 );
@@ -269,7 +271,7 @@
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
- exit_on_cuda_error("transfer_and_assemble_accel_to_device_");
+ exit_on_cuda_error("transfer_asmbl_accel_to_device");
#endif
}
@@ -277,43 +279,45 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(compute_forces_elastic_cuda,
COMPUTE_FORCES_ELASTIC_CUDA)(long* Mesh_pointer_f,
int* iphase,
int* nspec_outer_elastic,
int* nspec_inner_elastic,
+ int* SIMULATION_TYPE,
int* COMPUTE_AND_STORE_STRAIN,
- int* SIMULATION_TYPE,
int* ATTENUATION) {
-TRACE("compute_forces_elastic_cuda");
+TRACE("compute_forces_elastic_cuda");
// EPIK_TRACER("compute_forces_elastic_cuda");
//printf("Running compute_forces\n");
//double start_time = get_time();
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
int num_elements;
-
+
if( *iphase == 1 )
num_elements = *nspec_outer_elastic;
else
- num_elements = *nspec_inner_elastic;
+ num_elements = *nspec_inner_elastic;
- //int myrank;
+ if( num_elements == 0 ) return;
+
+ //int myrank;
/* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
/* if(myrank==0) { */
- Kernel_2(num_elements,mp,*iphase,*COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE,*ATTENUATION);
-
- cudaThreadSynchronize();
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ Kernel_2(num_elements,mp,*iphase,*COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE,*ATTENUATION);
+
+ //cudaThreadSynchronize();
+
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
/* MPI_Barrier(MPI_COMM_WORLD); */
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
-#endif
+//#endif
}
@@ -322,28 +326,28 @@
// updates stress
__device__ void compute_element_att_stress(int tx,int working_element,int NSPEC,
- float* R_xx,
- float* R_yy,
- float* R_xy,
- float* R_xz,
- float* R_yz,
- reald* sigma_xx,
- reald* sigma_yy,
+ float* R_xx,
+ float* R_yy,
+ float* R_xy,
+ float* R_xz,
+ float* R_yz,
+ reald* sigma_xx,
+ reald* sigma_yy,
reald* sigma_zz,
- reald* sigma_xy,
+ reald* sigma_xy,
reald* sigma_xz,
reald* sigma_yz) {
int i_sls,offset_sls;
reald R_xx_val,R_yy_val;
-
+
for(i_sls = 0; i_sls < N_SLS; i_sls++){
// index
offset_sls = tx + 125*(working_element + NSPEC*i_sls);
-
+
R_xx_val = R_xx[offset_sls]; //(i,j,k,ispec,i_sls)
R_yy_val = R_yy[offset_sls];
-
+
*sigma_xx = *sigma_xx - R_xx_val;
*sigma_yy = *sigma_yy - R_yy_val;
*sigma_zz = *sigma_zz + R_xx_val + R_yy_val;
@@ -352,7 +356,7 @@
*sigma_yz = *sigma_yz - R_yz[offset_sls];
}
return;
-}
+}
/* ----------------------------------------------------------------------------------------------- */
@@ -375,59 +379,59 @@
reald mul;
reald alphaval_loc,betaval_loc,gammaval_loc;
reald factor_loc,Sn,Snp1;
-
+
// indices
offset_align = tx + 128 * working_element;
ijk_ispec = tx + 125 * working_element;
-
+
mul = d_muv[offset_align];
-
+
// use Runge-Kutta scheme to march in time
for(i_sls = 0; i_sls < N_SLS; i_sls++){
// indices
offset_common = i_sls + N_SLS*(tx + 125*working_element); // (i_sls,i,j,k,ispec)
offset_sls = tx + 125*(working_element + NSPEC*i_sls); // (i,j,k,ispec,i_sls)
-
+
factor_loc = mul * factor_common[offset_common]; //mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
-
+
alphaval_loc = alphaval[i_sls]; // (i_sls)
betaval_loc = betaval[i_sls];
gammaval_loc = gammaval[i_sls];
-
+
// term in xx
Sn = factor_loc * epsilondev_xx[ijk_ispec]; //(i,j,k,ispec)
Snp1 = factor_loc * epsilondev_xx_loc; //(i,j,k)
-
- //R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) +
+
+ //R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) +
// betaval_loc * Sn + gammaval_loc * Snp1;
- R_xx[offset_sls] = alphaval_loc * R_xx[offset_sls] +
+ R_xx[offset_sls] = alphaval_loc * R_xx[offset_sls] +
betaval_loc * Sn + gammaval_loc * Snp1;
// term in yy
Sn = factor_loc * epsilondev_yy[ijk_ispec];
Snp1 = factor_loc * epsilondev_yy_loc;
- R_yy[offset_sls] = alphaval_loc * R_yy[offset_sls] +
+ R_yy[offset_sls] = alphaval_loc * R_yy[offset_sls] +
betaval_loc * Sn + gammaval_loc * Snp1;
// term in zz not computed since zero trace
// term in xy
Sn = factor_loc * epsilondev_xy[ijk_ispec];
Snp1 = factor_loc * epsilondev_xy_loc;
- R_xy[offset_sls] = alphaval_loc * R_xy[offset_sls] +
+ R_xy[offset_sls] = alphaval_loc * R_xy[offset_sls] +
betaval_loc * Sn + gammaval_loc * Snp1;
// term in xz
Sn = factor_loc * epsilondev_xz[ijk_ispec];
Snp1 = factor_loc * epsilondev_xz_loc;
- R_xz[offset_sls] = alphaval_loc * R_xz[offset_sls] +
+ R_xz[offset_sls] = alphaval_loc * R_xz[offset_sls] +
betaval_loc * Sn + gammaval_loc * Snp1;
// term in yz
Sn = factor_loc * epsilondev_yz[ijk_ispec];
Snp1 = factor_loc * epsilondev_yz_loc;
- R_yz[offset_sls] = alphaval_loc * R_yz[offset_sls] +
+ R_yz[offset_sls] = alphaval_loc * R_yz[offset_sls] +
betaval_loc * Sn + gammaval_loc * Snp1;
}
- return;
+ return;
}
/* ----------------------------------------------------------------------------------------------- */
@@ -436,129 +440,139 @@
/* ----------------------------------------------------------------------------------------------- */
-void Kernel_2(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
- int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE,int ATTENUATION)
- {
-
+void Kernel_2(int nb_blocks_to_compute,
+ Mesh* mp,
+ int d_iphase,
+ int COMPUTE_AND_STORE_STRAIN,
+ int SIMULATION_TYPE,
+ int ATTENUATION){
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("before kernel Kernel 2");
#endif
-
- /* if the grid can handle the number of blocks, we let it be 1D */
- /* grid_2_x = nb_elem_color; */
- /* nb_elem_color is just how many blocks we are computing now */
- int num_blocks_x = nb_blocks_to_compute;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- int threads_2 = 128;//BLOCK_SIZE_K2;
- dim3 grid_2(num_blocks_x,num_blocks_y);
+ /* if the grid can handle the number of blocks, we let it be 1D */
+ /* grid_2_x = nb_elem_color; */
+ /* nb_elem_color is just how many blocks we are computing now */
- // debugging
- //printf("Starting with grid %dx%d for %d blocks\n",num_blocks_x,num_blocks_y,nb_blocks_to_compute);
- float* d_debug;
+ int num_blocks_x = nb_blocks_to_compute;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ //int threads_2 = 128;//BLOCK_SIZE_K2;
+ //dim3 grid_2(num_blocks_x,num_blocks_y);
+
+ int blocksize = 128;
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // debugging
+ //printf("Starting with grid %dx%d for %d blocks\n",num_blocks_x,num_blocks_y,nb_blocks_to_compute);
+ float* d_debug;
// float* h_debug;
// h_debug = (float*)calloc(128,sizeof(float));
// cudaMalloc((void**)&d_debug,128*sizeof(float));
// cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-
- // Cuda timing
- // cudaEvent_t start, stop;
- // float time;
- // cudaEventCreate(&start);
- // cudaEventCreate(&stop);
- // cudaEventRecord( start, 0 );
-
- Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
- mp->d_phase_ispec_inner_elastic,
- mp->d_num_phase_ispec_elastic, d_iphase,
- mp->d_displ, mp->d_accel,
- mp->d_xix, mp->d_xiy, mp->d_xiz,
- mp->d_etax, mp->d_etay, mp->d_etaz,
- mp->d_gammax, mp->d_gammay, mp->d_gammaz,
- mp->d_kappav, mp->d_muv,
- d_debug,
- COMPUTE_AND_STORE_STRAIN,
- mp->d_epsilondev_xx,
- mp->d_epsilondev_yy,
- mp->d_epsilondev_xy,
- mp->d_epsilondev_xz,
- mp->d_epsilondev_yz,
- mp->d_epsilon_trace_over_3,
- SIMULATION_TYPE,
- ATTENUATION,mp->NSPEC_AB,
- mp->d_one_minus_sum_beta,mp->d_factor_common,
- mp->d_R_xx,mp->d_R_yy,mp->d_R_xy,mp->d_R_xz,mp->d_R_yz,
- mp->d_alphaval,mp->d_betaval,mp->d_gammaval
- );
-
- // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- // int procid;
- // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- // if(procid==0) {
- // for(int i=0;i<17;i++) {
- // printf("cudadebug[%d] = %e\n",i,h_debug[i]);
- // }
- // }
+ // Cuda timing
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+
+ //Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+ Kernel_2_impl<<<grid,threads>>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+ mp->d_phase_ispec_inner_elastic,
+ mp->num_phase_ispec_elastic,
+ d_iphase,
+ mp->d_displ, mp->d_accel,
+ mp->d_xix, mp->d_xiy, mp->d_xiz,
+ mp->d_etax, mp->d_etay, mp->d_etaz,
+ mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+ mp->d_kappav, mp->d_muv,
+ d_debug,
+ COMPUTE_AND_STORE_STRAIN,
+ mp->d_epsilondev_xx,
+ mp->d_epsilondev_yy,
+ mp->d_epsilondev_xy,
+ mp->d_epsilondev_xz,
+ mp->d_epsilondev_yz,
+ mp->d_epsilon_trace_over_3,
+ SIMULATION_TYPE,
+ ATTENUATION,mp->NSPEC_AB,
+ mp->d_one_minus_sum_beta,mp->d_factor_common,
+ mp->d_R_xx,mp->d_R_yy,mp->d_R_xy,mp->d_R_xz,mp->d_R_yz,
+ mp->d_alphaval,mp->d_betaval,mp->d_gammaval
+ );
+
+
+ // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ // int procid;
+ // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ // if(procid==0) {
+ // for(int i=0;i<17;i++) {
+ // printf("cudadebug[%d] = %e\n",i,h_debug[i]);
+ // }
+ // }
// free(h_debug);
// cudaFree(d_debug);
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("Kernel_2_impl");
- #endif
-
- if(SIMULATION_TYPE == 3) {
- Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
- mp->d_phase_ispec_inner_elastic,
- mp->d_num_phase_ispec_elastic, d_iphase,
- mp->d_b_displ, mp->d_b_accel,
- mp->d_xix, mp->d_xiy, mp->d_xiz,
- mp->d_etax, mp->d_etay, mp->d_etaz,
- mp->d_gammax, mp->d_gammay, mp->d_gammaz,
- mp->d_kappav, mp->d_muv,
- d_debug,
- COMPUTE_AND_STORE_STRAIN,
- mp->d_b_epsilondev_xx,
- mp->d_b_epsilondev_yy,
- mp->d_b_epsilondev_xy,
- mp->d_b_epsilondev_xz,
- mp->d_b_epsilondev_yz,
- mp->d_b_epsilon_trace_over_3,
- SIMULATION_TYPE,
- ATTENUATION,mp->NSPEC_AB,
- mp->d_one_minus_sum_beta,mp->d_factor_common,
- mp->d_b_R_xx,mp->d_b_R_yy,mp->d_b_R_xy,mp->d_b_R_xz,mp->d_b_R_yz,
- mp->d_b_alphaval,mp->d_b_betaval,mp->d_b_gammaval
- );
- }
-
- // cudaEventRecord( stop, 0 );
- // cudaEventSynchronize( stop );
- // cudaEventElapsedTime( &time, start, stop );
- // cudaEventDestroy( start );
- // cudaEventDestroy( stop );
- // printf("Kernel2 Execution Time: %f ms\n",time);
-
- // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- // for(int i=0;i<10;i++) {
- // printf("debug[%d]=%e\n",i,h_debug[i]);
- // }
-
- /* cudaThreadSynchronize(); */
- /* LOG("Kernel 2 finished"); */
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("Kernel_2_impl SIM_TYPE==3");
- #endif
+// #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+// exit_on_cuda_error("Kernel_2_impl");
+// #endif
+ if(SIMULATION_TYPE == 3) {
+ //Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+ Kernel_2_impl<<< grid,threads>>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+ mp->d_phase_ispec_inner_elastic,
+ mp->num_phase_ispec_elastic,
+ d_iphase,
+ mp->d_b_displ, mp->d_b_accel,
+ mp->d_xix, mp->d_xiy, mp->d_xiz,
+ mp->d_etax, mp->d_etay, mp->d_etaz,
+ mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+ mp->d_kappav, mp->d_muv,
+ d_debug,
+ COMPUTE_AND_STORE_STRAIN,
+ mp->d_b_epsilondev_xx,
+ mp->d_b_epsilondev_yy,
+ mp->d_b_epsilondev_xy,
+ mp->d_b_epsilondev_xz,
+ mp->d_b_epsilondev_yz,
+ mp->d_b_epsilon_trace_over_3,
+ SIMULATION_TYPE,
+ ATTENUATION,mp->NSPEC_AB,
+ mp->d_one_minus_sum_beta,mp->d_factor_common,
+ mp->d_b_R_xx,mp->d_b_R_yy,mp->d_b_R_xy,mp->d_b_R_xz,mp->d_b_R_yz,
+ mp->d_b_alphaval,mp->d_b_betaval,mp->d_b_gammaval
+ );
}
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("Kernel2 Execution Time: %f ms\n",time);
+
+ // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ // for(int i=0;i<10;i++) {
+ // printf("debug[%d]=%e\n",i,h_debug[i]);
+ // }
+
+ /* cudaThreadSynchronize(); */
+ /* LOG("Kernel 2 finished"); */
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("Kernel_2_impl ");
+#endif
+}
+
/* ----------------------------------------------------------------------------------------------- */
-//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic,
+//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic,
// int num_phase_ispec_elastic, int d_iphase, int* d_ibool) {
// int bx = blockIdx.x;
// int tx = threadIdx.x;
@@ -577,7 +591,7 @@
// /* d_debug_output[1] = d_ibool[working_element*NGLL3_ALIGN + tx]-1; */
// }
// /* d_debug_output[1+tx+128*bx] = 69.0; */
-//
+//
//}
/* ----------------------------------------------------------------------------------------------- */
@@ -592,10 +606,10 @@
__global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,
- float* d_displ, float* d_accel,
- float* d_xix, float* d_xiy, float* d_xiz,
- float* d_etax, float* d_etay, float* d_etaz,
- float* d_gammax, float* d_gammay, float* d_gammaz,
+ float* d_displ, float* d_accel,
+ float* d_xix, float* d_xiy, float* d_xiz,
+ float* d_etax, float* d_etay, float* d_etaz,
+ float* d_gammax, float* d_gammay, float* d_gammaz,
float* d_kappav, float* d_muv,
float* d_debug,
int COMPUTE_AND_STORE_STRAIN,
@@ -604,29 +618,29 @@
float* epsilon_trace_over_3,
int SIMULATION_TYPE,
int ATTENUATION, int NSPEC,
- float* one_minus_sum_beta,float* factor_common,
+ float* one_minus_sum_beta,float* factor_common,
float* R_xx, float* R_yy, float* R_xy, float* R_xz, float* R_yz,
float* alphaval,float* betaval,float* gammaval
){
-
+
/* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
int bx = blockIdx.y*gridDim.x+blockIdx.x;
/* int bx = blockIdx.x; */
- int tx = threadIdx.x;
-
+ int tx = threadIdx.x;
+
//const int NGLLX = 5;
- // const int NGLL2 = 25;
+ // const int NGLL2 = 25;
const int NGLL3 = 125;
const int NGLL3_ALIGN = 128;
-
+
int K = (tx/NGLL2);
int J = ((tx-K*NGLL2)/NGLLX);
int I = (tx-K*NGLL2-J*NGLLX);
-
+
int active,offset;
int iglob = 0;
int working_element;
-
+
reald tempx1l,tempx2l,tempx3l,tempy1l,tempy2l,tempy3l,tempz1l,tempz2l,tempz3l;
reald xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl;
reald duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl;
@@ -640,11 +654,11 @@
int l;
float hp1,hp2,hp3;
#endif
-
+
__shared__ reald s_dummyx_loc[NGLL3];
__shared__ reald s_dummyy_loc[NGLL3];
__shared__ reald s_dummyz_loc[NGLL3];
-
+
__shared__ reald s_tempx1[NGLL3];
__shared__ reald s_tempx2[NGLL3];
__shared__ reald s_tempx3[NGLL3];
@@ -654,19 +668,19 @@
__shared__ reald s_tempz1[NGLL3];
__shared__ reald s_tempz2[NGLL3];
__shared__ reald s_tempz3[NGLL3];
-
+
// use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
// because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
-
+
// copy from global memory to shared memory
// each thread writes one of the NGLL^3 = 125 data points
if (active) {
// iphase-1 and working_element-1 for Fortran->C array conventions
working_element = d_phase_ispec_inner_elastic[bx + num_phase_ispec_elastic*(d_iphase-1)]-1;
- // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
+ // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
iglob = d_ibool[working_element*125 + tx]-1;
-
+
#ifdef USE_TEXTURES
s_dummyx_loc[tx] = tex1Dfetch(tex_displ, iglob);
s_dummyy_loc[tx] = tex1Dfetch(tex_displ, iglob + NGLOB);
@@ -708,7 +722,7 @@
tempx1l += s_dummyx_loc[offset]*hp1;
tempy1l += s_dummyy_loc[offset]*hp1;
tempz1l += s_dummyz_loc[offset]*hp1;
-
+
hp2 = d_hprime_xx[l*NGLLX+J];
offset = K*NGLL2+l*NGLLX+I;
tempx2l += s_dummyx_loc[offset]*hp2;
@@ -724,10 +738,10 @@
// if(working_element == 169 && tx == 0) {
// atomicAdd(&d_debug[0],1.0);
// d_debug[1+3*l] = tempz3l;
- // d_debug[2+3*l] = s_dummyz_loc[offset];
- // d_debug[3+3*l] = hp3;
+ // d_debug[2+3*l] = s_dummyz_loc[offset];
+ // d_debug[3+3*l] = hp3;
// }
-
+
}
#else
@@ -735,7 +749,7 @@
+ s_dummyx_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+ s_dummyx_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+ s_dummyx_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
- + s_dummyx_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
tempy1l = s_dummyy_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+ s_dummyy_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
@@ -800,7 +814,7 @@
gammayl = d_gammay[offset];
gammazl = d_gammaz[offset];
- duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l;
+ duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l;
duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l;
duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l;
@@ -831,12 +845,12 @@
epsilondev_xz[offset] = 0.5f * duzdxl_plus_duxdzl;
epsilondev_yz[offset] = 0.5f * duzdyl_plus_duydzl;
*/
- // local storage: stresses at this current time step
+ // local storage: stresses at this current time step
epsilondev_xx_loc = duxdxl - templ;
epsilondev_yy_loc = duydyl - templ;
epsilondev_xy_loc = 0.5f * duxdyl_plus_duydxl;
epsilondev_xz_loc = 0.5f * duzdxl_plus_duxdzl;
- epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl;
+ epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl;
if(SIMULATION_TYPE == 3) {
epsilon_trace_over_3[tx + working_element*125] = templ;
@@ -852,8 +866,8 @@
// use unrelaxed parameters if attenuation
mul = mul * one_minus_sum_beta[tx+working_element*125]; // (i,j,k,ispec)
}
-
- // isotropic case
+
+ // isotropic case
lambdalplus2mul = kappal + 1.33333333333333333333f * mul; // 4./3. = 1.3333333
lambdal = lambdalplus2mul - 2.0f * mul;
@@ -861,7 +875,7 @@
sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl;
sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl;
sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl;
-
+
sigma_xy = mul*duxdyl_plus_duydxl;
sigma_xz = mul*duzdxl_plus_duxdzl;
sigma_yz = mul*duzdyl_plus_duydzl;
@@ -872,11 +886,11 @@
R_xx,R_yy,R_xy,R_xz,R_yz,
&sigma_xx,&sigma_yy,&sigma_zz,&sigma_xy,&sigma_xz,&sigma_yz);
}
-
+
jacobianl = 1.0f / (xixl*(etayl*gammazl-etazl*gammayl)-xiyl*(etaxl*gammazl-etazl*gammaxl)+xizl*(etaxl*gammayl-etayl*gammaxl));
// form the dot product with the test vector
- s_tempx1[tx] = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl);
+ s_tempx1[tx] = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl);
s_tempy1[tx] = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_yz*xizl);
s_tempz1[tx] = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl);
@@ -887,7 +901,7 @@
s_tempx3[tx] = jacobianl * (sigma_xx*gammaxl + sigma_xy*gammayl + sigma_xz*gammazl);
s_tempy3[tx] = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_yz*gammazl);
s_tempz3[tx] = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl);
-
+
}
// synchronize all the threads (one thread for each of the NGLL grid points of the
@@ -911,14 +925,14 @@
tempy3l = 0.f;
tempz3l = 0.f;
- for (l=0;l<NGLLX;l++) {
-
+ for (l=0;l<NGLLX;l++) {
+
fac1 = d_hprimewgll_xx[I*NGLLX+l];
offset = K*NGLL2+J*NGLLX+l;
tempx1l += s_tempx1[offset]*fac1;
tempy1l += s_tempy1[offset]*fac1;
tempz1l += s_tempz1[offset]*fac1;
-
+
fac2 = d_hprimewgll_yy[J*NGLLX+l];
offset = K*NGLL2+l*NGLLX+I;
tempx2l += s_tempx2[offset]*fac2;
@@ -1007,12 +1021,12 @@
d_accel[iglob + NGLOB] = tex1Dfetch(tex_accel, iglob + NGLOB) - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
d_accel[iglob + 2*NGLOB] = tex1Dfetch(tex_accel, iglob + 2*NGLOB) - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
#else
- /* OLD/To be implemented version that uses coloring to get around race condition. About 1.6x faster */
+ /* OLD/To be implemented version that uses coloring to get around race condition. About 1.6x faster */
// d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
// d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
- // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+ // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
- if(iglob*3+2 == 41153) {
+ //if(iglob*3+2 == 41153) {
// int ot = d_debug[5];
// d_debug[0+1+ot] = d_accel[iglob*3+2];
// // d_debug[1+1+ot] = fac1*tempz1l;
@@ -1024,12 +1038,12 @@
// d_debug[4+1+ot] = d_accel[iglob*3+2]-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
// atomicAdd(&d_debug[0],1.0);
// d_debug[6+ot] = d_displ[iglob*3+2];
- }
-
- atomicAdd(&d_accel[iglob*3],-(fac1*tempx1l + fac2*tempx2l + fac3*tempx3l));
+ //}
+
+ atomicAdd(&d_accel[iglob*3],-(fac1*tempx1l + fac2*tempx2l + fac3*tempx3l));
atomicAdd(&d_accel[iglob*3+1],-(fac1*tempy1l + fac2*tempy2l + fac3*tempy3l));
atomicAdd(&d_accel[iglob*3+2],-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l));
-
+
#endif
// update memory variables based upon the Runge-Kutta scheme
@@ -1045,9 +1059,9 @@
// save deviatoric strain for Runge-Kutta scheme
if( COMPUTE_AND_STORE_STRAIN ){
int ijk_ispec = tx + working_element*125;
-
+
// fortran: epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
- epsilondev_xx[ijk_ispec] = epsilondev_xx_loc;
+ epsilondev_xx[ijk_ispec] = epsilondev_xx_loc;
epsilondev_yy[ijk_ispec] = epsilondev_yy_loc;
epsilondev_xy[ijk_ispec] = epsilondev_xy_loc;
epsilondev_xz[ijk_ispec] = epsilondev_xz_loc;
@@ -1069,88 +1083,89 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void kernel_3_cuda_device(real* veloc,
- real* accel, int size,
- real deltatover2, real* rmass) {
+__global__ void kernel_3_cuda_device(realw* veloc,
+ realw* accel, int size,
+ realw deltatover2,
+ realw* rmass) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
+
/* because of block and grid sizing problems, there is a small */
/* amount of buffer at the end of the calculation */
if(id < size) {
- accel[3*id] = accel[3*id]*rmass[id];
- accel[3*id+1] = accel[3*id+1]*rmass[id];
+ accel[3*id] = accel[3*id]*rmass[id];
+ accel[3*id+1] = accel[3*id+1]*rmass[id];
accel[3*id+2] = accel[3*id+2]*rmass[id];
-
+
veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1];
- veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
+ veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
}
}
/* ----------------------------------------------------------------------------------------------- */
-__global__ void kernel_3_accel_cuda_device(real* accel,
+__global__ void kernel_3_accel_cuda_device(realw* accel,
int size,
- real* rmass) {
+ realw* rmass) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
+
/* because of block and grid sizing problems, there is a small */
/* amount of buffer at the end of the calculation */
if(id < size) {
- accel[3*id] = accel[3*id]*rmass[id];
- accel[3*id+1] = accel[3*id+1]*rmass[id];
- accel[3*id+2] = accel[3*id+2]*rmass[id];
+ accel[3*id] = accel[3*id]*rmass[id];
+ accel[3*id+1] = accel[3*id+1]*rmass[id];
+ accel[3*id+2] = accel[3*id+2]*rmass[id];
}
}
/* ----------------------------------------------------------------------------------------------- */
-__global__ void kernel_3_veloc_cuda_device(real* veloc,
- real* accel,
+__global__ void kernel_3_veloc_cuda_device(realw* veloc,
+ realw* accel,
int size,
- real deltatover2) {
+ realw deltatover2) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
+
/* because of block and grid sizing problems, there is a small */
/* amount of buffer at the end of the calculation */
if(id < size) {
veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1];
- veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
+ veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
}
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(kernel_3_a_cuda,
KERNEL_3_A_CUDA)(long* Mesh_pointer,
- int* size_F,
- float* deltatover2_F,
- int* SIMULATION_TYPE_f,
+ int* size_F,
+ float* deltatover2_F,
+ int* SIMULATION_TYPE_f,
float* b_deltatover2_F,
int* OCEANS) {
-TRACE("kernel_3_a_cuda");
+TRACE("kernel_3_a_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
int size = *size_F;
int SIMULATION_TYPE = *SIMULATION_TYPE_f;
- real deltatover2 = *deltatover2_F;
- real b_deltatover2 = *b_deltatover2_F;
-
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+
int blocksize=128;
int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
-
+
int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
// check whether we can update accel and veloc, or only accel at this point
if( *OCEANS == 0 ){
// updates both, accel and veloc
@@ -1160,14 +1175,14 @@
kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc, mp->d_b_accel, size, b_deltatover2,mp->d_rmass);
}
}else{
- // updates only accel
+ // updates only accel
kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel, size, mp->d_rmass);
-
+
if(SIMULATION_TYPE == 3) {
kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_b_accel, size, mp->d_rmass);
- }
+ }
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
exit_on_cuda_error("after kernel 3 a");
@@ -1176,21 +1191,21 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(kernel_3_b_cuda,
KERNEL_3_B_CUDA)(long* Mesh_pointer,
- int* size_F,
- float* deltatover2_F,
- int* SIMULATION_TYPE_f,
+ int* size_F,
+ float* deltatover2_F,
+ int* SIMULATION_TYPE_f,
float* b_deltatover2_F) {
- TRACE("kernel_3_b_cuda");
-
+ TRACE("kernel_3_b_cuda");
+
Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
int size = *size_F;
int SIMULATION_TYPE = *SIMULATION_TYPE_f;
- real deltatover2 = *deltatover2_F;
- real b_deltatover2 = *b_deltatover2_F;
-
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+
int blocksize=128;
int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
@@ -1203,14 +1218,14 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
// updates only veloc at this point
kernel_3_veloc_cuda_device<<< grid, threads>>>(mp->d_veloc,mp->d_accel,size,deltatover2);
-
+
if(SIMULATION_TYPE == 3) {
kernel_3_veloc_cuda_device<<< grid, threads>>>(mp->d_b_veloc,mp->d_b_accel,size,b_deltatover2);
- }
-
+ }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
exit_on_cuda_error("after kernel 3 b");
@@ -1220,24 +1235,24 @@
/* ----------------------------------------------------------------------------------------------- */
-/* note:
+/* note:
constant arrays when used in compute_forces_acoustic_cuda.cu routines stay zero,
constant declaration and cudaMemcpyToSymbol would have to be in the same file...
-
+
extern keyword doesn't work for __constant__ declarations.
-
+
also:
cudaMemcpyToSymbol("deviceCaseParams", caseParams, sizeof(CaseParams));
..
and compile with -arch=sm_20
-
+
see also: http://stackoverflow.com/questions/4008031/how-to-use-cuda-constant-memory-in-a-programmer-pleasant-way
doesn't seem to work.
-
+
we could keep arrays separated for acoustic and elastic routines...
-
+
for now, we store pointers with cudaGetSymbolAddress() function calls.
-
+
*/
@@ -1245,7 +1260,7 @@
void setConst_hprime_xx(float* array,Mesh* mp)
{
-
+
cudaError_t err = cudaMemcpyToSymbol(d_hprime_xx, array, NGLL2*sizeof(float));
if (err != cudaSuccess)
{
@@ -1253,17 +1268,17 @@
fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprime_xx),"d_hprime_xx");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprime_xx: %s\n", cudaGetErrorString(err));
exit(1);
- }
+ }
}
void setConst_hprime_yy(float* array,Mesh* mp)
{
-
+
cudaError_t err = cudaMemcpyToSymbol(d_hprime_yy, array, NGLL2*sizeof(float));
if (err != cudaSuccess)
{
@@ -1271,17 +1286,17 @@
fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprime_yy),"d_hprime_yy");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprime_yy: %s\n", cudaGetErrorString(err));
exit(1);
- }
+ }
}
void setConst_hprime_zz(float* array,Mesh* mp)
{
-
+
cudaError_t err = cudaMemcpyToSymbol(d_hprime_zz, array, NGLL2*sizeof(float));
if (err != cudaSuccess)
{
@@ -1289,12 +1304,12 @@
fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprime_zz),"d_hprime_zz");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprime_zz: %s\n", cudaGetErrorString(err));
exit(1);
- }
+ }
}
@@ -1306,12 +1321,12 @@
fprintf(stderr, "Error in setConst_hprimewgll_xx: %s\n", cudaGetErrorString(err));
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_xx),"d_hprimewgll_xx");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprimewgll_xx: %s\n", cudaGetErrorString(err));
exit(1);
- }
+ }
}
void setConst_hprimewgll_yy(float* array,Mesh* mp)
@@ -1322,12 +1337,12 @@
fprintf(stderr, "Error in setConst_hprimewgll_yy: %s\n", cudaGetErrorString(err));
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_yy),"d_hprimewgll_yy");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprimewgll_yy: %s\n", cudaGetErrorString(err));
exit(1);
- }
+ }
}
void setConst_hprimewgll_zz(float* array,Mesh* mp)
@@ -1338,12 +1353,12 @@
fprintf(stderr, "Error in setConst_hprimewgll_zz: %s\n", cudaGetErrorString(err));
exit(1);
}
-
+
err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_zz),"d_hprimewgll_zz");
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_hprimewgll_zz: %s\n", cudaGetErrorString(err));
exit(1);
- }
+ }
}
void setConst_wgllwgll_xy(float* array,Mesh* mp)
@@ -1359,8 +1374,8 @@
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_wgllwgll_xy: %s\n", cudaGetErrorString(err));
exit(1);
- }
-
+ }
+
}
void setConst_wgllwgll_xz(float* array,Mesh* mp)
@@ -1376,8 +1391,8 @@
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_wgllwgll_xz: %s\n", cudaGetErrorString(err));
exit(1);
- }
-
+ }
+
}
void setConst_wgllwgll_yz(float* array,Mesh* mp)
@@ -1393,21 +1408,21 @@
if(err != cudaSuccess) {
fprintf(stderr, "Error with d_wgllwgll_yz: %s\n", cudaGetErrorString(err));
exit(1);
- }
-
+ }
+
}
/* ----------------------------------------------------------------------------------------------- */
-/* KERNEL for ocean load on free surface */
+/* OCEANS load on free surface */
/* ----------------------------------------------------------------------------------------------- */
-__global__ void elastic_ocean_load_cuda_kernel(float* accel,
+__global__ void elastic_ocean_load_cuda_kernel(float* accel,
float* rmass,
- float* rmass_ocean_load,
+ float* rmass_ocean_load,
int num_free_surface_faces,
int* free_surface_ispec,
int* free_surface_ijk,
@@ -1416,109 +1431,118 @@
int* updated_dof_ocean_load) {
// gets spectral element face id
int igll = threadIdx.x ; // threadIdx.y*blockDim.x will be always = 0 for thread block (25,1,1)
- int iface = blockIdx.x + gridDim.x*blockIdx.y;
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
realw nx,ny,nz;
realw force_normal_comp,additional_term;
-
+
// for all faces on free surface
if( iface < num_free_surface_faces ){
-
+
int ispec = free_surface_ispec[iface]-1;
-
- // gets global point index
+
+ // gets global point index
int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface)
int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-
+
int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
-
+
+ //if(igll == 0 ) printf("igll %d %d %d %d\n",igll,i,j,k,iglob);
+
// only update this global point once
// daniel: todo - workaround to not use the temporary update array
// atomicExch returns the old value, i.e. 0 indicates that we still have to do this point
+
+
+ //if(igll == 0 ) printf("updated_dof %d %d\n",iglob,updated_dof_ocean_load[iglob]);
+
if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){
-
+
// get normal
nx = free_surface_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; //(1,igll,iface)
ny = free_surface_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
- nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
-
+ nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
+
// make updated component of right-hand side
// we divide by rmass() which is 1 / M
// we use the total force which includes the Coriolis term above
force_normal_comp = ( accel[iglob*3]*nx + accel[iglob*3+1]*ny + accel[iglob*3+2]*nz ) / rmass[iglob];
-
+
additional_term = (rmass_ocean_load[iglob] - rmass[iglob]) * force_normal_comp;
-
+
// daniel: probably wouldn't need atomicAdd anymore, but just to be sure...
atomicAdd(&accel[iglob*3], + additional_term * nx);
atomicAdd(&accel[iglob*3+1], + additional_term * ny);
- atomicAdd(&accel[iglob*3+2], + additional_term * nz);
+ atomicAdd(&accel[iglob*3+2], + additional_term * nz);
}
- }
+ }
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
void FC_FUNC_(elastic_ocean_load_cuda,
- ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f,
+ ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f,
int* SIMULATION_TYPE) {
-
+
TRACE("elastic_ocean_load_cuda");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
// checks if anything to do
if( mp->num_free_surface_faces == 0 ) return;
-
+
// block sizes: exact blocksize to match NGLLSQUARE
int blocksize = 25;
-
+
int num_blocks_x = mp->num_free_surface_faces;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- // temporary global array: used to synchronize updates on global accel array
- int* d_updated_dof_ocean_load;
- print_CUDA_error_if_any(cudaMalloc((void**)&(d_updated_dof_ocean_load),sizeof(int)*mp->NGLOB_AB),88501);
- // initializes array
- cudaMemset((void*)d_updated_dof_ocean_load,0,sizeof(int)*mp->NGLOB_AB);
-
- elastic_ocean_load_cuda_kernel<<<grid,threads>>>(mp->d_accel,
- mp->d_rmass,
+
+ // initializes temporary array to zero
+ print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
+ sizeof(int)*mp->NGLOB_AB),88501);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("before kernel elastic_ocean_load_cuda");
+#endif
+
+ elastic_ocean_load_cuda_kernel<<<grid,threads>>>(mp->d_accel,
+ mp->d_rmass,
mp->d_rmass_ocean_load,
mp->num_free_surface_faces,
- mp->d_free_surface_ispec,
+ mp->d_free_surface_ispec,
mp->d_free_surface_ijk,
mp->d_free_surface_normal,
mp->d_ibool,
- d_updated_dof_ocean_load);
+ mp->d_updated_dof_ocean_load);
// for backward/reconstructed potentials
if(*SIMULATION_TYPE == 3) {
// re-initializes array
- cudaMemset(d_updated_dof_ocean_load,0,sizeof(int)*mp->NGLOB_AB);
+ print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
+ sizeof(int)*mp->NGLOB_AB),88502);
- elastic_ocean_load_cuda_kernel<<<grid,threads>>>(mp->d_b_accel,
- mp->d_rmass,
+ elastic_ocean_load_cuda_kernel<<<grid,threads>>>(mp->d_b_accel,
+ mp->d_rmass,
mp->d_rmass_ocean_load,
mp->num_free_surface_faces,
- mp->d_free_surface_ispec,
+ mp->d_free_surface_ispec,
mp->d_free_surface_ijk,
mp->d_free_surface_normal,
mp->d_ibool,
- d_updated_dof_ocean_load);
-
+ mp->d_updated_dof_ocean_load);
+
}
-
- cudaFree(d_updated_dof_ocean_load);
-
+
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("enforce_free_surface_cuda");
-#endif
+ exit_on_cuda_error("elastic_ocean_load_cuda");
+#endif
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -45,20 +45,21 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_kernels_cudakernel(int* ispec_is_elastic, int* ibool,
+__global__ void compute_kernels_cudakernel(int* ispec_is_elastic,
+ int* ibool,
float* accel,
float* b_displ,
- float* epsilondev_xx,
- float* epsilondev_yy,
- float* epsilondev_xy,
- float* epsilondev_xz,
- float* epsilondev_yz,
+ float* epsilondev_xx,
+ float* epsilondev_yy,
+ float* epsilondev_xy,
+ float* epsilondev_xz,
+ float* epsilondev_yz,
float* b_epsilondev_xx,
float* b_epsilondev_yy,
float* b_epsilondev_xy,
float* b_epsilondev_xz,
float* b_epsilondev_yz,
- float* rho_kl,
+ float* rho_kl,
float deltat,
float* mu_kl,
float* kappa_kl,
@@ -70,33 +71,33 @@
int ispec = blockIdx.x + blockIdx.y*gridDim.x;
// handles case when there is 1 extra block (due to rectangular grid)
- if(ispec < NSPEC_AB) {
+ if(ispec < NSPEC_AB) {
// elastic elements only
- if(ispec_is_elastic[ispec] == 1) {
-
+ if( ispec_is_elastic[ispec] ) {
+
int ijk = threadIdx.x;
int ijk_ispec = ijk + 125*ispec;
int iglob = ibool[ijk_ispec] - 1 ;
- // debug
+ // debug
// if(ijk_ispec == 9480531) {
-// d_debug[0] = rho_kl[ijk_ispec];
-// d_debug[1] = accel[3*iglob];
-// d_debug[2] = b_displ[3*iglob];
+// d_debug[0] = rho_kl[ijk_ispec];
+// d_debug[1] = accel[3*iglob];
+// d_debug[2] = b_displ[3*iglob];
// d_debug[3] = deltat * (accel[3*iglob]*b_displ[3*iglob]+
// accel[3*iglob+1]*b_displ[3*iglob+1]+
// accel[3*iglob+2]*b_displ[3*iglob+2]);
// }
-
-
- // isotropic kernels:
+
+
+ // isotropic kernels:
// density kernel
rho_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_displ[3*iglob]+
accel[3*iglob+1]*b_displ[3*iglob+1]+
accel[3*iglob+2]*b_displ[3*iglob+2]);
-
+
// debug
// if(rho_kl[ijk_ispec] < 1.9983e+18) {
// atomicAdd(&d_debug[3],1.0);
@@ -105,7 +106,7 @@
// d_debug[1] = accel[3*iglob];
// d_debug[2] = b_displ[3*iglob];
// }
-
+
// shear modulus kernel
mu_kl[ijk_ispec] += deltat * (epsilondev_xx[ijk_ispec]*b_epsilondev_xx[ijk_ispec]+ // 1*b1
epsilondev_yy[ijk_ispec]*b_epsilondev_yy[ijk_ispec]+ // 2*b2
@@ -114,37 +115,38 @@
2*(epsilondev_xy[ijk_ispec]*b_epsilondev_xy[ijk_ispec]+
epsilondev_xz[ijk_ispec]*b_epsilondev_xz[ijk_ispec]+
epsilondev_yz[ijk_ispec]*b_epsilondev_yz[ijk_ispec]));
-
+
// bulk modulus kernel
kappa_kl[ijk_ispec] += deltat*(9*epsilon_trace_over_3[ijk_ispec]*
b_epsilon_trace_over_3[ijk_ispec]);
-
+
}
}
}
-
-/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
void FC_FUNC_(compute_kernels_elastic_cuda,
COMPUTE_KERNELS_ELASTIC_CUDA)(long* Mesh_pointer,
- float* deltat) {
+ float* deltat_f) {
TRACE("compute_kernels_elastic_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
int blocksize = 125; // NGLLX*NGLLY*NGLLZ
-
+ float deltat = *deltat_f;
+
int num_blocks_x = mp->NSPEC_AB;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
float* d_debug;
/*
float* h_debug;
@@ -152,7 +154,7 @@
cudaMalloc((void**)&d_debug,128*sizeof(float));
cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
*/
-
+
compute_kernels_cudakernel<<<grid,threads>>>(mp->d_ispec_is_elastic,mp->d_ibool,
mp->d_accel, mp->d_b_displ,
mp->d_epsilondev_xx,
@@ -166,7 +168,7 @@
mp->d_b_epsilondev_xz,
mp->d_b_epsilondev_yz,
mp->d_rho_kl,
- *deltat,
+ deltat,
mp->d_mu_kl,
mp->d_kappa_kl,
mp->d_epsilon_trace_over_3,
@@ -193,9 +195,9 @@
// number_big_values++;
// }
// }
-
+
// printf("maval rho = %e, number>1e10 = %d vs. %d\n",maxval,number_big_values,mp->NSPEC_AB*125);
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("compute_kernels_elastic_cuda");
#endif
@@ -209,33 +211,33 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_kernels_strength_noise_cuda_kernel(float* displ,
+__global__ void compute_kernels_strength_noise_cuda_kernel(float* displ,
int* free_surface_ispec,
int* free_surface_ijk,
- int* ibool,
- float* noise_surface_movie,
- float* normal_x_noise,
- float* normal_y_noise,
- float* normal_z_noise,
- float* Sigma_kl,
+ int* ibool,
+ float* noise_surface_movie,
+ float* normal_x_noise,
+ float* normal_y_noise,
+ float* normal_z_noise,
+ float* Sigma_kl,
float deltat,
- int num_free_surface_faces,
+ int num_free_surface_faces,
float* d_debug) {
int iface = blockIdx.x + blockIdx.y*gridDim.x;
-
+
if(iface < num_free_surface_faces) {
int ispec = free_surface_ispec[iface]-1;
- int igll = threadIdx.x;
+ int igll = threadIdx.x;
int ipoin = igll + 25*iface;
int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1 ;
int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-
+
int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1 ;
-
+
float eta = ( noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)]*normal_x_noise[ipoin]+
- noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)]*normal_y_noise[ipoin]+
+ noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)]*normal_y_noise[ipoin]+
noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)]*normal_z_noise[ipoin]);
// if(ijk_ispec == 78496) {
@@ -244,29 +246,29 @@
// d_debug[2] = normal_x_noise[ipoin];
// d_debug[3] = normal_y_noise[ipoin];
// d_debug[4] = normal_z_noise[ipoin];
- // d_debug[5] = displ[3*iglob+2];
+ // d_debug[5] = displ[3*iglob+2];
// d_debug[6] = deltat*eta*normal_z_noise[ipoin]*displ[2+3*iglob];
// d_debug[7] = 0.008*1.000000e-24*normal_z_noise[ipoin]*3.740546e-13;
// }
-
+
Sigma_kl[INDEX4(5,5,5,i,j,k,ispec)] += deltat*eta*(normal_x_noise[ipoin]*displ[3*iglob]+
normal_y_noise[ipoin]*displ[1+3*iglob]+
normal_z_noise[ipoin]*displ[2+3*iglob]);
}
-
+
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(compute_kernels_strength_noise_cuda,
- COMPUTE_KERNELS_STRENGTH_NOISE_CUDA)(long* Mesh_pointer,
+extern "C"
+void FC_FUNC_(compute_kernels_strgth_noise_cu,
+ COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
float* h_noise_surface_movie,
int* num_free_surface_faces_f,
float* deltat) {
-TRACE("compute_kernels_strength_noise_cuda");
-
+TRACE("compute_kernels_strgth_noise_cu");
+
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
int num_free_surface_faces = *num_free_surface_faces_f;
@@ -287,7 +289,7 @@
float* d_debug;
// cudaMalloc((void**)&d_debug,128*sizeof(float));
// cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-
+
compute_kernels_strength_noise_cuda_kernel<<<grid,threads>>>(mp->d_displ,
mp->d_free_surface_ispec,
mp->d_free_surface_ijk,
@@ -304,11 +306,11 @@
// for(int i=0;i<8;i++) {
// printf("debug[%d]= %e\n",i,h_debug[i]);
// }
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("compute_kernels_strength_noise_cuda_kernel");
#endif
-
+
}
@@ -327,30 +329,30 @@
float* hprime_xx,
float* hprime_yy,
float* hprime_zz,
- float* d_xix,
- float* d_xiy,
- float* d_xiz,
- float* d_etax,
- float* d_etay,
- float* d_etaz,
- float* d_gammax,
- float* d_gammay,
+ float* d_xix,
+ float* d_xiy,
+ float* d_xiz,
+ float* d_etax,
+ float* d_etay,
+ float* d_etaz,
+ float* d_gammax,
+ float* d_gammay,
float* d_gammaz,
float rhol) {
-
+
float temp1l,temp2l,temp3l;
float hp1,hp2,hp3;
float xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl;
float rho_invl;
int l,offset,offset1,offset2,offset3;
-
+
//const int NGLLX = 5;
const int NGLL3_ALIGN = 128;
-
+
int K = (ijk/NGLL2);
int J = ((ijk-K*NGLL2)/NGLLX);
int I = (ijk-K*NGLL2-J*NGLLX);
-
+
// derivative along x
temp1l = 0.f;
for( l=0; l<NGLLX;l++){
@@ -358,7 +360,7 @@
offset1 = K*NGLL2+J*NGLLX+l;
temp1l += scalar_field[offset1]*hp1;
}
-
+
// derivative along y
temp2l = 0.f;
for( l=0; l<NGLLX;l++){
@@ -366,18 +368,18 @@
offset2 = K*NGLL2+l*NGLLX+I;
temp2l += scalar_field[offset2]*hp2;
}
-
- // derivative along z
+
+ // derivative along z
temp3l = 0.f;
for( l=0; l<NGLLX;l++){
hp3 = hprime_zz[l*NGLLX+K];
offset3 = l*NGLL2+J*NGLLX+I;
temp3l += scalar_field[offset3]*hp3;
-
+
}
-
+
offset = ispec*NGLL3_ALIGN + ijk;
-
+
xixl = d_xix[offset];
xiyl = d_xiy[offset];
xizl = d_xiz[offset];
@@ -387,149 +389,326 @@
gammaxl = d_gammax[offset];
gammayl = d_gammay[offset];
gammazl = d_gammaz[offset];
-
+
rho_invl = 1.0f / rhol;
-
+
// derivatives of acoustic scalar potential field on GLL points
vector_field_element[0] = (temp1l*xixl + temp2l*etaxl + temp3l*gammaxl) * rho_invl;
vector_field_element[1] = (temp1l*xiyl + temp2l*etayl + temp3l*gammayl) * rho_invl;
- vector_field_element[2] = (temp1l*xizl + temp2l*etazl + temp3l*gammazl) * rho_invl;
-
+ vector_field_element[2] = (temp1l*xizl + temp2l*etazl + temp3l*gammazl) * rho_invl;
+
}
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_kernels_acoustic_kernel(int* ispec_is_acoustic,
+__global__ void compute_kernels_acoustic_kernel(int* ispec_is_acoustic,
int* ibool,
float* rhostore,
float* kappastore,
float* hprime_xx,
float* hprime_yy,
float* hprime_zz,
- float* d_xix,
- float* d_xiy,
- float* d_xiz,
- float* d_etax,
- float* d_etay,
- float* d_etaz,
- float* d_gammax,
- float* d_gammay,
- float* d_gammaz,
+ float* d_xix,
+ float* d_xiy,
+ float* d_xiz,
+ float* d_etax,
+ float* d_etay,
+ float* d_etaz,
+ float* d_gammax,
+ float* d_gammay,
+ float* d_gammaz,
float* potential_dot_dot_acoustic,
float* b_potential_acoustic,
float* b_potential_dot_dot_acoustic,
- float* rho_ac_kl,
+ float* rho_ac_kl,
float* kappa_ac_kl,
float deltat,
int NSPEC_AB) {
-
+
int ispec = blockIdx.x + blockIdx.y*gridDim.x;
- // handles case when there is 1 extra block (due to rectangular grid)
+ // handles case when there is 1 extra block (due to rectangular grid)
if( ispec < NSPEC_AB ){
-
+
// acoustic elements only
- if( ispec_is_acoustic[ispec] == 1) {
-
+ if( ispec_is_acoustic[ispec] ) {
+
int ijk = threadIdx.x;
-
+
// local and global indices
int ijk_ispec = ijk + 125*ispec;
int ijk_ispec_padded = ijk + 128*ispec;
int iglob = ibool[ijk_ispec] - 1;
-
+
float accel_elm[3];
float b_displ_elm[3];
float rhol,kappal;
-
+
// shared memory between all threads within this block
- __shared__ float scalar_field_displ[125];
- __shared__ float scalar_field_accel[125];
-
+ __shared__ float scalar_field_displ[125];
+ __shared__ float scalar_field_accel[125];
+
// copy field values
scalar_field_displ[ijk] = b_potential_acoustic[iglob];
scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob];
__syncthreads();
-
+
// gets material parameter
rhol = rhostore[ijk_ispec_padded];
-
+
// displacement vector from backward field
compute_gradient_kernel(ijk,ispec,scalar_field_displ,b_displ_elm,
hprime_xx,hprime_yy,hprime_zz,
d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
rhol);
-
+
// acceleration vector
compute_gradient_kernel(ijk,ispec,scalar_field_accel,accel_elm,
hprime_xx,hprime_yy,hprime_zz,
d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
rhol);
-
+
// density kernel
rho_ac_kl[ijk_ispec] -= deltat * rhol * (accel_elm[0]*b_displ_elm[0] +
accel_elm[1]*b_displ_elm[1] +
accel_elm[2]*b_displ_elm[2]);
-
+
// bulk modulus kernel
kappal = kappastore[ijk_ispec];
- kappa_ac_kl[ijk_ispec] -= deltat / kappal * potential_dot_dot_acoustic[iglob]
- * b_potential_dot_dot_acoustic[iglob];
+ kappa_ac_kl[ijk_ispec] -= deltat / kappal * potential_dot_dot_acoustic[iglob]
+ * b_potential_dot_dot_acoustic[iglob];
}
- }
+ }
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(compute_kernels_acoustic_cuda,
COMPUTE_KERNELS_ACOUSTIC_CUDA)(
- long* Mesh_pointer,
- float* deltat) {
-
+ long* Mesh_pointer,
+ float* deltat_f) {
+
TRACE("compute_kernels_acoustic_cuda");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-
+
int blocksize = 125; // NGLLX*NGLLY*NGLLZ
+ float deltat = *deltat_f;
+
int num_blocks_x = mp->NSPEC_AB;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
compute_kernels_acoustic_kernel<<<grid,threads>>>(mp->d_ispec_is_acoustic,
mp->d_ibool,
mp->d_rhostore,
mp->d_kappastore,
mp->d_hprime_xx,
mp->d_hprime_yy,
- mp->d_hprime_zz,
- mp->d_xix,
- mp->d_xiy,
+ mp->d_hprime_zz,
+ mp->d_xix,
+ mp->d_xiy,
mp->d_xiz,
- mp->d_etax,
- mp->d_etay,
+ mp->d_etax,
+ mp->d_etay,
mp->d_etaz,
- mp->d_gammax,
- mp->d_gammay,
- mp->d_gammaz,
- mp->d_potential_dot_dot_acoustic,
+ mp->d_gammax,
+ mp->d_gammay,
+ mp->d_gammaz,
+ mp->d_potential_dot_dot_acoustic,
mp->d_b_potential_acoustic,
mp->d_b_potential_dot_dot_acoustic,
mp->d_rho_ac_kl,
mp->d_kappa_ac_kl,
- *deltat,
+ deltat,
mp->NSPEC_AB);
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("compute_kernels_acoustic_kernel");
#endif
}
+/* ----------------------------------------------------------------------------------------------- */
+
+// preconditioner (approximate Hessian kernel)
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_kernels_hess_el_cudakernel(int* ispec_is_elastic,
+ int* ibool,
+ float* accel,
+ float* b_accel,
+ float* hess_kl,
+ float deltat,
+ int NSPEC_AB) {
+
+ int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+
+ // handles case when there is 1 extra block (due to rectangular grid)
+ if(ispec < NSPEC_AB) {
+
+ // elastic elements only
+ if( ispec_is_elastic[ispec] ) {
+
+ int ijk = threadIdx.x;
+ int ijk_ispec = ijk + 125*ispec;
+ int iglob = ibool[ijk_ispec] - 1 ;
+
+ // approximate hessian
+ hess_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_accel[3*iglob]+
+ accel[3*iglob+1]*b_accel[3*iglob+1]+
+ accel[3*iglob+2]*b_accel[3*iglob+2]);
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_kernels_hess_ac_cudakernel(int* ispec_is_acoustic,
+ int* ibool,
+ float* potential_dot_dot_acoustic,
+ float* b_potential_dot_dot_acoustic,
+ float* rhostore,
+ float* hprime_xx,
+ float* hprime_yy,
+ float* hprime_zz,
+ float* d_xix,
+ float* d_xiy,
+ float* d_xiz,
+ float* d_etax,
+ float* d_etay,
+ float* d_etaz,
+ float* d_gammax,
+ float* d_gammay,
+ float* d_gammaz,
+ float* hess_kl,
+ float deltat,
+ int NSPEC_AB) {
+
+ int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+
+ // handles case when there is 1 extra block (due to rectangular grid)
+ if(ispec < NSPEC_AB) {
+
+ // acoustic elements only
+ if( ispec_is_acoustic[ispec] ){
+
+ // local and global indices
+ int ijk = threadIdx.x;
+ int ijk_ispec = ijk + 125*ispec;
+ int iglob = ibool[ijk_ispec] - 1 ;
+
+ int ijk_ispec_padded = ijk + 128*ispec;
+
+ float accel_elm[3];
+ float b_accel_elm[3];
+ float rhol;
+
+ // shared memory between all threads within this block
+ __shared__ float scalar_field_accel[125];
+ __shared__ float scalar_field_b_accel[125];
+
+ // copy field values
+ scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob];
+ scalar_field_b_accel[ijk] = b_potential_dot_dot_acoustic[iglob];
+ __syncthreads();
+
+ // gets material parameter
+ rhol = rhostore[ijk_ispec_padded];
+
+ // acceleration vector
+ compute_gradient_kernel(ijk,ispec,
+ scalar_field_accel,accel_elm,
+ hprime_xx,hprime_yy,hprime_zz,
+ d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
+ rhol);
+
+ // acceleration vector from backward field
+ compute_gradient_kernel(ijk,ispec,
+ scalar_field_b_accel,b_accel_elm,
+ hprime_xx,hprime_yy,hprime_zz,
+ d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
+ rhol);
+ // approximates hessian
+ hess_kl[ijk_ispec] += deltat * (accel_elm[0]*b_accel_elm[0] +
+ accel_elm[1]*b_accel_elm[1] +
+ accel_elm[2]*b_accel_elm[2]);
+
+ } // ispec_is_acoustic
+
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_kernels_hess_cuda,
+ COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
+ float* deltat_f,
+ int* ELASTIC_SIMULATION,
+ int* ACOUSTIC_SIMULATION) {
+ TRACE("compute_kernels_hess_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ int blocksize = 125; // NGLLX*NGLLY*NGLLZ
+ float deltat = *deltat_f;
+
+ int num_blocks_x = mp->NSPEC_AB;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ if( *ELASTIC_SIMULATION ) {
+ compute_kernels_hess_el_cudakernel<<<grid,threads>>>(mp->d_ispec_is_elastic,
+ mp->d_ibool,
+ mp->d_accel,
+ mp->d_b_accel,
+ mp->d_hess_el_kl,
+ deltat,
+ mp->NSPEC_AB);
+ }
+
+ if( *ACOUSTIC_SIMULATION ) {
+ compute_kernels_hess_ac_cudakernel<<<grid,threads>>>(mp->d_ispec_is_acoustic,
+ mp->d_ibool,
+ mp->d_potential_dot_dot_acoustic,
+ mp->d_b_potential_dot_dot_acoustic,
+ mp->d_rhostore,
+ mp->d_hprime_xx,
+ mp->d_hprime_yy,
+ mp->d_hprime_zz,
+ mp->d_xix,
+ mp->d_xiy,
+ mp->d_xiz,
+ mp->d_etax,
+ mp->d_etay,
+ mp->d_etaz,
+ mp->d_gammax,
+ mp->d_gammay,
+ mp->d_gammaz,
+ mp->d_hess_ac_kl,
+ deltat,
+ mp->NSPEC_AB);
+ }
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_kernels_hess_cuda");
+#endif
+}
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -40,72 +40,71 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_stacey_acoustic_kernel(float* potential_dot_acoustic,
- float* potential_dot_dot_acoustic,
+__global__ void compute_stacey_acoustic_kernel(float* potential_dot_acoustic,
+ float* potential_dot_dot_acoustic,
int* abs_boundary_ispec,
- int* abs_boundary_ijk,
- real* abs_boundary_jacobian2Dw,
+ int* abs_boundary_ijk,
+ realw* abs_boundary_jacobian2Dw,
int* ibool,
float* rhostore,
float* kappastore,
- int* ispec_is_inner,
+ int* ispec_is_inner,
int* ispec_is_acoustic,
int phase_is_inner,
int SIMULATION_TYPE, int SAVE_FORWARD,
int num_abs_boundary_faces,
float* b_potential_dot_acoustic,
float* b_potential_dot_dot_acoustic,
- float* b_absorb_potential
+ float* b_absorb_potential
) {
- int igll = threadIdx.x;
- int iface = blockIdx.x + gridDim.x*blockIdx.y;
-
+ int igll = threadIdx.x;
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
int i,j,k,iglob,ispec;
realw rhol,kappal,cpl;
realw jacobianw;
-
-
- // don't compute points outside NGLLSQUARE==NGLL2==25
+
+
+ // don't compute points outside NGLLSQUARE==NGLL2==25
// way 2: no further check needed since blocksize = 25
if( iface < num_abs_boundary_faces){
-
- // if(igll<NGLL2 && iface < num_abs_boundary_faces) {
-
+
+ // if(igll<NGLL2 && iface < num_abs_boundary_faces) {
+
// "-1" from index values to convert from Fortran-> C indexing
ispec = abs_boundary_ispec[iface]-1;
-
- if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec]==1) {
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] ) {
+
i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-
+
// determines bulk sound speed
rhol = rhostore[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
cpl = sqrt( kappal / rhol );
-
- // gets associated, weighted jacobian
- jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+ // gets associated, weighted jacobian
+ jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
//daniel
//if( igll == 0 ) printf("gpu: %i %i %i %i %i %e %e %e\n",i,j,k,ispec,iglob,rhol,kappal,jacobianw);
-
+
// Sommerfeld condition
- atomicAdd(&potential_dot_dot_acoustic[iglob],-potential_dot_acoustic[iglob]*jacobianw/cpl/rhol);
-
+ atomicAdd(&potential_dot_dot_acoustic[iglob],-potential_dot_acoustic[iglob]*jacobianw/cpl/rhol);
+
// adjoint simulations
if( SIMULATION_TYPE == 3 ){
// Sommerfeld condition
atomicAdd(&b_potential_dot_dot_acoustic[iglob],-b_absorb_potential[INDEX2(NGLL2,igll,iface)]);
- }else if( SIMULATION_TYPE == 1 && SAVE_FORWARD == 1 ){
- b_absorb_potential[INDEX2(NGLL2,igll,iface)] = potential_dot_acoustic[iglob]*jacobianw/cpl/rhol;
+ }else if( SIMULATION_TYPE == 1 && SAVE_FORWARD ){
+ b_absorb_potential[INDEX2(NGLL2,igll,iface)] = potential_dot_acoustic[iglob]*jacobianw/cpl/rhol;
}
-
}
// }
}
@@ -113,12 +112,12 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
+extern "C"
void FC_FUNC_(compute_stacey_acoustic_cuda,
COMPUTE_STACEY_ACOUSTIC_CUDA)(
- long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* SIMULATION_TYPEf,
+ long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* SIMULATION_TYPEf,
int* SAVE_FORWARDf,
float* h_b_absorb_potential) {
TRACE("compute_stacey_acoustic_cuda");
@@ -127,16 +126,16 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
int phase_is_inner = *phase_is_innerf;
int SIMULATION_TYPE = *SIMULATION_TYPEf;
- int SAVE_FORWARD = *SAVE_FORWARDf;
+ int SAVE_FORWARD = *SAVE_FORWARDf;
// way 1: Elapsed time: 4.385948e-03
// > NGLLSQUARE==NGLL2==25, but we handle this inside kernel
- // int blocksize = 32;
-
+ // int blocksize = 32;
+
// way 2: Elapsed time: 4.379034e-03
// > NGLLSQUARE==NGLL2==25, no further check inside kernel
- int blocksize = 25;
-
+ int blocksize = 25;
+
int num_blocks_x = mp->d_num_abs_boundary_faces;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
@@ -148,37 +147,37 @@
dim3 threads(blocksize,1,1);
// adjoint simulations: reads in absorbing boundary
- if (SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0 ){
+ if (SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0 ){
// copies array to GPU
print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,h_b_absorb_potential,
- mp->d_b_reclen_potential,cudaMemcpyHostToDevice),7700);
+ mp->d_b_reclen_potential,cudaMemcpyHostToDevice),7700);
}
-
+
compute_stacey_acoustic_kernel<<<grid,threads>>>(mp->d_potential_dot_acoustic,
mp->d_potential_dot_dot_acoustic,
- mp->d_abs_boundary_ispec,
- mp->d_abs_boundary_ijk,
- mp->d_abs_boundary_jacobian2Dw,
- mp->d_ibool,
- mp->d_rhostore,
- mp->d_kappastore,
- mp->d_ispec_is_inner,
- mp->d_ispec_is_acoustic,
+ mp->d_abs_boundary_ispec,
+ mp->d_abs_boundary_ijk,
+ mp->d_abs_boundary_jacobian2Dw,
+ mp->d_ibool,
+ mp->d_rhostore,
+ mp->d_kappastore,
+ mp->d_ispec_is_inner,
+ mp->d_ispec_is_acoustic,
phase_is_inner,
SIMULATION_TYPE,SAVE_FORWARD,
mp->d_num_abs_boundary_faces,
mp->d_b_potential_dot_acoustic,
mp->d_b_potential_dot_dot_acoustic,
mp->d_b_absorb_potential);
-
+
// adjoint simulations: stores absorbed wavefield part
- if (SIMULATION_TYPE == 1 && SAVE_FORWARD == 1 && mp->d_num_abs_boundary_faces > 0 ){
- // copies array to CPU
+ if (SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ){
+ // copies array to CPU
print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_potential,mp->d_b_absorb_potential,
mp->d_b_reclen_potential,cudaMemcpyDeviceToHost),7701);
}
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("compute_stacey_acoustic_kernel");
#endif
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -40,81 +40,74 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_stacey_elastic_kernel(real* veloc,
- real* accel,
+__global__ void compute_stacey_elastic_kernel(realw* veloc,
+ realw* accel,
int* abs_boundary_ispec,
- int* abs_boundary_ijk,
- real* abs_boundary_normal,
- real* abs_boundary_jacobian2Dw,
+ int* abs_boundary_ijk,
+ realw* abs_boundary_normal,
+ realw* abs_boundary_jacobian2Dw,
int* ibool,
- real* rho_vp,
- real* rho_vs,
- int* ispec_is_inner,
+ realw* rho_vp,
+ realw* rho_vs,
+ int* ispec_is_inner,
int* ispec_is_elastic,
int phase_is_inner,
- int SIMULATION_TYPE,int SAVE_FORWARD,
+ int SIMULATION_TYPE,
+ int SAVE_FORWARD,
int num_abs_boundary_faces,
- real* b_accel,
- real* b_absorb_field,
- float* debug_val,
- int* debug_val_int
+ realw* b_accel,
+ realw* b_absorb_field //,float* debug_val,int* debug_val_int
) {
int igll = threadIdx.x; // tx
int iface = blockIdx.x + gridDim.x*blockIdx.y; // bx
- int i;
- int j;
- int k;
- int iglob;
- int ispec;
+
+ int i,j,k,iglob,ispec;
realw vx,vy,vz,vn;
realw nx,ny,nz;
realw rho_vp_temp,rho_vs_temp;
realw tx,ty,tz;
realw jacobianw;
-
- // don't compute points outside NGLLSQUARE==NGLL2==25
+ // don't compute points outside NGLLSQUARE==NGLL2==25
// way 2: no further check needed since blocksize = 25
if( iface < num_abs_boundary_faces){
-
- //if(igll < NGLL2 && iface < num_abs_boundary_faces) {
-
+
+ //if(igll < NGLL2 && iface < num_abs_boundary_faces) {
+
// "-1" from index values to convert from Fortran-> C indexing
ispec = abs_boundary_ispec[iface]-1;
- i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
- j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
- k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
- iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
-
- if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec]==1) {
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] ) {
+
i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
-
+
// gets associated velocity
-
+
vx = veloc[iglob*3+0];
vy = veloc[iglob*3+1];
vz = veloc[iglob*3+2];
-
+
// gets associated normal
nx = abs_boundary_normal[INDEX3(NDIM,NGLL2,0,igll,iface)];
ny = abs_boundary_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
nz = abs_boundary_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
-
+
// // velocity component in normal direction (normal points out of element)
vn = vx*nx + vy*ny + vz*nz;
+
rho_vp_temp = rho_vp[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
rho_vs_temp = rho_vs[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+
tx = rho_vp_temp*vn*nx + rho_vs_temp*(vx-vn*nx);
ty = rho_vp_temp*vn*ny + rho_vs_temp*(vy-vn*ny);
tz = rho_vp_temp*vn*nz + rho_vs_temp*(vz-vn*nz);
-
- jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
-
+
+ jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
atomicAdd(&accel[iglob*3],-tx*jacobianw);
atomicAdd(&accel[iglob*3+1],-ty*jacobianw);
atomicAdd(&accel[iglob*3+2],-tz*jacobianw);
@@ -128,10 +121,10 @@
b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)] = tx*jacobianw;
b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)] = ty*jacobianw;
b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)] = tz*jacobianw;
- }
-
+ } // SIMULATION_TYPE
}
- }
+ } // num_abs_boundary_faces
+
}
/* ----------------------------------------------------------------------------------------------- */
@@ -139,28 +132,31 @@
extern "C"
void FC_FUNC_(compute_stacey_elastic_cuda,
- COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* SIMULATION_TYPEf,
+ COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* SIMULATION_TYPEf,
int* SAVE_FORWARDf,
float* h_b_absorb_field) {
-TRACE("compute_stacey_elastic_cuda");
-
+TRACE("compute_stacey_elastic_cuda");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- int phase_is_inner = *phase_is_innerf;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
- int SAVE_FORWARD = *SAVE_FORWARDf;
+ // check
+ if( mp->d_num_abs_boundary_faces == 0 ) return;
+ int phase_is_inner = *phase_is_innerf;
+ int SIMULATION_TYPE = *SIMULATION_TYPEf;
+ int SAVE_FORWARD = *SAVE_FORWARDf;
+
// way 1
// > NGLLSQUARE==NGLL2==25, but we handle this inside kernel
//int blocksize = 32;
-
- // way 2: seems sligthly faster
+
+ // way 2: seems sligthly faster
// > NGLLSQUARE==NGLL2==25, no further check inside kernel
int blocksize = 25;
-
+
int num_blocks_x = mp->d_num_abs_boundary_faces;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
@@ -171,57 +167,56 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- float* d_debug_val;
- int* d_debug_val_int;
+ //float* d_debug_val;
+ //int* d_debug_val_int;
if(SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0) {
// int val = NSTEP-it+1;
- // read_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&val);
+ // read_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&val);
// The read is done in fortran
print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field,h_b_absorb_field,
mp->d_b_reclen_field,cudaMemcpyHostToDevice),7700);
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("between cudamemcpy and compute_stacey_elastic_kernel");
#endif
-
+
compute_stacey_elastic_kernel<<<grid,threads>>>(mp->d_veloc,
mp->d_accel,
- mp->d_abs_boundary_ispec,
- mp->d_abs_boundary_ijk,
- mp->d_abs_boundary_normal,
- mp->d_abs_boundary_jacobian2Dw,
- mp->d_ibool,
- mp->d_rho_vp,
- mp->d_rho_vs,
- mp->d_ispec_is_inner,
- mp->d_ispec_is_elastic,
+ mp->d_abs_boundary_ispec,
+ mp->d_abs_boundary_ijk,
+ mp->d_abs_boundary_normal,
+ mp->d_abs_boundary_jacobian2Dw,
+ mp->d_ibool,
+ mp->d_rho_vp,
+ mp->d_rho_vs,
+ mp->d_ispec_is_inner,
+ mp->d_ispec_is_elastic,
phase_is_inner,
SIMULATION_TYPE,SAVE_FORWARD,
mp->d_num_abs_boundary_faces,
mp->d_b_accel,
- mp->d_b_absorb_field,
- d_debug_val,
- d_debug_val_int);
-
+ mp->d_b_absorb_field //,d_debug_val,d_debug_val_int
+ );
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_stacey_elastic_kernel");
+ exit_on_cuda_error("compute_stacey_elastic_kernel");
#endif
// ! adjoint simulations: stores absorbed wavefield part
// if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) &
// write(IOABS,rec=it) b_reclen_field,b_absorb_field,b_reclen_field
-
- if(SIMULATION_TYPE==1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces>0) {
+
+ if(SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ) {
print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_field,mp->d_b_absorb_field,
mp->d_b_reclen_field,cudaMemcpyDeviceToHost),7701);
// The write is done in fortran
- // write_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&it);
+ // write_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&it);
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after compute_stacey_elastic after cudamemcpy");
+ exit_on_cuda_error("after compute_stacey_elastic after cudamemcpy");
#endif
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -35,8 +35,8 @@
#include "mesh_constants_cuda.h"
-#define CUBLAS_ERROR(s,n) if (s != CUBLAS_STATUS_SUCCESS) { \
-fprintf (stderr, "CUBLAS Memory Write Error @ %d\n",n); \
+#define CUBLAS_ERROR(s,n) if (s != CUBLAS_STATUS_SUCCESS) { \
+fprintf (stderr, "CUBLAS Memory Write Error @ %d\n",n); \
exit(EXIT_FAILURE); }
/* ----------------------------------------------------------------------------------------------- */
@@ -45,15 +45,19 @@
/* ----------------------------------------------------------------------------------------------- */
-
-__global__ void UpdateDispVeloc_kernel(real* displ, real* veloc,
- real* accel, int size,
- real deltat, real deltatsqover2, real deltatover2) {
+
+__global__ void UpdateDispVeloc_kernel(realw* displ,
+ realw* veloc,
+ realw* accel,
+ int size,
+ realw deltat,
+ realw deltatsqover2,
+ realw deltatover2) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
/* because of block and grid sizing problems, there is a small */
/* amount of buffer at the end of the calculation */
- if(id < size) {
+ if(id < size) {
displ[id] = displ[id] + deltat*veloc[id] + deltatsqover2*accel[id];
veloc[id] = veloc[id] + deltatover2*accel[id];
accel[id] = 0; // can do this using memset...not sure if faster
@@ -63,72 +67,76 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(it_update_displacement_scheme_cuda,
- IT_UPDATE_DISPLACMENT_SCHEME_CUDA)(long* Mesh_pointer_f,
- int* size_F,
- float* deltat_F,
- float* deltatsqover2_F,
+void FC_FUNC_(it_update_displacement_cuda,
+ IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f,
+ int* size_F,
+ float* deltat_F,
+ float* deltatsqover2_F,
float* deltatover2_F,
- int* SIMULATION_TYPE,
- float* b_deltat_F,
- float* b_deltatsqover2_F,
+ int* SIMULATION_TYPE,
+ float* b_deltat_F,
+ float* b_deltatsqover2_F,
float* b_deltatover2_F) {
-TRACE("it_update_displacement_scheme_cuda");
+TRACE("it_update_displacement_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-
+
//int i,device;
int size = *size_F;
- real deltat = *deltat_F;
- real deltatsqover2 = *deltatsqover2_F;
- real deltatover2 = *deltatover2_F;
- real b_deltat = *b_deltat_F;
- real b_deltatsqover2 = *b_deltatsqover2_F;
- real b_deltatover2 = *b_deltatover2_F;
+ realw deltat = *deltat_F;
+ realw deltatsqover2 = *deltatsqover2_F;
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltat = *b_deltat_F;
+ realw b_deltatsqover2 = *b_deltatsqover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
//cublasStatus status;
-
+
int blocksize = 128;
- int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
-
- int num_blocks_x = size_padded/blocksize;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+ int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
- exit_on_cuda_error("Before UpdateDispVeloc_kernel");
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+// exit_on_cuda_error("Before UpdateDispVeloc_kernel");
+//#endif
+
//launch kernel
UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ,mp->d_veloc,mp->d_accel,
- size,deltat,deltatsqover2,deltatover2);
+ size,deltat,deltatsqover2,deltatover2);
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- // sync and check to catch errors from previous async operations
- exit_on_cuda_error("UpdateDispVeloc_kernel");
-#endif
+ //cudaThreadSynchronize();
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+// //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+// // sync and check to catch errors from previous async operations
+// exit_on_cuda_error("UpdateDispVeloc_kernel");
+//#endif
-
// kernel for backward fields
if(*SIMULATION_TYPE == 3) {
-
+
UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_b_displ,mp->d_b_veloc,mp->d_b_accel,
- size,b_deltat, b_deltatsqover2, b_deltatover2);
+ size,b_deltat,b_deltatsqover2,b_deltatover2);
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+// //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+// exit_on_cuda_error("after SIM_TYPE==3 UpdateDispVeloc_kernel");
+//#endif
+ }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- exit_on_cuda_error("after SIM_TYPE==3 UpdateDispVeloc_kernel");
+ exit_on_cuda_error("it_update_displacement_cuda");
#endif
- }
-
- cudaThreadSynchronize();
}
/* ----------------------------------------------------------------------------------------------- */
@@ -138,84 +146,84 @@
// KERNEL 1
/* ----------------------------------------------------------------------------------------------- */
-__global__ void UpdatePotential_kernel(real* potential_acoustic,
- real* potential_dot_acoustic,
- real* potential_dot_dot_acoustic,
+__global__ void UpdatePotential_kernel(realw* potential_acoustic,
+ realw* potential_dot_acoustic,
+ realw* potential_dot_dot_acoustic,
int size,
- real deltat,
- real deltatsqover2,
- real deltatover2) {
+ realw deltat,
+ realw deltatsqover2,
+ realw deltatover2) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
+
/* because of block and grid sizing problems, there is a small */
/* amount of buffer at the end of the calculation */
- if(id < size) {
- potential_acoustic[id] = potential_acoustic[id]
- + deltat*potential_dot_acoustic[id]
+ if(id < size) {
+ potential_acoustic[id] = potential_acoustic[id]
+ + deltat*potential_dot_acoustic[id]
+ deltatsqover2*potential_dot_dot_acoustic[id];
-
- potential_dot_acoustic[id] = potential_dot_acoustic[id]
+
+ potential_dot_acoustic[id] = potential_dot_acoustic[id]
+ deltatover2*potential_dot_dot_acoustic[id];
-
- potential_dot_dot_acoustic[id] = 0;
+
+ potential_dot_dot_acoustic[id] = 0;
}
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(it_update_displacement_scheme_acoustic_cuda,
- IT_UPDATE_DISPLACEMENT_SCHEME_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
- int* size_F,
- float* deltat_F,
- float* deltatsqover2_F,
- float* deltatover2_F,
- int* SIMULATION_TYPE,
- float* b_deltat_F,
- float* b_deltatsqover2_F,
- float* b_deltatover2_F) {
-TRACE("it_update_displacement_scheme_acoustic_cuda");
+void FC_FUNC_(it_update_displacement_ac_cuda,
+ it_update_displacement_ac_cuda)(long* Mesh_pointer_f,
+ int* size_F,
+ float* deltat_F,
+ float* deltatsqover2_F,
+ float* deltatover2_F,
+ int* SIMULATION_TYPE,
+ float* b_deltat_F,
+ float* b_deltatsqover2_F,
+ float* b_deltatover2_F) {
+TRACE("it_update_displacement_ac_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-
- //int i,device;
+
+ //int i,device;
int size = *size_F;
- real deltat = *deltat_F;
- real deltatsqover2 = *deltatsqover2_F;
- real deltatover2 = *deltatover2_F;
- real b_deltat = *b_deltat_F;
- real b_deltatsqover2 = *b_deltatsqover2_F;
- real b_deltatover2 = *b_deltatover2_F;
+ realw deltat = *deltat_F;
+ realw deltatsqover2 = *deltatsqover2_F;
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltat = *b_deltat_F;
+ realw b_deltatsqover2 = *b_deltatsqover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
//cublasStatus status;
-
+
int blocksize = 128;
- int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
-
- int num_blocks_x = size_padded/blocksize;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+ int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
//launch kernel
UpdatePotential_kernel<<<grid,threads>>>(mp->d_potential_acoustic,
mp->d_potential_dot_acoustic,
mp->d_potential_dot_dot_acoustic,
size,deltat,deltatsqover2,deltatover2);
-
+
if(*SIMULATION_TYPE == 3) {
UpdatePotential_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic,
mp->d_b_potential_dot_acoustic,
mp->d_b_potential_dot_dot_acoustic,
size,b_deltat,b_deltatsqover2,b_deltatover2);
}
-
- cudaThreadSynchronize();
+
+ //cudaThreadSynchronize();
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- exit_on_cuda_error("it_update_displacement_scheme_acoustic_cuda");
+ exit_on_cuda_error("it_update_displacement_ac_cuda");
#endif
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2011-10-30 02:25:28 UTC (rev 19129)
@@ -26,6 +26,23 @@
!=====================================================================
*/
+/* daniel: trivia
+
+- for most real working arrays we use float (single exception so far is stf_pre_compute).
+ TODO: we could use "realw" instead of "float" type declaration to make it easier to switch
+ between a real or double precision simulation (matching CUSTOM_REAL == 4 or 8 in fortran routines).
+
+- instead of boolean "logical" declared in fortran routines, in C (or Cuda-C) we have to use "int" variables.
+
+ ifort / gfortran caveat:
+ to check whether it is true or false, do not check for == 1 to test for true values since ifort just uses
+ non-zero values for true (e.g. can be -1 for true). however, false will be always == 0.
+
+ thus, rather use: if( var ) {...} for testing if true instead of if( var == 1){...} (alternative: one could use if( var != 0 ){...}
+
+*/
+
+
#ifndef GPU_MESH_
#define GPU_MESH_
#include <sys/types.h>
@@ -40,7 +57,7 @@
#define DEBUG 0
#if DEBUG == 1
-#define TRACE(x) printf("%s\n",x)
+#define TRACE(x) printf("%s\n",x);
#else
#define TRACE(x) // printf("%s\n",x);
#endif
@@ -57,6 +74,7 @@
#endif
+//#undef ENABLE_VERY_SLOW_ERROR_CHECKING
#define ENABLE_VERY_SLOW_ERROR_CHECKING
/* ----------------------------------------------------------------------------------------------- */
@@ -85,7 +103,7 @@
void print_CUDA_error_if_any(cudaError_t err, int num);
-void pause_for_debugger(int pause);
+void pause_for_debugger(int pause);
void exit_on_cuda_error(char* kernel_name);
@@ -100,7 +118,7 @@
#define NGLL2 25
#define N_SLS 3
-typedef float real; // type of variables passed into function
+//typedef float real; // type of variables passed into function
typedef float realw; // type of "working" variables
// double precision temporary variables leads to 10% performance
@@ -118,18 +136,21 @@
// mesh resolution
int NSPEC_AB;
int NGLOB_AB;
-
+
// interpolators
float* d_xix; float* d_xiy; float* d_xiz;
float* d_etax; float* d_etay; float* d_etaz;
float* d_gammax; float* d_gammay; float* d_gammaz;
- // model parameters
+ // model parameters
float* d_kappav; float* d_muv;
- // global indexing
+ // global indexing
int* d_ibool;
+ // inner / outer elements
+ int* d_ispec_is_inner;
+
// pointers to constant memory arrays
float* d_hprime_xx; float* d_hprime_yy; float* d_hprime_zz;
float* d_hprimewgll_xx; float* d_hprimewgll_yy; float* d_hprimewgll_zz;
@@ -138,22 +159,26 @@
// ------------------------------------------------------------------ //
// elastic wavefield parameters
// ------------------------------------------------------------------ //
-
- // displacement, velocity, acceleration
+
+ // displacement, velocity, acceleration
float* d_displ; float* d_veloc; float* d_accel;
- // backward/reconstructed elastic wavefield
+ // backward/reconstructed elastic wavefield
float* d_b_displ; float* d_b_veloc; float* d_b_accel;
- // elastic domain parameters
+ // elastic elements
+ int* d_ispec_is_elastic;
+
+ // elastic domain parameters
int* d_phase_ispec_inner_elastic;
- int d_num_phase_ispec_elastic;
+ int num_phase_ispec_elastic;
+
float* d_rmass;
float* d_send_accel_buffer;
- // interfaces
+ // interfaces
int* d_nibool_interfaces_ext_mesh;
int* d_ibool_interfaces_ext_mesh;
-
+
//used for absorbing stacey boundaries
int d_num_abs_boundary_faces;
int* d_abs_boundary_ispec;
@@ -163,15 +188,12 @@
float* d_b_absorb_field;
int d_b_reclen_field;
-
+
float* d_rho_vp;
float* d_rho_vs;
-
- // inner / outer elements
- int* d_ispec_is_inner;
- int* d_ispec_is_elastic;
- // sources
+ // sources
+ int nsources_local;
float* d_sourcearrays;
double* d_stf_pre_compute;
int* d_islice_selected_source;
@@ -179,17 +201,17 @@
// receivers
int* d_number_receiver_global;
- int* d_ispec_selected_rec;
+ int* d_ispec_selected_rec;
int* d_islice_selected_rec;
int nrec_local;
float* d_station_seismo_field;
- float* h_station_seismo_field;
-
+ float* h_station_seismo_field;
+
// surface elements (to save for noise tomography and acoustic simulations)
int* d_free_surface_ispec;
int* d_free_surface_ijk;
int num_free_surface_faces;
-
+
// surface movie elements to save for noise tomography
float* d_noise_surface_movie;
@@ -202,11 +224,11 @@
float* d_one_minus_sum_beta;
float* d_factor_common;
-
+
float* d_alphaval;
float* d_betaval;
float* d_gammaval;
-
+
// attenuation & kernel
float* d_epsilondev_xx;
float* d_epsilondev_yy;
@@ -214,7 +236,7 @@
float* d_epsilondev_xz;
float* d_epsilondev_yz;
float* d_epsilon_trace_over_3;
-
+
// noise
float* d_normal_x_noise;
float* d_normal_y_noise;
@@ -230,7 +252,7 @@
float* d_b_R_xy;
float* d_b_R_xz;
float* d_b_R_yz;
-
+
float* d_b_epsilondev_xx;
float* d_b_epsilondev_yy;
float* d_b_epsilondev_xy;
@@ -241,19 +263,23 @@
float* d_b_alphaval;
float* d_b_betaval;
float* d_b_gammaval;
-
+
// sensitivity kernels
float* d_rho_kl;
float* d_mu_kl;
float* d_kappa_kl;
-
+
// noise sensitivity kernel
float* d_Sigma_kl;
+ // approximative hessian for preconditioning kernels
+ float* d_hess_el_kl;
+
// oceans
float* d_rmass_ocean_load;
float* d_free_surface_normal;
-
+ int* d_updated_dof_ocean_load;
+
// ------------------------------------------------------------------ //
// acoustic wavefield
// ------------------------------------------------------------------ //
@@ -261,29 +287,33 @@
float* d_potential_acoustic; float* d_potential_dot_acoustic; float* d_potential_dot_dot_acoustic;
// backward/reconstructed wavefield
float* d_b_potential_acoustic; float* d_b_potential_dot_acoustic; float* d_b_potential_dot_dot_acoustic;
-
- // acoustic domain parameters
- int* d_phase_ispec_inner_acoustic;
+
+ // acoustic domain parameters
+ int* d_ispec_is_acoustic;
+
+ int* d_phase_ispec_inner_acoustic;
int num_phase_ispec_acoustic;
-
+
float* d_rhostore;
float* d_kappastore;
float* d_rmass_acoustic;
-
+
float* d_send_potential_dot_dot_buffer;
- int* d_ispec_is_acoustic;
-
+
float* d_b_absorb_potential;
int d_b_reclen_potential;
-
+
// for writing seismograms
float* d_station_seismo_potential;
float* h_station_seismo_potential;
-
+
// sensitivity kernels
float* d_rho_ac_kl;
float* d_kappa_ac_kl;
-
+
+ // approximative hessian for preconditioning kernels
+ float* d_hess_ac_kl;
+
// coupling acoustic-elastic
int* d_coupling_ac_el_ispec;
int* d_coupling_ac_el_ijk;
@@ -291,7 +321,7 @@
float* d_coupling_ac_el_jacobian2Dw;
-
+
} Mesh;
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -40,21 +40,23 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void transfer_surface_to_host_kernel(int* free_surface_ispec,int* free_surface_ijk, int num_free_surface_faces, int* ibool, real* displ, real* noise_surface_movie) {
+__global__ void transfer_surface_to_host_kernel(int* free_surface_ispec,int* free_surface_ijk,
+ int num_free_surface_faces, int* ibool,
+ realw* displ, realw* noise_surface_movie) {
int igll = threadIdx.x;
int iface = blockIdx.x + blockIdx.y*gridDim.x;
-
+
// int id = tx + blockIdx.x*blockDim.x + blockIdx.y*blockDim.x*gridDim.x;
-
+
if(iface < num_free_surface_faces) {
int ispec = free_surface_ispec[iface]-1; //-1 for C-based indexing
int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
-
- int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-
+
+ int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)] = displ[iglob*3];
noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)] = displ[iglob*3+1];
noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)] = displ[iglob*3+2];
@@ -65,8 +67,8 @@
extern "C"
void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){
-TRACE("fortranflush");
-
+TRACE("fortranflush");
+
fflush(stdout);
fflush(stderr);
printf("Flushing proc %d!\n",*rank);
@@ -76,7 +78,7 @@
extern "C"
void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id) {
-TRACE("fortranprint");
+TRACE("fortranprint");
int procid;
MPI_Comm_rank(MPI_COMM_WORLD,&procid);
@@ -87,7 +89,7 @@
extern "C"
void FC_FUNC_(fortranprintf,FORTRANPRINTF)(float* val) {
-TRACE("fortranprintf");
+TRACE("fortranprintf");
int procid;
MPI_Comm_rank(MPI_COMM_WORLD,&procid);
@@ -98,7 +100,7 @@
extern "C"
void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val) {
-TRACE("fortranprintd");
+TRACE("fortranprintd");
int procid;
MPI_Comm_rank(MPI_COMM_WORLD,&procid);
@@ -110,9 +112,9 @@
// randomize displ for testing
extern "C"
void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,float* h_displ) {
-TRACE("make_displ_rand");
+TRACE("make_displ_rand");
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
// float* displ_rnd = (float*)malloc(mp->NGLOB_AB*3*sizeof(float));
for(int i=0;i<mp->NGLOB_AB*3;i++) {
h_displ[i] = rand();
@@ -124,10 +126,12 @@
extern "C"
void FC_FUNC_(transfer_surface_to_host,
- TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,real* h_noise_surface_movie,int* num_free_surface_faces) {
-TRACE("transfer_surface_to_host");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+ TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,
+ realw* h_noise_surface_movie,
+ int* num_free_surface_faces) {
+TRACE("transfer_surface_to_host");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
int num_blocks_x = *num_free_surface_faces;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
@@ -135,65 +139,72 @@
num_blocks_y = num_blocks_y*2;
}
dim3 grid(num_blocks_x,num_blocks_y,1);
- dim3 threads(25,1,1);
-
- transfer_surface_to_host_kernel<<<grid,threads>>>(mp->d_free_surface_ispec,mp->d_free_surface_ijk, *num_free_surface_faces, mp->d_ibool, mp->d_displ, mp->d_noise_surface_movie);
+ dim3 threads(25,1,1);
- cudaMemcpy(h_noise_surface_movie,mp->d_noise_surface_movie,3*25*(*num_free_surface_faces)*sizeof(real),cudaMemcpyDeviceToHost);
+ transfer_surface_to_host_kernel<<<grid,threads>>>(mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ *num_free_surface_faces,
+ mp->d_ibool,
+ mp->d_displ,
+ mp->d_noise_surface_movie);
+ cudaMemcpy(h_noise_surface_movie,mp->d_noise_surface_movie,
+ 3*25*(*num_free_surface_faces)*sizeof(realw),cudaMemcpyDeviceToHost);
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("transfer_surface_to_host");
-#endif
+#endif
}
/* ----------------------------------------------------------------------------------------------- */
-__global__ void noise_read_add_surface_movie_cuda_kernel(real* accel, int* ibool,
- int* free_surface_ispec,int* free_surface_ijk,
- int num_free_surface_faces,
- real* noise_surface_movie,
- real* normal_x_noise,
- real* normal_y_noise,
- real* normal_z_noise,
- real* mask_noise,
- real* free_surface_jacobian2Dw,
- real* wgllwgll_xy,
+__global__ void noise_read_add_surface_movie_cuda_kernel(realw* accel, int* ibool,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int num_free_surface_faces,
+ realw* noise_surface_movie,
+ realw* normal_x_noise,
+ realw* normal_y_noise,
+ realw* normal_z_noise,
+ realw* mask_noise,
+ realw* free_surface_jacobian2Dw,
+ realw* wgllwgll_xy,
float* d_debug) {
int iface = blockIdx.x + gridDim.x*blockIdx.y; // surface element id
// when nspec_top > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
- if(iface < num_free_surface_faces) {
+ if(iface < num_free_surface_faces) {
int ispec = free_surface_ispec[iface]-1;
-
+
int igll = threadIdx.x;
-
+
int ipoin = 25*iface + igll;
int i=free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
int j=free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
- int k=free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
-
+ int k=free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
+
int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-
- real normal_x = normal_x_noise[ipoin];
- real normal_y = normal_y_noise[ipoin];
- real normal_z = normal_z_noise[ipoin];
- real eta = (noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)]*normal_x +
+ realw normal_x = normal_x_noise[ipoin];
+ realw normal_y = normal_y_noise[ipoin];
+ realw normal_z = normal_z_noise[ipoin];
+
+ realw eta = (noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)]*normal_x +
noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)]*normal_y +
noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)]*normal_z);
-
+
// error from cuda-memcheck and ddt seems "incorrect", because we
// are passing a __constant__ variable pointer around like it was
// made using cudaMalloc, which *may* be "incorrect", but produces
// correct results.
-
+
// ========= Invalid __global__ read of size
// 4 ========= at 0x00000cd8 in
// compute_add_sources_cuda.cu:260:noise_read_add_surface_movie_cuda_kernel
// ========= by thread (0,0,0) in block (3443,0) ========= Address
// 0x203000c8 is out of bounds
-
+
// non atomic version for speed testing -- atomic updates are needed for correctness
// accel[3*iglob] += eta*mask_noise[ipoin] * normal_x * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + 25*ispec2D];
// accel[3*iglob+1] += eta*mask_noise[ipoin] * normal_y * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + 25*ispec2D];
@@ -201,38 +212,42 @@
// Fortran version in SVN -- note deletion of wgllwgll_xy?
// accel(1,iglob) = accel(1,iglob) + eta * mask_noise(ipoin) * normal_x_noise(ipoin) &
- // * free_surface_jacobian2Dw(igll,iface)
+ // * free_surface_jacobian2Dw(igll,iface)
// accel(2,iglob) = accel(2,iglob) + eta * mask_noise(ipoin) * normal_y_noise(ipoin) &
// * free_surface_jacobian2Dw(igll,iface)
// accel(3,iglob) = accel(3,iglob) + eta * mask_noise(ipoin) * normal_z_noise(ipoin) &
// * free_surface_jacobian2Dw(igll,iface) ! wgllwgll_xy(i,j) * jacobian2D_top(i,j,iface)
-
+
// atomicAdd(&accel[iglob*3] ,eta*mask_noise[ipoin]*normal_x*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
// atomicAdd(&accel[iglob*3+1],eta*mask_noise[ipoin]*normal_y*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
// atomicAdd(&accel[iglob*3+2],eta*mask_noise[ipoin]*normal_z*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
-
+
atomicAdd(&accel[iglob*3] ,eta*mask_noise[ipoin]*normal_x*free_surface_jacobian2Dw[igll+25*iface]);
atomicAdd(&accel[iglob*3+1],eta*mask_noise[ipoin]*normal_y*free_surface_jacobian2Dw[igll+25*iface]);
atomicAdd(&accel[iglob*3+2],eta*mask_noise[ipoin]*normal_z*free_surface_jacobian2Dw[igll+25*iface]);
-
+
}
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(noise_read_add_surface_movie_cuda,
- NOISE_READ_ADD_SURFACE_MOVIE_CUDA)(long* Mesh_pointer_f, real* h_noise_surface_movie, int* num_free_surface_faces_f,int* NOISE_TOMOGRAPHYf) {
-TRACE("noise_read_add_surface_movie_cuda");
+void FC_FUNC_(noise_read_add_surface_movie_cu,
+ NOISE_READ_ADD_SURFACE_MOVIE_CU)(long* Mesh_pointer_f,
+ realw* h_noise_surface_movie,
+ int* num_free_surface_faces_f,
+ int* NOISE_TOMOGRAPHYf) {
+TRACE("noise_read_add_surface_movie_cu");
- // EPIK_TRACER("noise_read_add_surface_movie_cuda");
-
+ // EPIK_TRACER("noise_read_add_surface_movie_cu");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
int num_free_surface_faces = *num_free_surface_faces_f;
int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
float* d_noise_surface_movie;
cudaMalloc((void**)&d_noise_surface_movie,3*25*num_free_surface_faces*sizeof(float));
- cudaMemcpy(d_noise_surface_movie, h_noise_surface_movie,3*25*num_free_surface_faces*sizeof(real),cudaMemcpyHostToDevice);
+ cudaMemcpy(d_noise_surface_movie, h_noise_surface_movie,
+ 3*25*num_free_surface_faces*sizeof(realw),cudaMemcpyHostToDevice);
int num_blocks_x = num_free_surface_faces;
int num_blocks_y = 1;
@@ -247,45 +262,45 @@
float* d_debug;
// cudaMalloc((void**)&d_debug,128*sizeof(float));
// cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-
+
if(NOISE_TOMOGRAPHY == 2) { // add surface source to forward field
noise_read_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_accel,
- mp->d_ibool,
- mp->d_free_surface_ispec,
- mp->d_free_surface_ijk,
- num_free_surface_faces,
- d_noise_surface_movie,
- mp->d_normal_x_noise,
- mp->d_normal_y_noise,
- mp->d_normal_z_noise,
- mp->d_mask_noise,
- mp->d_free_surface_jacobian2Dw,
- mp->d_wgllwgll_xy,
- d_debug);
+ mp->d_ibool,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ num_free_surface_faces,
+ d_noise_surface_movie,
+ mp->d_normal_x_noise,
+ mp->d_normal_y_noise,
+ mp->d_normal_z_noise,
+ mp->d_mask_noise,
+ mp->d_free_surface_jacobian2Dw,
+ mp->d_wgllwgll_xy,
+ d_debug);
}
- else if(NOISE_TOMOGRAPHY==3) { // add surface source to adjoint (backward) field
+ else if(NOISE_TOMOGRAPHY == 3) { // add surface source to adjoint (backward) field
noise_read_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_b_accel,
- mp->d_ibool,
- mp->d_free_surface_ispec,
- mp->d_free_surface_ijk,
- num_free_surface_faces,
- d_noise_surface_movie,
- mp->d_normal_x_noise,
- mp->d_normal_y_noise,
- mp->d_normal_z_noise,
- mp->d_mask_noise,
- mp->d_free_surface_jacobian2Dw,
- mp->d_wgllwgll_xy,
- d_debug);
+ mp->d_ibool,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ num_free_surface_faces,
+ d_noise_surface_movie,
+ mp->d_normal_x_noise,
+ mp->d_normal_y_noise,
+ mp->d_normal_z_noise,
+ mp->d_mask_noise,
+ mp->d_free_surface_jacobian2Dw,
+ mp->d_wgllwgll_xy,
+ d_debug);
}
-
+
// cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
// for(int i=0;i<8;i++) {
// printf("debug[%d]= %e\n",i,h_debug[i]);
// }
// MPI_Abort(MPI_COMM_WORLD,1);
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("noise_read_add_surface_movie_cuda_kernel");
#endif
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h 2011-10-30 02:25:28 UTC (rev 19129)
@@ -91,10 +91,10 @@
void bindTexturesPotential(float* d_potential_acoustic)
{
cudaError_t err;
-
+
cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<float>();
-
- err = cudaBindTexture(NULL,tex_potential_acoustic, d_potential_acoustic,
+
+ err = cudaBindTexture(NULL,tex_potential_acoustic, d_potential_acoustic,
channelDescFloat, NGLOB*sizeof(float));
if (err != cudaSuccess)
{
@@ -106,10 +106,10 @@
void bindTexturesPotential_dot_dot(float* d_potential_dot_dot_acoustic)
{
cudaError_t err;
-
+
cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<float>();
-
- err = cudaBindTexture(NULL,tex_potential_dot_dot_acoustic, d_potential_dot_dot_acoustic,
+
+ err = cudaBindTexture(NULL,tex_potential_dot_dot_acoustic, d_potential_dot_dot_acoustic,
channelDescFloat, NGLOB*sizeof(float));
if (err != cudaSuccess)
{
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -57,7 +57,7 @@
extern "C"
void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)() {
-TRACE("pause_for_debug");
+TRACE("pause_for_debug");
pause_for_debugger(1);
}
@@ -67,7 +67,7 @@
void pause_for_debugger(int pause) {
if(pause) {
int myrank;
- MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
printf("I'm rank %d\n",myrank);
int i = 0;
char hostname[256];
@@ -89,7 +89,7 @@
cudaThreadSynchronize();
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
- {
+ {
fprintf(stderr,"Error after %s: %s\n", kernel_name, cudaGetErrorString(err));
pause_for_debugger(0);
exit(1);
@@ -128,20 +128,20 @@
/* ----------------------------------------------------------------------------------------------- */
void get_free_memory(double* free_db, double* used_db, double* total_db) {
-
+
// gets memory usage in byte
size_t free_byte ;
size_t total_byte ;
cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
if ( cudaSuccess != cuda_status ){
printf("Error: cudaMemGetInfo fails, %s \n", cudaGetErrorString(cuda_status) );
- exit(1);
+ exit(1);
}
-
+
*free_db = (double)free_byte ;
*total_db = (double)total_byte ;
*used_db = *total_db - *free_db ;
- return;
+ return;
}
/* ----------------------------------------------------------------------------------------------- */
@@ -149,17 +149,17 @@
// Saves GPU memory usage to file
void output_free_memory(char* info_str) {
int myrank;
- MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
+ MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
FILE* fp;
char filename[BUFSIZ];
double free_db,used_db,total_db;
get_free_memory(&free_db,&used_db,&total_db);
-
+
sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_mem_usage_proc_%03d.txt",myrank);
fp = fopen(filename,"a+");
fprintf(fp,"%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
- used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+ used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
fclose(fp);
}
@@ -169,8 +169,8 @@
extern "C"
void FC_FUNC_(output_free_device_memory,
OUTPUT_FREE_DEVICE_MEMORY)(int* id) {
-TRACE("output_free_device_memory");
-
+TRACE("output_free_device_memory");
+
char info[6];
sprintf(info,"f %d:",*id);
output_free_memory(info);
@@ -184,12 +184,12 @@
int myrank;
MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
double free_db,used_db,total_db;
-
+
get_free_memory(&free_db,&used_db,&total_db);
-
+
printf("%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
- used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
-
+ used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+
}
/* ----------------------------------------------------------------------------------------------- */
@@ -197,7 +197,7 @@
extern "C"
void FC_FUNC_(show_free_device_memory,
SHOW_FREE_DEVICE_MEMORY)() {
-TRACE("show_free_device_memory");
+TRACE("show_free_device_memory");
show_free_memory("from fortran");
}
@@ -206,12 +206,12 @@
extern "C"
void FC_FUNC_(get_free_device_memory,
get_FREE_DEVICE_MEMORY)(float* free, float* used, float* total ) {
-TRACE("get_free_device_memory");
+TRACE("get_free_device_memory");
double free_db,used_db,total_db;
-
+
get_free_memory(&free_db,&used_db,&total_db);
-
+
// converts to MB
*free = (float) free_db/1024.0/1024.0;
*used = (float) used_db/1024.0/1024.0;
@@ -220,9 +220,201 @@
}
+/* ----------------------------------------------------------------------------------------------- */
+//daniel
+/*
+__global__ void check_phase_ispec_kernel(int num_phase_ispec,
+ int* phase_ispec,
+ int NSPEC_AB,
+ int* ier) {
+ int i,ispec,iphase,count0,count1;
+ *ier = 0;
+
+ for(iphase=0; iphase < 2; iphase++){
+ count0 = 0;
+ count1 = 0;
+
+ for(i=0; i < num_phase_ispec; i++){
+ ispec = phase_ispec[iphase*num_phase_ispec + i] - 1;
+ if( ispec < -1 || ispec >= NSPEC_AB ){
+ printf("Error in d_phase_ispec_inner_elastic %d %d\n",i,ispec);
+ *ier = 1;
+ return;
+ }
+ if( ispec >= 0 ){ count0++;}
+ if( ispec < 0 ){ count1++;}
+ }
+
+ printf("check_phase_ispec done: phase %d, count = %d %d \n",iphase,count0,count1);
+
+ }
+}
+
+void check_phase_ispec(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check phase_ispec for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 1 ){
+ check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_elastic,
+ mp->d_phase_ispec_inner_elastic,
+ mp->NSPEC_AB,
+ d_debug);
+ }else if( type == 2 ){
+ check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_acoustic,
+ mp->d_phase_ispec_inner_acoustic,
+ mp->NSPEC_AB,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_phase_ispec");
+#endif
+
+}
+*/
+
/* ----------------------------------------------------------------------------------------------- */
+//daniel
+/*
+__global__ void check_ispec_is_kernel(int NSPEC_AB,
+ int* ispec_is,
+ int* ier) {
+ int ispec,count0,count1;
+
+ *ier = 0;
+ count0 = 0;
+ count1 = 0;
+ for(ispec=0; ispec < NSPEC_AB; ispec++){
+ if( ispec_is[ispec] < -1 || ispec_is[ispec] > 1 ){
+ printf("Error in ispec_is %d %d\n",ispec,ispec_is[ispec]);
+ *ier = 1;
+ return;
+ //exit(1);
+ }
+ if( ispec_is[ispec] == 0 ){count0++;}
+ if( ispec_is[ispec] != 0 ){count1++;}
+ }
+ printf("check_ispec_is done: count = %d %d\n",count0,count1);
+}
+
+void check_ispec_is(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check ispec_is for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 0 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_inner,
+ d_debug);
+ }else if( type == 1 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_elastic,
+ d_debug);
+ }else if( type == 2 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_acoustic,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_ispec_is");
+#endif
+}
+*/
+/* ----------------------------------------------------------------------------------------------- */
+//daniel
+/*
+__global__ void check_array_ispec_kernel(int num_array_ispec,
+ int* array_ispec,
+ int NSPEC_AB,
+ int* ier) {
+
+ int i,ispec,count0,count1;
+
+ *ier = 0;
+ count0 = 0;
+ count1 = 0;
+
+ for(i=0; i < num_array_ispec; i++){
+ ispec = array_ispec[i] - 1;
+ if( ispec < -1 || ispec >= NSPEC_AB ){
+ printf("Error in d_array_ispec %d %d\n",i,ispec);
+ *ier = 1;
+ return;
+ }
+ if( ispec >= 0 ){ count0++;}
+ if( ispec < 0 ){ count1++;}
+ }
+
+ printf("check_array_ispec done: count = %d %d \n",count0,count1);
+}
+
+void check_array_ispec(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check array_ispec for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 1 ){
+ check_array_ispec_kernel<<<grid,threads>>>(mp->d_num_abs_boundary_faces,
+ mp->d_abs_boundary_ispec,
+ mp->NSPEC_AB,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_array_ispec");
+#endif
+
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+
// GPU preparation
/* ----------------------------------------------------------------------------------------------- */
@@ -230,25 +422,28 @@
extern "C"
void FC_FUNC_(prepare_constants_device,
PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
- int* h_NGLLX,
+ int* h_NGLLX,
int* NSPEC_AB, int* NGLOB_AB,
float* h_xix, float* h_xiy, float* h_xiz,
float* h_etax, float* h_etay, float* h_etaz,
float* h_gammax, float* h_gammay, float* h_gammaz,
float* h_kappav, float* h_muv,
- int* h_ibool,
- int* num_interfaces_ext_mesh, int* max_nibool_interfaces_ext_mesh,
- int* h_nibool_interfaces_ext_mesh, int* h_ibool_interfaces_ext_mesh,
- float* h_hprime_xx,float* h_hprime_yy,float* h_hprime_zz,
+ int* h_ibool,
+ int* num_interfaces_ext_mesh,
+ int* max_nibool_interfaces_ext_mesh,
+ int* h_nibool_interfaces_ext_mesh,
+ int* h_ibool_interfaces_ext_mesh,
+ float* h_hprime_xx,float* h_hprime_yy,float* h_hprime_zz,
float* h_hprimewgll_xx,float* h_hprimewgll_yy,float* h_hprimewgll_zz,
- float* h_wgllwgll_xy,float* h_wgllwgll_xz,float* h_wgllwgll_yz,
- int* ABSORBING_CONDITIONS,
+ float* h_wgllwgll_xy,float* h_wgllwgll_xz,float* h_wgllwgll_yz,
+ int* ABSORBING_CONDITIONS,
int* h_abs_boundary_ispec, int* h_abs_boundary_ijk,
float* h_abs_boundary_normal,
float* h_abs_boundary_jacobian2Dw,
int* h_num_abs_boundary_faces,
- int* h_ispec_is_inner,
+ int* h_ispec_is_inner,
int* NSOURCES,
+ int* nsources_local_f,
float* h_sourcearrays,
int* h_islice_selected_source,
int* h_ispec_selected_source,
@@ -256,49 +451,48 @@
int* h_ispec_selected_rec,
int* nrec_f,
int* nrec_local_f,
- int* SIMULATION_TYPE) {
+ int* SIMULATION_TYPE,
+ int* ncuda_devices) {
TRACE("prepare_constants_device");
-
+
int procid;
int device_count = 0;
-
+
// cuda initialization (needs -lcuda library)
//cuInit(0);
CUresult status = cuInit(0);
if ( CUDA_SUCCESS != status ) exit_on_error("CUDA device initialization failed");
-
- // Gets number of GPU devices
+
+ // Gets number of GPU devices
cudaGetDeviceCount(&device_count);
//printf("Cuda Devices: %d\n", device_count);
- if (device_count == 0) exit_on_error("There is no device supporting CUDA\n");
-
+ if (device_count == 0) exit_on_error("There is no device supporting CUDA\n");
+ *ncuda_devices = device_count;
+
// Gets rank number of MPI process
MPI_Comm_rank(MPI_COMM_WORLD, &procid);
- // Sets the active device
+ // Sets the active device
if(device_count > 1) {
- // daniel: todo - generalize for more GPUs per node?
- // assumes we have 2 GPU devices per node and running 2 MPI processes per node as well
- cudaSetDevice((procid)%2);
- exit_on_cuda_error("cudaSetDevice");
+ // generalized for more GPUs per node
+ cudaSetDevice((procid)%device_count);
+ exit_on_cuda_error("cudaSetDevice");
}
- // allocates mesh parameter structure
+ // allocates mesh parameter structure
Mesh* mp = (Mesh*)malloc(sizeof(Mesh));
- if (mp == NULL) exit_on_error("error allocating mesh pointer");
+ if (mp == NULL) exit_on_error("error allocating mesh pointer");
*Mesh_pointer = (long)mp;
// checks if NGLLX == 5
if( *h_NGLLX != NGLLX ){
- exit_on_error("NGLLX must be 5 for CUDA devices");
+ exit_on_error("NGLLX must be 5 for CUDA devices");
}
-
- // sets global parameters
+
+ // sets global parameters
mp->NSPEC_AB = *NSPEC_AB;
mp->NGLOB_AB = *NGLOB_AB;
-
- //mp->d_num_phase_ispec_elastic = *num_phase_ispec_elastic;
// sets constant arrays
setConst_hprime_xx(h_hprime_xx,mp);
@@ -310,143 +504,153 @@
setConst_wgllwgll_xy(h_wgllwgll_xy,mp);
setConst_wgllwgll_xz(h_wgllwgll_xz,mp);
setConst_wgllwgll_yz(h_wgllwgll_yz,mp);
-
+
/* Assuming NGLLX=5. Padded is then 128 (5^3+3) */
- int size_padded = 128 * (*NSPEC_AB);
- int size = 125 * (*NSPEC_AB);
+ int size_padded = 128 * (mp->NSPEC_AB);
+ int size = 125 * (mp->NSPEC_AB);
- // mesh
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix, size_padded*sizeof(float)),1001);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy, size_padded*sizeof(float)),1002);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz, size_padded*sizeof(float)),1003);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax, size_padded*sizeof(float)),1004);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay, size_padded*sizeof(float)),1005);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz, size_padded*sizeof(float)),1006);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax, size_padded*sizeof(float)),1007);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay, size_padded*sizeof(float)),1008);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz, size_padded*sizeof(float)),1009);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappav, size_padded*sizeof(float)),1010);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muv, size_padded*sizeof(float)),1011);
+ // mesh
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix, size_padded*sizeof(float)),1001);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy, size_padded*sizeof(float)),1002);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz, size_padded*sizeof(float)),1003);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax, size_padded*sizeof(float)),1004);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay, size_padded*sizeof(float)),1005);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz, size_padded*sizeof(float)),1006);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax, size_padded*sizeof(float)),1007);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay, size_padded*sizeof(float)),1008);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz, size_padded*sizeof(float)),1009);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappav, size_padded*sizeof(float)),1010);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muv, size_padded*sizeof(float)),1011);
+ // transfer constant element data with padding
+ for(int i=0;i < mp->NSPEC_AB;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xix + i*128, &h_xix[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1501);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xiy+i*128, &h_xiy[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1502);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xiz+i*128, &h_xiz[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1503);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etax+i*128, &h_etax[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1504);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etay+i*128, &h_etay[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1505);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etaz+i*128, &h_etaz[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1506);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammax+i*128,&h_gammax[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1507);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammay+i*128,&h_gammay[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1508);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaz+i*128,&h_gammaz[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1509);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappav+i*128,&h_kappav[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1510);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_muv+i*128, &h_muv[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),1511);
+ }
+
// global indexing
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool, size_padded*sizeof(int)),1021);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool,size_padded*sizeof(int)),1021);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool, h_ibool,
- size*sizeof(int) ,cudaMemcpyHostToDevice),1022);
+ size*sizeof(int),cudaMemcpyHostToDevice),1022);
-
+
// prepare interprocess-edge exchange information
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_ext_mesh,
- *num_interfaces_ext_mesh*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_ext_mesh,h_nibool_interfaces_ext_mesh,
- *num_interfaces_ext_mesh*sizeof(int),cudaMemcpyHostToDevice),1202);
+ if( *num_interfaces_ext_mesh > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_ext_mesh,
+ (*num_interfaces_ext_mesh)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_ext_mesh,h_nibool_interfaces_ext_mesh,
+ (*num_interfaces_ext_mesh)*sizeof(int),cudaMemcpyHostToDevice),1202);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_ext_mesh,
- *num_interfaces_ext_mesh* *max_nibool_interfaces_ext_mesh*
- sizeof(int)),1203);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_ext_mesh,h_ibool_interfaces_ext_mesh,
- *num_interfaces_ext_mesh* *max_nibool_interfaces_ext_mesh*sizeof(int),
- cudaMemcpyHostToDevice),1204);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_ext_mesh,
+ (*num_interfaces_ext_mesh)*(*max_nibool_interfaces_ext_mesh)*sizeof(int)),1203);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_ext_mesh,h_ibool_interfaces_ext_mesh,
+ (*num_interfaces_ext_mesh)*(*max_nibool_interfaces_ext_mesh)*sizeof(int),
+ cudaMemcpyHostToDevice),1204);
+ }
- // inner elements
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,*NSPEC_AB*sizeof(int)),1205);
+ // inner elements
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,mp->NSPEC_AB*sizeof(int)),1205);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_inner, h_ispec_is_inner,
- *NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),1206);
-
+ mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),1206);
+ // daniel: check
+ //check_ispec_is(Mesh_pointer,0);
+
// absorbing boundaries
mp->d_num_abs_boundary_faces = *h_num_abs_boundary_faces;
- if( *ABSORBING_CONDITIONS == 1 && mp->d_num_abs_boundary_faces > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_ispec,
+ if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ispec),
(mp->d_num_abs_boundary_faces)*sizeof(int)),1101);
print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ispec, h_abs_boundary_ispec,
(mp->d_num_abs_boundary_faces)*sizeof(int),
cudaMemcpyHostToDevice),1102);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_ijk,
+
+ // daniel: check
+ //check_array_ispec(Mesh_pointer,1);
+
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_ijk),
3*25*(mp->d_num_abs_boundary_faces)*sizeof(int)),1103);
print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ijk, h_abs_boundary_ijk,
3*25*(mp->d_num_abs_boundary_faces)*sizeof(int),
cudaMemcpyHostToDevice),1104);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_normal,
- 3*25*(mp->d_num_abs_boundary_faces)*sizeof(int)),1105);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_normal),
+ 3*25*(mp->d_num_abs_boundary_faces)*sizeof(float)),1105);
print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_normal, h_abs_boundary_normal,
- 3*25*(mp->d_num_abs_boundary_faces)*sizeof(int),
+ 3*25*(mp->d_num_abs_boundary_faces)*sizeof(float),
cudaMemcpyHostToDevice),1106);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_jacobian2Dw,
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_abs_boundary_jacobian2Dw),
25*(mp->d_num_abs_boundary_faces)*sizeof(float)),1107);
print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_jacobian2Dw, h_abs_boundary_jacobian2Dw,
25*(mp->d_num_abs_boundary_faces)*sizeof(float),
- cudaMemcpyHostToDevice),1108);
+ cudaMemcpyHostToDevice),1108);
}
-
+
// sources
+ mp->nsources_local = *nsources_local_f;
if (*SIMULATION_TYPE == 1 || *SIMULATION_TYPE == 3){
// not needed in case of pure adjoint simulations (SIMULATION_TYPE == 2)
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_sourcearrays, sizeof(float)* *NSOURCES*3*125),1301);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_sourcearrays, h_sourcearrays, sizeof(float)* *NSOURCES*3*125,
- cudaMemcpyHostToDevice),1302);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_sourcearrays,
+ sizeof(float)* *NSOURCES*3*125),1301);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_sourcearrays, h_sourcearrays,
+ sizeof(float)* *NSOURCES*3*125,cudaMemcpyHostToDevice),1302);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_stf_pre_compute),
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_stf_pre_compute,
*NSOURCES*sizeof(double)),1303);
}
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_source, sizeof(int) * *NSOURCES),1401);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_source, h_islice_selected_source, sizeof(int)* *NSOURCES,
- cudaMemcpyHostToDevice),1402);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_source,
+ sizeof(int) * *NSOURCES),1401);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_source, h_islice_selected_source,
+ sizeof(int)* *NSOURCES,cudaMemcpyHostToDevice),1402);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_ispec_selected_source, sizeof(int)* *NSOURCES),1403);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_source, h_ispec_selected_source,sizeof(int)* *NSOURCES,
- cudaMemcpyHostToDevice),1404);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_ispec_selected_source,
+ sizeof(int)* *NSOURCES),1403);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_source, h_ispec_selected_source,
+ sizeof(int)* *NSOURCES,cudaMemcpyHostToDevice),1404);
-
- // transfer constant element data with padding
- for(int i=0;i<*NSPEC_AB;i++) {
- print_CUDA_error_if_any(cudaMemcpy(mp->d_xix + i*128, &h_xix[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1501);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_xiy+i*128, &h_xiy[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1502);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_xiz+i*128, &h_xiz[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1503);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_etax+i*128, &h_etax[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1504);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_etay+i*128, &h_etay[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1505);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_etaz+i*128, &h_etaz[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1506);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_gammax+i*128,&h_gammax[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1507);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_gammay+i*128,&h_gammay[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1508);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaz+i*128,&h_gammaz[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1509);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_kappav+i*128,&h_kappav[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1510);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_muv+i*128, &h_muv[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),1511);
-
- }
-
// receiver stations
int nrec = *nrec_f; // total number of receivers
- int nrec_local = *nrec_local_f; // number of receiver located in this partition
+ mp->nrec_local = *nrec_local_f; // number of receiver located in this partition
+ //int nrec_local = *nrec_local_f;
// note that:
// size(number_receiver_global) = nrec_local
- // size(ispec_selected_rec) = nrec
- mp->nrec_local = nrec_local;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_number_receiver_global),nrec_local*sizeof(int)),1);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_number_receiver_global,h_number_receiver_global,
- nrec_local*sizeof(int),cudaMemcpyHostToDevice),1512);
-
+ // size(ispec_selected_rec) = nrec
+ if( mp->nrec_local > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_number_receiver_global),mp->nrec_local*sizeof(int)),1);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_number_receiver_global,h_number_receiver_global,
+ mp->nrec_local*sizeof(int),cudaMemcpyHostToDevice),1512);
+ }
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_selected_rec),nrec*sizeof(int)),1513);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_rec,h_ispec_selected_rec,
- nrec*sizeof(int),cudaMemcpyHostToDevice),1514);
+ nrec*sizeof(int),cudaMemcpyHostToDevice),1514);
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("prepare_constants_device");
-#endif
+#endif
}
/* ----------------------------------------------------------------------------------------------- */
@@ -455,30 +659,30 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(prepare_adjoint_sim2_or_3_constants_device,
- PREPARE_ADJOINT_SIM2_OR_3_CONSTANTS_DEVICE)(
- long* Mesh_pointer_f,
- int* islice_selected_rec,
- int* islice_selected_rec_size) {
-
-TRACE("prepare_adjoint_sim2_or_3_constants_device");
-
+extern "C"
+void FC_FUNC_(prepare_sim2_or_3_const_device,
+ PREPARE_SIM2_OR_3_CONST_DEVICE)(
+ long* Mesh_pointer_f,
+ int* islice_selected_rec,
+ int* islice_selected_rec_size) {
+
+TRACE("prepare_sim2_or_3_const_device");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-
+
// allocates arrays for receivers
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_rec,
*islice_selected_rec_size*sizeof(int)),7001);
-
+
// copies arrays to GPU device
- print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_rec,islice_selected_rec,
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_rec,islice_selected_rec,
*islice_selected_rec_size*sizeof(int),cudaMemcpyHostToDevice),7002);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_adjoint_sim2_or_3_constants_device");
-#endif
-}
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_sim2_or_3_const_device");
+#endif
+}
+
/* ----------------------------------------------------------------------------------------------- */
// for ACOUSTIC simulations
@@ -487,11 +691,11 @@
extern "C"
void FC_FUNC_(prepare_fields_acoustic_device,
- PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
- float* rmass_acoustic,
+ PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
+ float* rmass_acoustic,
float* rhostore,
float* kappastore,
- int* num_phase_ispec_acoustic,
+ int* num_phase_ispec_acoustic,
int* phase_ispec_inner_acoustic,
int* ispec_is_acoustic,
int* NOISE_TOMOGRAPHY,
@@ -501,255 +705,281 @@
int* ABSORBING_CONDITIONS,
int* b_reclen_potential,
float* b_absorb_potential,
- int* SIMULATION_TYPE,
- float* rho_ac_kl,
- float* kappa_ac_kl,
int* ELASTIC_SIMULATION,
int* num_coupling_ac_el_faces,
int* coupling_ac_el_ispec,
int* coupling_ac_el_ijk,
float* coupling_ac_el_normal,
- float* coupling_ac_el_jacobian2Dw
+ float* coupling_ac_el_jacobian2Dw
) {
-
+
TRACE("prepare_fields_acoustic_device");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
/* Assuming NGLLX==5. Padded is then 128 (5^3+3) */
int size_padded = 128 * mp->NSPEC_AB;
int size_nonpadded = 125 * mp->NSPEC_AB;
int size = mp->NGLOB_AB;
-
+
// allocates arrays on device (GPU)
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_acoustic),sizeof(float)*size),9001);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_acoustic),sizeof(float)*size),9002);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_dot_acoustic),sizeof(float)*size),9003);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_potential_dot_dot_buffer),sizeof(float)*size),9004);
-
+
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(float)*size),9005);
-
+
// padded array
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rhostore),size_padded*sizeof(float)),9006);
-
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rhostore),size_padded*sizeof(float)),9006);
+
// non-padded array
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_nonpadded*sizeof(float)),9007);
-
- mp->num_phase_ispec_acoustic = *num_phase_ispec_acoustic;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic), mp->num_phase_ispec_acoustic*2*sizeof(int)),9008);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic),mp->NSPEC_AB*sizeof(int)),9009);
-
- // free surface
- if( *NOISE_TOMOGRAPHY == 0 ){
- // allocate surface arrays
- mp->num_free_surface_faces = *num_free_surface_faces;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),mp->num_free_surface_faces*sizeof(int)),9201);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),3*25*mp->num_free_surface_faces*sizeof(int)),9202);
-
- // transfers values onto GPU
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
- mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9203);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
- 3*25*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9204);
- }
-
- // absorbing boundaries
- if( *ABSORBING_CONDITIONS == 1 ){
- mp->d_b_reclen_potential = *b_reclen_potential;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),9301);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential,
- mp->d_b_reclen_potential,cudaMemcpyHostToDevice),9302);
- }
-
- // kernel simulations
- if( *SIMULATION_TYPE == 3 ){
- // allocates backward/reconstructed arrays on device (GPU)
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_acoustic),sizeof(float)*size),9014);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_acoustic),sizeof(float)*size),9015);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_dot_acoustic),sizeof(float)*size),9016);
-
- // allocates kernels
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_ac_kl),125*mp->NSPEC_AB*sizeof(float)),9017);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_ac_kl),125*mp->NSPEC_AB*sizeof(float)),9018);
- // copies over initial values
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_ac_kl,rho_ac_kl,
- 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),9019);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_kappa_ac_kl,kappa_ac_kl,
- 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),9020);
-
- }
-
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_nonpadded*sizeof(float)),9007);
+
// transfer element data
print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_acoustic,rmass_acoustic,
sizeof(float)*size,cudaMemcpyHostToDevice),9100);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic,
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic,
mp->num_phase_ispec_acoustic*2*sizeof(int),cudaMemcpyHostToDevice),9101);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_acoustic,ispec_is_acoustic,
mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),9102);
-
+
print_CUDA_error_if_any(cudaMemcpy(mp->d_kappastore,kappastore,
size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),9105);
-
+
// transfer constant element data with padding
- for(int i=0;i<mp->NSPEC_AB;i++) {
+ for(int i=0; i < mp->NSPEC_AB; i++) {
print_CUDA_error_if_any(cudaMemcpy(mp->d_rhostore+i*128, &rhostore[i*125],
125*sizeof(float),cudaMemcpyHostToDevice),9106);
}
-
+
+ // phase elements
+ mp->num_phase_ispec_acoustic = *num_phase_ispec_acoustic;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic),
+ mp->num_phase_ispec_acoustic*2*sizeof(int)),9008);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic),
+ mp->NSPEC_AB*sizeof(int)),9009);
+
+ // free surface
+ if( *NOISE_TOMOGRAPHY == 0 && *ELASTIC_SIMULATION == 0){
+ // allocate surface arrays
+ mp->num_free_surface_faces = *num_free_surface_faces;
+ if( mp->num_free_surface_faces > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
+ mp->num_free_surface_faces*sizeof(int)),9201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
+ mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9203);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
+ 3*25*mp->num_free_surface_faces*sizeof(int)),9202);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+ 3*25*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9204);
+ }
+ }
+
+ // absorbing boundaries
+ if( *ABSORBING_CONDITIONS ){
+ mp->d_b_reclen_potential = *b_reclen_potential;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),9301);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential,
+ mp->d_b_reclen_potential,cudaMemcpyHostToDevice),9302);
+ }
+
+
// for seismograms
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_potential),
- mp->nrec_local*125*sizeof(float)),9107);
- mp->h_station_seismo_potential = (float*)malloc(mp->nrec_local*125*sizeof(float));
- if( mp->h_station_seismo_potential == NULL) exit_on_error("error allocating h_station_seismo_potential");
+ if( mp->nrec_local > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_potential),
+ mp->nrec_local*125*sizeof(float)),9107);
+ mp->h_station_seismo_potential = (float*)malloc(mp->nrec_local*125*sizeof(float));
+ if( mp->h_station_seismo_potential == NULL) exit_on_error("error allocating h_station_seismo_potential");
+ }
// coupling with elastic parts
- if( *ELASTIC_SIMULATION == 1 && *num_coupling_ac_el_faces > 0 ){
+ if( *ELASTIC_SIMULATION && *num_coupling_ac_el_faces > 0 ){
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ispec),
- (*num_coupling_ac_el_faces)*sizeof(int)),9601);
+ (*num_coupling_ac_el_faces)*sizeof(int)),9601);
print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ispec,coupling_ac_el_ispec,
- (*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),9602);
+ (*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),9602);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ijk),
- 3*25*(*num_coupling_ac_el_faces)*sizeof(int)),9603);
+ 3*25*(*num_coupling_ac_el_faces)*sizeof(int)),9603);
print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ijk,coupling_ac_el_ijk,
- 3*25*(*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),9604);
+ 3*25*(*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),9604);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_normal),
- 3*25*(*num_coupling_ac_el_faces)*sizeof(float)),9605);
+ 3*25*(*num_coupling_ac_el_faces)*sizeof(float)),9605);
print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_normal,coupling_ac_el_normal,
- 3*25*(*num_coupling_ac_el_faces)*sizeof(float),cudaMemcpyHostToDevice),9606);
+ 3*25*(*num_coupling_ac_el_faces)*sizeof(float),cudaMemcpyHostToDevice),9606);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_jacobian2Dw),
- 25*(*num_coupling_ac_el_faces)*sizeof(float)),9607);
+ 25*(*num_coupling_ac_el_faces)*sizeof(float)),9607);
print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_jacobian2Dw,coupling_ac_el_jacobian2Dw,
25*(*num_coupling_ac_el_faces)*sizeof(float),cudaMemcpyHostToDevice),9608);
-
+
}
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_fields_acoustic_device");
-#endif
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_fields_acoustic_device");
+#endif
}
/* ----------------------------------------------------------------------------------------------- */
+extern "C"
+void FC_FUNC_(prepare_fields_acoustic_adj_dev,
+ PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
+ int* SIMULATION_TYPE,
+ float* rho_ac_kl,
+ float* kappa_ac_kl,
+ int* APPROXIMATE_HESS_KL) {
+
+ TRACE("prepare_fields_acoustic_adj_dev");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ int size = mp->NGLOB_AB;
+
+ // kernel simulations
+ if( *SIMULATION_TYPE != 3 ) return;
+
+ // allocates backward/reconstructed arrays on device (GPU)
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_acoustic),sizeof(float)*size),9014);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_acoustic),sizeof(float)*size),9015);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_dot_acoustic),sizeof(float)*size),9016);
+
+ // allocates kernels
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_ac_kl),125*mp->NSPEC_AB*sizeof(float)),9017);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_ac_kl),125*mp->NSPEC_AB*sizeof(float)),9018);
+
+ // copies over initial values
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_ac_kl,rho_ac_kl,
+ 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),9019);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappa_ac_kl,kappa_ac_kl,
+ 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),9020);
+
+
+ if( *APPROXIMATE_HESS_KL ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hess_ac_kl),125*mp->NSPEC_AB*sizeof(float)),9030);
+ // initializes with zeros
+ print_CUDA_error_if_any(cudaMemset(mp->d_hess_ac_kl,0,
+ 125*mp->NSPEC_AB*sizeof(float)),9031);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_fields_acoustic_adj_dev");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
// for ELASTIC simulations
/* ----------------------------------------------------------------------------------------------- */
extern "C"
void FC_FUNC_(prepare_fields_elastic_device,
- PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
+ PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
int* size,
float* rmass,
- float* rho_vp,
+ float* rho_vp,
float* rho_vs,
- int* num_phase_ispec_elastic,
+ int* num_phase_ispec_elastic,
int* phase_ispec_inner_elastic,
int* ispec_is_elastic,
int* ABSORBING_CONDITIONS,
float* h_b_absorb_field,
int* h_b_reclen_field,
- int* SIMULATION_TYPE,
- float* rho_kl,
- float* mu_kl,
- float* kappa_kl,
+ int* SIMULATION_TYPE,int* SAVE_FORWARD,
int* COMPUTE_AND_STORE_STRAIN,
float* epsilondev_xx,float* epsilondev_yy,float* epsilondev_xy,
float* epsilondev_xz,float* epsilondev_yz,
- float* epsilon_trace_over_3,
- float* b_epsilondev_xx,float* b_epsilondev_yy,float* b_epsilondev_xy,
- float* b_epsilondev_xz,float* b_epsilondev_yz,
- float* b_epsilon_trace_over_3,
- int* ATTENUATION, int* R_size,
+ int* ATTENUATION,
+ int* R_size,
float* R_xx,float* R_yy,float* R_xy,float* R_xz,float* R_yz,
- float* b_R_xx,float* b_R_yy,float* b_R_xy,float* b_R_xz,float* b_R_yz,
float* one_minus_sum_beta,float* factor_common,
float* alphaval,float* betaval,float* gammaval,
- float* b_alphaval,float* b_betaval,float* b_gammaval,
- int* OCEANS,float* rmass_ocean_load,
- float* free_surface_normal,int* num_free_surface_faces){
-
+ int* OCEANS,
+ float* rmass_ocean_load,
+ int* NOISE_TOMOGRAPHY,
+ float* free_surface_normal,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* num_free_surface_faces){
+
TRACE("prepare_fields_elastic_device");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
- /* Assuming NGLLX==5. Padded is then 128 (5^3+3) */
+ /* Assuming NGLLX==5. Padded is then 128 (5^3+3) */
//int size_padded = 128 * mp->NSPEC_AB;
int size_nonpadded = 125 * mp->NSPEC_AB;
-
+
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ),sizeof(float)*(*size)),8001);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc),sizeof(float)*(*size)),8002);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel),sizeof(float)*(*size)),8003);
+
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer),sizeof(float)*(*size)),8004);
-
+
+ // mass matrix
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass),sizeof(float)*mp->NGLOB_AB),8005);
-
- // non-padded arrays
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vp),size_nonpadded*sizeof(float)),8006);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vs),size_nonpadded*sizeof(float)),8007);
-
+ // transfer element data
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,rmass,
+ sizeof(float)*mp->NGLOB_AB,cudaMemcpyHostToDevice),8010);
+
+
// element indices
- mp->d_num_phase_ispec_elastic = *num_phase_ispec_elastic;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_elastic),
- mp->d_num_phase_ispec_elastic*2*sizeof(int)),8008);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_elastic),mp->NSPEC_AB*sizeof(int)),8009);
-
- // transfer element data
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,rmass,
- sizeof(float)*mp->NGLOB_AB,cudaMemcpyHostToDevice),8010);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic,
- mp->d_num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),8011);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_elastic,ispec_is_elastic,
mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),8012);
-
- // daniel: not sure if rho_vp, rho_vs needs padding... they are needed for stacey boundary condition
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp, rho_vp,
- size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),8013);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs, rho_vs,
- size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),8014);
-
+
+ // daniel: check
+ //check_ispec_is(Mesh_pointer_f,1);
+
+ // phase elements
+ mp->num_phase_ispec_elastic = *num_phase_ispec_elastic;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_elastic),
+ mp->num_phase_ispec_elastic*2*sizeof(int)),8008);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic,
+ mp->num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),8011);
+
+ //daniel: check
+ //check_phase_ispec(Mesh_pointer_f,1);
+
// for seismograms
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),
+ if( mp->nrec_local > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),
3*125*(mp->nrec_local)*sizeof(float)),8015);
- mp->h_station_seismo_field = (float*)malloc(3*125*(mp->nrec_local)*sizeof(float));
-
+ mp->h_station_seismo_field = (float*)malloc(3*125*(mp->nrec_local)*sizeof(float));
+ }
+
// absorbing conditions
- if( *ABSORBING_CONDITIONS == 1 && mp->d_num_abs_boundary_faces > 0){
- mp->d_b_reclen_field = *h_b_reclen_field;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_field),
+ if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0){
+ // non-padded arrays
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vp),size_nonpadded*sizeof(float)),8006);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vs),size_nonpadded*sizeof(float)),8007);
+
+ // rho_vp, rho_vs non-padded; they are needed for stacey boundary condition
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp, rho_vp,
+ size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),8013);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs, rho_vs,
+ size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),8014);
+
+ // absorb_field array used for file i/o
+ if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD )){
+ mp->d_b_reclen_field = *h_b_reclen_field;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_field),
mp->d_b_reclen_field),8016);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field,
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field,
mp->d_b_reclen_field,cudaMemcpyHostToDevice),8017);
+ }
}
- // kernel simulations
- if( *SIMULATION_TYPE == 3 ){
- // allocates backward/reconstructed arrays on device (GPU)
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ),sizeof(float)*(*size)),8201);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc),sizeof(float)*(*size)),8202);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel),sizeof(float)*(*size)),8203);
-
- // allocates kernels
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl),125*mp->NSPEC_AB*sizeof(float)),8204);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_mu_kl),125*mp->NSPEC_AB*sizeof(float)),8205);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_kl),125*mp->NSPEC_AB*sizeof(float)),8206);
-
- // copies over initial values
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_kl,rho_kl,
- 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8207);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_mu_kl,mu_kl,
- 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8208);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_kappa_kl,kappa_kl,
- 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8209);
-
- }
-
// strains used for attenuation and kernel simulations
- if( *COMPUTE_AND_STORE_STRAIN == 1 ){
+ if( *COMPUTE_AND_STORE_STRAIN ){
// strains
int epsilondev_size = 125*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing
-
+
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xx,
epsilondev_size*sizeof(float)),8301);
print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size*sizeof(float),
@@ -770,45 +1000,11 @@
epsilondev_size*sizeof(float)),8308);
print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size*sizeof(float),
cudaMemcpyHostToDevice),8309);
-
- if( *SIMULATION_TYPE == 3 ){
- // solid pressure
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3),
- 125*mp->NSPEC_AB*sizeof(float)),8310);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3,
- 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8311);
- // backward solid pressure
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3),
- 125*mp->NSPEC_AB*sizeof(float)),8312);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3,
- 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8313);
- // prepares backward strains
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx),
- epsilondev_size*sizeof(float)),8321);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy),
- epsilondev_size*sizeof(float)),8322);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy),
- epsilondev_size*sizeof(float)),8323);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz),
- epsilondev_size*sizeof(float)),8324);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz),
- epsilondev_size*sizeof(float)),8325);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,
- epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8326);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,
- epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8327);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,
- epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8328);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,
- epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8329);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,
- epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8330);
- }
}
-
+
// attenuation memory variables
- if( *ATTENUATION == 1 ){
+ if( *ATTENUATION ){
// memory arrays
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xx),
(*R_size)*sizeof(float)),8401);
@@ -833,39 +1029,13 @@
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yz),
(*R_size)*sizeof(float)),8409);
print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz,R_yz,(*R_size)*sizeof(float),
- cudaMemcpyHostToDevice),8410);
- if( *SIMULATION_TYPE == 3 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xx),
- (*R_size)*sizeof(float)),8421);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,(*R_size)*sizeof(float),
- cudaMemcpyHostToDevice),8422);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yy),
- (*R_size)*sizeof(float)),8423);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,(*R_size)*sizeof(float),
- cudaMemcpyHostToDevice),8424);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xy),
- (*R_size)*sizeof(float)),8425);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,(*R_size)*sizeof(float),
- cudaMemcpyHostToDevice),8426);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xz),
- (*R_size)*sizeof(float)),8427);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,(*R_size)*sizeof(float),
- cudaMemcpyHostToDevice),8428);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yz),
- (*R_size)*sizeof(float)),8429);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,(*R_size)*sizeof(float),
- cudaMemcpyHostToDevice),8420);
- }
-
+ cudaMemcpyHostToDevice),8410);
+
// attenuation factors
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_one_minus_sum_beta),
125*mp->NSPEC_AB*sizeof(float)),8430);
print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta ,one_minus_sum_beta,
- 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8431);
+ 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8431);
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_factor_common),
N_SLS*125*mp->NSPEC_AB*sizeof(float)),8432);
@@ -877,52 +1047,196 @@
N_SLS*sizeof(float)),8434);
print_CUDA_error_if_any(cudaMemcpy(mp->d_alphaval ,alphaval,
N_SLS*sizeof(float),cudaMemcpyHostToDevice),8435);
-
+
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_betaval),
N_SLS*sizeof(float)),8436);
print_CUDA_error_if_any(cudaMemcpy(mp->d_betaval ,betaval,
N_SLS*sizeof(float),cudaMemcpyHostToDevice),8437);
-
+
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_gammaval),
N_SLS*sizeof(float)),8438);
print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaval ,gammaval,
N_SLS*sizeof(float),cudaMemcpyHostToDevice),8439);
-
- if( *SIMULATION_TYPE == 3 ){
- // alpha,beta,gamma factors for backward fields
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_alphaval),
- N_SLS*sizeof(float)),8434);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval,
- N_SLS*sizeof(float),cudaMemcpyHostToDevice),8435);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_betaval),
- N_SLS*sizeof(float)),8436);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval,
- N_SLS*sizeof(float),cudaMemcpyHostToDevice),8437);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_gammaval),
- N_SLS*sizeof(float)),8438);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval,
- N_SLS*sizeof(float),cudaMemcpyHostToDevice),8439);
+
+ }
+
+ if( *OCEANS ){
+ // oceans needs a free surface
+ mp->num_free_surface_faces = *num_free_surface_faces;
+ if( mp->num_free_surface_faces > 0 ){
+ // mass matrix
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load),
+ sizeof(float)*mp->NGLOB_AB),8501);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,rmass_ocean_load,
+ sizeof(float)*mp->NGLOB_AB,cudaMemcpyHostToDevice),8502);
+ // surface normal
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_normal),
+ 3*25*(mp->num_free_surface_faces)*sizeof(float)),8503);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_normal,free_surface_normal,
+ 3*25*(mp->num_free_surface_faces)*sizeof(float),cudaMemcpyHostToDevice),8504);
+
+ // temporary global array: used to synchronize updates on global accel array
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_updated_dof_ocean_load),
+ sizeof(int)*mp->NGLOB_AB),8505);
+
+ if( *NOISE_TOMOGRAPHY == 0){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
+ mp->num_free_surface_faces*sizeof(int)),9201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
+ mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9203);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
+ 3*25*mp->num_free_surface_faces*sizeof(int)),9202);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+ 3*25*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),9204);
+ }
}
}
- if( *OCEANS == 1 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load),sizeof(float)*mp->NGLOB_AB),8501);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,rmass_ocean_load,
- sizeof(float)*mp->NGLOB_AB,cudaMemcpyHostToDevice),8502);
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_fields_elastic_device");
+#endif
+}
- mp->num_free_surface_faces = *num_free_surface_faces;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_normal),
- 3*25*(mp->num_free_surface_faces)*sizeof(float)),8503);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_normal,free_surface_normal,
- 3*25*(mp->num_free_surface_faces)*sizeof(float),cudaMemcpyHostToDevice),8504);
-
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_fields_elastic_adj_dev,
+ PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
+ int* size,
+ int* SIMULATION_TYPE,
+ float* rho_kl,
+ float* mu_kl,
+ float* kappa_kl,
+ int* COMPUTE_AND_STORE_STRAIN,
+ float* epsilon_trace_over_3,
+ float* b_epsilondev_xx,float* b_epsilondev_yy,float* b_epsilondev_xy,
+ float* b_epsilondev_xz,float* b_epsilondev_yz,
+ float* b_epsilon_trace_over_3,
+ int* ATTENUATION,
+ int* R_size,
+ float* b_R_xx,float* b_R_yy,float* b_R_xy,float* b_R_xz,float* b_R_yz,
+ float* b_alphaval,float* b_betaval,float* b_gammaval,
+ int* APPROXIMATE_HESS_KL){
+
+ TRACE("prepare_fields_elastic_adj_dev");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ // checks if kernel simulation
+ if( *SIMULATION_TYPE != 3 ) return;
+
+ // kernel simulations
+ // allocates backward/reconstructed arrays on device (GPU)
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ),sizeof(float)*(*size)),8201);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc),sizeof(float)*(*size)),8202);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel),sizeof(float)*(*size)),8203);
+
+ // allocates kernels
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl),125*mp->NSPEC_AB*sizeof(float)),8204);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_mu_kl),125*mp->NSPEC_AB*sizeof(float)),8205);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_kl),125*mp->NSPEC_AB*sizeof(float)),8206);
+
+ // copies over initial values
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_kl,rho_kl,
+ 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8207);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_mu_kl,mu_kl,
+ 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8208);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappa_kl,kappa_kl,
+ 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8209);
+
+ // strains used for attenuation and kernel simulations
+ if( *COMPUTE_AND_STORE_STRAIN ){
+ // strains
+ int epsilondev_size = 125*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing
+
+ // solid pressure
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3),
+ 125*mp->NSPEC_AB*sizeof(float)),8310);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3,
+ 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8311);
+ // backward solid pressure
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3),
+ 125*mp->NSPEC_AB*sizeof(float)),8312);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3,
+ 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),8313);
+ // prepares backward strains
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx),
+ epsilondev_size*sizeof(float)),8321);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy),
+ epsilondev_size*sizeof(float)),8322);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy),
+ epsilondev_size*sizeof(float)),8323);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz),
+ epsilondev_size*sizeof(float)),8324);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz),
+ epsilondev_size*sizeof(float)),8325);
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,
+ epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8326);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,
+ epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8327);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,
+ epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8328);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,
+ epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8329);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,
+ epsilondev_size*sizeof(float),cudaMemcpyHostToDevice),8330);
}
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_fields_elastic_device");
-#endif
+
+ // attenuation memory variables
+ if( *ATTENUATION ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xx),
+ (*R_size)*sizeof(float)),8421);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,(*R_size)*sizeof(float),
+ cudaMemcpyHostToDevice),8422);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yy),
+ (*R_size)*sizeof(float)),8423);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,(*R_size)*sizeof(float),
+ cudaMemcpyHostToDevice),8424);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xy),
+ (*R_size)*sizeof(float)),8425);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,(*R_size)*sizeof(float),
+ cudaMemcpyHostToDevice),8426);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xz),
+ (*R_size)*sizeof(float)),8427);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,(*R_size)*sizeof(float),
+ cudaMemcpyHostToDevice),8428);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yz),
+ (*R_size)*sizeof(float)),8429);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,(*R_size)*sizeof(float),
+ cudaMemcpyHostToDevice),8420);
+
+ // alpha,beta,gamma factors for backward fields
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_alphaval),
+ N_SLS*sizeof(float)),8434);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval,
+ N_SLS*sizeof(float),cudaMemcpyHostToDevice),8435);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_betaval),
+ N_SLS*sizeof(float)),8436);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval,
+ N_SLS*sizeof(float),cudaMemcpyHostToDevice),8437);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_gammaval),
+ N_SLS*sizeof(float)),8438);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval,
+ N_SLS*sizeof(float),cudaMemcpyHostToDevice),8439);
+ }
+
+ if( *APPROXIMATE_HESS_KL ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hess_el_kl),125*mp->NSPEC_AB*sizeof(float)),8450);
+ // initializes with zeros
+ print_CUDA_error_if_any(cudaMemset(mp->d_hess_el_kl,0,
+ 125*mp->NSPEC_AB*sizeof(float)),8451);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_fields_elastic_adj_dev");
+#endif
}
@@ -935,11 +1249,11 @@
extern "C"
void FC_FUNC_(prepare_fields_noise_device,
- PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
+ PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
int* NSPEC_AB, int* NGLOB_AB,
- int* free_surface_ispec,int* free_surface_ijk,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
int* num_free_surface_faces,
- int* size_free_surface_ijk,
int* SIMULATION_TYPE,
int* NOISE_TOMOGRAPHY,
int* NSTEP,
@@ -951,36 +1265,36 @@
float* free_surface_jacobian2Dw,
float* Sigma_kl
) {
-
+
TRACE("prepare_fields_noise_device");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-
+
// free surface
mp->num_free_surface_faces = *num_free_surface_faces;
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec,
- *num_free_surface_faces*sizeof(int)),4001);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec,
- *num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4002);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk,
- (*size_free_surface_ijk)*sizeof(int)),4003);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec,
+ mp->num_free_surface_faces*sizeof(int)),4001);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec,
+ mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4002);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk,
+ 3*25*mp->num_free_surface_faces*sizeof(int)),4003);
print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
- (*size_free_surface_ijk)*sizeof(int),cudaMemcpyHostToDevice),4004);
-
+ 3*25*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4004);
+
// alloc storage for the surface buffer to be copied
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie,
- 3*25*(*num_free_surface_faces)*sizeof(float)),4005);
-
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie,
+ 3*25*mp->num_free_surface_faces*sizeof(float)),4005);
+
// prepares noise source array
if( *NOISE_TOMOGRAPHY == 1 ){
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
- 3*125*(*NSTEP)*sizeof(float)),4101);
+ 3*125*(*NSTEP)*sizeof(float)),4101);
print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_sourcearray, noise_sourcearray,
- 3*125*(*NSTEP)*sizeof(float),cudaMemcpyHostToDevice),4102);
+ 3*125*(*NSTEP)*sizeof(float),cudaMemcpyHostToDevice),4102);
}
-
+
// prepares noise directions
if( *NOISE_TOMOGRAPHY > 1 ){
int nface_size = 25*(*num_free_surface_faces);
@@ -991,35 +1305,35 @@
nface_size*sizeof(float)),4302);
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise,
nface_size*sizeof(float)),4303);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise,
- nface_size*sizeof(float)),4304);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise,
+ nface_size*sizeof(float)),4304);
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_free_surface_jacobian2Dw,
nface_size*sizeof(float)),4305);
// transfers data onto GPU
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise,
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise,
nface_size*sizeof(float),cudaMemcpyHostToDevice),4306);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise,
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise,
nface_size*sizeof(float),cudaMemcpyHostToDevice),4307);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise,
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise,
nface_size*sizeof(float),cudaMemcpyHostToDevice),4308);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise,
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise,
nface_size*sizeof(float),cudaMemcpyHostToDevice),4309);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw,
- nface_size*sizeof(float),cudaMemcpyHostToDevice),4310);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw,
+ nface_size*sizeof(float),cudaMemcpyHostToDevice),4310);
}
-
+
// prepares noise strength kernel
if( *NOISE_TOMOGRAPHY == 3 ){
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_Sigma_kl),
125*(mp->NSPEC_AB)*sizeof(float)),4401);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_Sigma_kl, Sigma_kl,
- 125*(mp->NSPEC_AB)*sizeof(float),cudaMemcpyHostToDevice),4403);
- }
-
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_Sigma_kl, Sigma_kl,
+ 125*(mp->NSPEC_AB)*sizeof(float),cudaMemcpyHostToDevice),4403);
+ }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//printf("jacobian_size = %d\n",25*(*num_free_surface_faces));
- exit_on_cuda_error("prepare_fields_noise_device");
-#endif
+ exit_on_cuda_error("prepare_fields_noise_device");
+#endif
}
@@ -1033,19 +1347,22 @@
void FC_FUNC_(prepare_cleanup_device,
PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f,
int* SIMULATION_TYPE,
+ int* SAVE_FORWARD,
int* ACOUSTIC_SIMULATION,
int* ELASTIC_SIMULATION,
int* ABSORBING_CONDITIONS,
int* NOISE_TOMOGRAPHY,
int* COMPUTE_AND_STORE_STRAIN,
- int* ATTENUATION) {
-
+ int* ATTENUATION,
+ int* OCEANS,
+ int* APPROXIMATE_HESS_KL) {
+
TRACE("prepare_cleanup_device");
- // frees allocated memory arrays
+ // frees allocated memory arrays
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
- // frees memory on GPU
+ // frees memory on GPU
// mesh
cudaFree(mp->d_xix);
cudaFree(mp->d_xiy);
@@ -1057,15 +1374,15 @@
cudaFree(mp->d_gammay);
cudaFree(mp->d_gammaz);
cudaFree(mp->d_muv);
-
+
// absorbing boundaries
- if( *ABSORBING_CONDITIONS == 1 && mp->d_num_abs_boundary_faces > 0 ){
+ if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0 ){
cudaFree(mp->d_abs_boundary_ispec);
cudaFree(mp->d_abs_boundary_ijk);
cudaFree(mp->d_abs_boundary_normal);
cudaFree(mp->d_abs_boundary_jacobian2Dw);
}
-
+
// interfaces
cudaFree(mp->d_nibool_interfaces_ext_mesh);
cudaFree(mp->d_ibool_interfaces_ext_mesh);
@@ -1075,20 +1392,20 @@
cudaFree(mp->d_ibool);
// sources
- if (*SIMULATION_TYPE == 1 || *SIMULATION_TYPE == 3){
+ if (*SIMULATION_TYPE == 1 || *SIMULATION_TYPE == 3){
cudaFree(mp->d_sourcearrays);
- cudaFree(mp->d_stf_pre_compute);
+ cudaFree(mp->d_stf_pre_compute);
}
-
+
cudaFree(mp->d_islice_selected_source);
cudaFree(mp->d_ispec_selected_source);
// receivers
- cudaFree(mp->d_number_receiver_global);
+ if( mp->nrec_local > 0 ) cudaFree(mp->d_number_receiver_global);
cudaFree(mp->d_ispec_selected_rec);
// ACOUSTIC arrays
- if( *ACOUSTIC_SIMULATION == 1 ){
+ if( *ACOUSTIC_SIMULATION ){
cudaFree(mp->d_potential_acoustic);
cudaFree(mp->d_potential_dot_acoustic);
cudaFree(mp->d_potential_dot_dot_acoustic);
@@ -1098,42 +1415,54 @@
cudaFree(mp->d_kappastore);
cudaFree(mp->d_phase_ispec_inner_acoustic);
cudaFree(mp->d_ispec_is_acoustic);
-
- if( *NOISE_TOMOGRAPHY == 0 ){
+
+ if( *NOISE_TOMOGRAPHY == 0 ){
cudaFree(mp->d_free_surface_ispec);
cudaFree(mp->d_free_surface_ijk);
}
-
- if( *ABSORBING_CONDITIONS == 1 ) cudaFree(mp->d_b_absorb_potential);
-
+
+ if( *ABSORBING_CONDITIONS ) cudaFree(mp->d_b_absorb_potential);
+
if( *SIMULATION_TYPE == 3 ) {
cudaFree(mp->d_b_potential_acoustic);
cudaFree(mp->d_b_potential_dot_acoustic);
cudaFree(mp->d_b_potential_dot_dot_acoustic);
cudaFree(mp->d_rho_ac_kl);
cudaFree(mp->d_kappa_ac_kl);
+ if( *APPROXIMATE_HESS_KL) cudaFree(mp->d_hess_ac_kl);
}
-
- cudaFree(mp->d_station_seismo_potential);
- free(mp->h_station_seismo_potential);
- }
+ if(mp->nrec_local > 0 ){
+ cudaFree(mp->d_station_seismo_potential);
+ free(mp->h_station_seismo_potential);
+ }
+ } // ACOUSTIC_SIMULATION
+
// ELASTIC arrays
- if( *ELASTIC_SIMULATION == 1 ){
+ if( *ELASTIC_SIMULATION ){
cudaFree(mp->d_displ);
cudaFree(mp->d_veloc);
cudaFree(mp->d_accel);
cudaFree(mp->d_send_accel_buffer);
cudaFree(mp->d_rmass);
- cudaFree(mp->d_rho_vp);
- cudaFree(mp->d_rho_vs);
+
cudaFree(mp->d_phase_ispec_inner_elastic);
cudaFree(mp->d_ispec_is_elastic);
- cudaFree(mp->d_station_seismo_field);
-
- if( *ABSORBING_CONDITIONS == 1 && mp->d_num_abs_boundary_faces > 0) cudaFree(mp->d_b_absorb_field);
+ if( mp->nrec_local > 0 ){
+ cudaFree(mp->d_station_seismo_field);
+ free(mp->h_station_seismo_field);
+ }
+
+ if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0){
+ cudaFree(mp->d_rho_vp);
+ cudaFree(mp->d_rho_vs);
+
+ if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD ))
+ cudaFree(mp->d_b_absorb_field);
+ }
+
if( *SIMULATION_TYPE == 3 ) {
cudaFree(mp->d_b_displ);
cudaFree(mp->d_b_veloc);
@@ -1141,26 +1470,27 @@
cudaFree(mp->d_rho_kl);
cudaFree(mp->d_mu_kl);
cudaFree(mp->d_kappa_kl);
+ if( *APPROXIMATE_HESS_KL ) cudaFree(mp->d_hess_el_kl);
}
- if( *COMPUTE_AND_STORE_STRAIN == 1 ){
+ if( *COMPUTE_AND_STORE_STRAIN ){
cudaFree(mp->d_epsilondev_xx);
cudaFree(mp->d_epsilondev_yy);
cudaFree(mp->d_epsilondev_xy);
cudaFree(mp->d_epsilondev_xz);
cudaFree(mp->d_epsilondev_yz);
- if( *SIMULATION_TYPE == 3 ){
+ if( *SIMULATION_TYPE == 3 ){
cudaFree(mp->d_epsilon_trace_over_3);
cudaFree(mp->d_b_epsilon_trace_over_3);
cudaFree(mp->d_b_epsilondev_xx);
cudaFree(mp->d_b_epsilondev_yy);
cudaFree(mp->d_b_epsilondev_xy);
cudaFree(mp->d_b_epsilondev_xz);
- cudaFree(mp->d_b_epsilondev_yz);
- }
+ cudaFree(mp->d_b_epsilondev_yz);
+ }
}
-
- if( *ATTENUATION == 1 ){
+
+ if( *ATTENUATION ){
cudaFree(mp->d_factor_common);
cudaFree(mp->d_one_minus_sum_beta);
cudaFree(mp->d_alphaval);
@@ -1171,7 +1501,7 @@
cudaFree(mp->d_R_xy);
cudaFree(mp->d_R_xz);
cudaFree(mp->d_R_yz);
- if( *SIMULATION_TYPE == 3){
+ if( *SIMULATION_TYPE == 3){
cudaFree(mp->d_b_R_xx);
cudaFree(mp->d_b_R_yy);
cudaFree(mp->d_b_R_xy);
@@ -1182,13 +1512,25 @@
cudaFree(mp->d_b_gammaval);
}
}
- }
-
+
+ if( *OCEANS ){
+ if( mp->num_free_surface_faces > 0 ){
+ cudaFree(mp->d_rmass_ocean_load);
+ cudaFree(mp->d_free_surface_normal);
+ cudaFree(mp->d_updated_dof_ocean_load);
+ if( *NOISE_TOMOGRAPHY == 0){
+ cudaFree(mp->d_free_surface_ispec);
+ cudaFree(mp->d_free_surface_ijk);
+ }
+ }
+ }
+ } // ELASTIC_SIMULATION
+
// purely adjoint & kernel array
if( *SIMULATION_TYPE == 2 || *SIMULATION_TYPE == 3 ) cudaFree(mp->d_islice_selected_rec);
-
- // NOISE arrays
- if( *NOISE_TOMOGRAPHY > 0 ){
+
+ // NOISE arrays
+ if( *NOISE_TOMOGRAPHY > 0 ){
cudaFree(mp->d_free_surface_ispec);
cudaFree(mp->d_free_surface_ijk);
cudaFree(mp->d_noise_surface_movie);
@@ -1199,10 +1541,11 @@
cudaFree(mp->d_normal_z_noise);
cudaFree(mp->d_mask_noise);
cudaFree(mp->d_free_surface_jacobian2Dw);
- }
+ }
if( *NOISE_TOMOGRAPHY == 3 ) cudaFree(mp->d_Sigma_kl);
}
-
+
// mesh pointer - not needed anymore
free(mp);
-}
\ No newline at end of file
+}
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,7 +1,7 @@
#include "config.h"
#include <stdio.h>
-typedef float real;
+typedef float realw;
/* from check_fields_cuda.cu */
void FC_FUNC_(check_max_norm_displ_gpu,
@@ -33,30 +33,38 @@
void FC_FUNC_(get_max_accel,
GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer){}
-void FC_FUNC_(get_norm_acoustic_from_device_cuda,
- GET_NORM_ACOUSTIC_FROM_DEVICE_CUDA)(float* norm,
+void FC_FUNC_(get_norm_acoustic_from_device,
+ GET_NORM_ACOUSTIC_FROM_DEVICE)(float* norm,
long* Mesh_pointer_f,
int* SIMULATION_TYPE){}
-void FC_FUNC_(get_norm_elastic_from_device_cuda,
- GET_NORM_ELASTIC_FROM_DEVICE_CUDA)(float* norm,
+void FC_FUNC_(get_norm_elastic_from_device,
+ GET_NORM_ELASTIC_FROM_DEVICE)(float* norm,
long* Mesh_pointer_f,
int* SIMULATION_TYPE){}
/* from file compute_add_sources_cuda.cu */
-void FC_FUNC_(add_sourcearrays_adjoint_cuda,
- ADD_SOURCEARRAYS_ADJOINT_CUDA)(long* Mesh_pointer,
- int* USE_FORCE_POINT_SOURCE,
- double* h_stf_pre_compute,int* NSOURCES,
- int* phase_is_inner,int* myrank){}
+void FC_FUNC_(compute_add_sources_el_cuda,
+ COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
+ int* NSPEC_ABf, int* NGLOB_ABf,
+ int* phase_is_innerf,int* NSOURCESf,
+ int* itf, float* dtf, float* t0f,
+ int* SIMULATION_TYPEf,int* NSTEPf,
+ int* NOISE_TOMOGRAPHYf,
+ int* USE_FORCE_POINT_SOURCEf,
+ double* h_stf_pre_compute, int* myrankf){}
-void FC_FUNC_(compute_add_sources_elastic_cuda,
- COMPUTE_ADD_SOURCES_ELASTIC_CUDA)(){}
+void FC_FUNC_(compute_add_sources_el_s3_cuda,
+ COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
+ int* USE_FORCE_POINT_SOURCE,
+ double* h_stf_pre_compute,
+ int* NSOURCES,
+ int* phase_is_inner,int* myrank){}
-void FC_FUNC_(add_source_master_rec_noise_cuda,
- ADD_SOURCE_MASTER_REC_NOISE_CUDA)(long* Mesh_pointer_f,
+void FC_FUNC_(add_source_master_rec_noise_cu,
+ ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
int* myrank_f,
int* it_f,
int* irec_master_noise_f,
@@ -72,8 +80,8 @@
int* h_islice_selected_rec,int* nadj_rec_local,
int* NTSTEP_BETWEEN_READ_ADJSRC){}
-void FC_FUNC_(compute_add_sources_acoustic_cuda,
- COMPUTE_ADD_SOURCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+void FC_FUNC_(compute_add_sources_ac_cuda,
+ COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* NSOURCESf,
int* SIMULATION_TYPEf,
@@ -81,8 +89,8 @@
double* h_stf_pre_compute,
int* myrankf){}
-void FC_FUNC_(compute_add_sources_acoustic_sim3_cuda,
- COMPUTE_ADD_SOURCES_ACOUSTIC_SIM3_CUDA)(long* Mesh_pointer_f,
+void FC_FUNC_(compute_add_sources_ac_s3_cuda,
+ COMPUTE_ADD_SOURCES_AC_S3_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* NSOURCESf,
int* SIMULATION_TYPEf,
@@ -90,8 +98,8 @@
double* h_stf_pre_compute,
int* myrankf){}
-void FC_FUNC_(add_sources_acoustic_sim_type_2_or_3_cuda,
- ADD_SOURCES_ACOUSTIC_SIM_TYPE_2_OR_3_CUDA)(long* Mesh_pointer,
+void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda,
+ ADD_SOURCES_AC_SIM_2_OR_3_CUDA)(long* Mesh_pointer,
float* h_adj_sourcearrays,
int* size_adj_sourcearrays,
int* phase_is_inner,
@@ -104,15 +112,15 @@
/* from compute_coupling_cuda.cu */
-void FC_FUNC_(compute_coupling_acoustic_el_cuda,
- COMPUTE_COUPLING_ACOUSTIC_EL_CUDA)(
+void FC_FUNC_(compute_coupling_ac_el_cuda,
+ COMPUTE_COUPLING_AC_EL_CUDA)(
long* Mesh_pointer_f,
int* phase_is_innerf,
int* num_coupling_ac_el_facesf,
int* SIMULATION_TYPEf){}
-void FC_FUNC_(compute_coupling_elastic_ac_cuda,
- COMPUTE_COUPLING_ELASTIC_AC_CUDA)(
+void FC_FUNC_(compute_coupling_el_ac_cuda,
+ COMPUTE_COUPLING_EL_AC_CUDA)(
long* Mesh_pointer_f,
int* phase_is_innerf,
int* num_coupling_ac_el_facesf,
@@ -120,8 +128,8 @@
/* from compute_forces_acoustic_cuda.cu */
-void FC_FUNC_(transfer_boundary_potential_from_device,
- TRANSFER_BOUNDARY_POTENTIAL_FROM_DEVICE)(
+void FC_FUNC_(transfer_boun_pot_from_device,
+ TRANSFER_BOUN_POT_FROM_DEVICE)(
int* size,
long* Mesh_pointer_f,
float* potential_dot_dot_acoustic,
@@ -132,11 +140,11 @@
int* ibool_interfaces_ext_mesh,
int* FORWARD_OR_ADJOINT){}
-void FC_FUNC_(transfer_and_assemble_potential_to_device,
- TRANSFER_AND_ASSEMBLE_POTENTIAL_TO_DEVICE)(
+void FC_FUNC_(transfer_asmbl_pot_to_device,
+ TRANSFER_ASMBL_POT_TO_DEVICE)(
long* Mesh_pointer,
- real* potential_dot_dot_acoustic,
- real* buffer_recv_scalar_ext_mesh,
+ realw* potential_dot_dot_acoustic,
+ realw* buffer_recv_scalar_ext_mesh,
int* num_interfaces_ext_mesh,
int* max_nibool_interfaces_ext_mesh,
int* nibool_interfaces_ext_mesh,
@@ -162,15 +170,15 @@
int* SIMULATION_TYPE,
float* b_deltatover2_F){}
-void FC_FUNC_(acoustic_enforce_free_surface_cuda,
- ACOUSTIC_ENFORCE_FREE_SURFACE_CUDA)(long* Mesh_pointer_f,
+void FC_FUNC_(acoustic_enforce_free_surf_cuda,
+ ACOUSTIC_ENFORCE_FREE_SURF_CUDA)(long* Mesh_pointer_f,
int* SIMULATION_TYPE,
int* ABSORB_FREE_SURFACE){}
/* from compute_forces_elastic_cuda.cu */
-void FC_FUNC_(transfer_boundary_accel_from_device,
- TRANSFER_BOUNDARY_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, float* accel,
+void FC_FUNC_(transfer_boun_accel_from_device,
+ TRANSFER_BOUN_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, float* accel,
float* send_accel_buffer,
int* num_interfaces_ext_mesh,
int* max_nibool_interfaces_ext_mesh,
@@ -179,12 +187,13 @@
int* FORWARD_OR_ADJOINT){}
-void FC_FUNC_(transfer_and_assemble_accel_to_device,
- TRANSFER_AND_ASSEMBLE_ACCEL_TO_DEVICE)(long* Mesh_pointer, real* accel,
- real* buffer_recv_vector_ext_mesh,
- int* num_interfaces_ext_mesh,
- int* max_nibool_interfaces_ext_mesh,
- int* nibool_interfaces_ext_mesh,
+void FC_FUNC_(transfer_asmbl_accel_to_device,
+ TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer,
+ realw* accel,
+ realw* buffer_recv_vector_ext_mesh,
+ int* num_interfaces_ext_mesh,
+ int* max_nibool_interfaces_ext_mesh,
+ int* nibool_interfaces_ext_mesh,
int* ibool_interfaces_ext_mesh,int* FORWARD_OR_ADJOINT){}
void FC_FUNC_(compute_forces_elastic_cuda,
@@ -192,8 +201,8 @@
int* iphase,
int* nspec_outer_elastic,
int* nspec_inner_elastic,
+ int* SIMULATION_TYPE,
int* COMPUTE_AND_STORE_STRAIN,
- int* SIMULATION_TYPE,
int* ATTENUATION){}
void FC_FUNC_(kernel_3_a_cuda,
@@ -219,10 +228,10 @@
void FC_FUNC_(compute_kernels_elastic_cuda,
COMPUTE_KERNELS_ELASTIC_CUDA)(long* Mesh_pointer,
- float* deltat){}
+ float* deltat_f){}
-void FC_FUNC_(compute_kernels_strength_noise_cuda,
- COMPUTE_KERNELS_STRENGTH_NOISE_CUDA)(long* Mesh_pointer,
+void FC_FUNC_(compute_kernels_strgth_noise_cu,
+ COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
float* h_noise_surface_movie,
int* num_free_surface_faces_f,
float* deltat){}
@@ -230,9 +239,12 @@
void FC_FUNC_(compute_kernels_acoustic_cuda,
COMPUTE_KERNELS_ACOUSTIC_CUDA)(
long* Mesh_pointer,
- float* deltat){}
+ float* deltat_f){}
-
+void FC_FUNC_(compute_kernels_hess_cuda,
+ COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
+ float* deltat_f) {}
+
/* from file compute_stacey_acoustic_cuda.cu */
void FC_FUNC_(compute_stacey_acoustic_cuda,
COMPUTE_STACEY_ACOUSTIC_CUDA)(
@@ -252,10 +264,10 @@
int* SAVE_FORWARDf,
float* h_b_absorb_field){}
-/* from file it_update_displacement_scheme_cuda.cu */
+/* from file it_update_displacement_cuda.cu */
-void FC_FUNC_(it_update_displacement_scheme_cuda,
- IT_UPDATE_DISPLACMENT_SCHEME_CUDA)(long* Mesh_pointer_f,
+void FC_FUNC_(it_update_displacement_cuda,
+ it_update_displacement_cuda)(long* Mesh_pointer_f,
int* size_F,
float* deltat_F,
float* deltatsqover2_F,
@@ -265,8 +277,8 @@
float* b_deltatsqover2_F,
float* b_deltatover2_F){}
-void FC_FUNC_(it_update_displacement_scheme_acoustic_cuda,
- IT_UPDATE_DISPLACEMENT_SCHEME_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+void FC_FUNC_(it_update_displacement_ac_cuda,
+ IT_UPDATE_DISPLACEMENT_AC_CUDA)(long* Mesh_pointer_f,
int* size_F,
float* deltat_F,
float* deltatsqover2_F,
@@ -288,10 +300,15 @@
void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,float* h_displ){}
void FC_FUNC_(transfer_surface_to_host,
- TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,real* h_noise_surface_movie,int* num_free_surface_faces){}
+ TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,
+ realw* h_noise_surface_movie,
+ int* num_free_surface_faces){}
-void FC_FUNC_(noise_read_add_surface_movie_cuda,
- NOISE_READ_ADD_SURFACE_MOVIE_CUDA)(long* Mesh_pointer_f, real* h_noise_surface_movie, int* num_free_surface_faces_f,int* NOISE_TOMOGRAPHYf){}
+void FC_FUNC_(noise_read_add_surface_movie_cu,
+ NOISE_READ_ADD_SURFACE_MOVIE_CU)(long* Mesh_pointer_f,
+ realw* h_noise_surface_movie,
+ int* num_free_surface_faces_f,
+ int* NOISE_TOMOGRAPHYf){}
/* from file prepare_mesh_constants_cuda.cu */
@@ -327,6 +344,7 @@
int* h_num_abs_boundary_faces,
int* h_ispec_is_inner,
int* NSOURCES,
+ int* nsources_local,
float* h_sourcearrays,
int* h_islice_selected_source,
int* h_ispec_selected_source,
@@ -334,17 +352,18 @@
int* h_ispec_selected_rec,
int* nrec_f,
int* nrec_local_f,
- int* SIMULATION_TYPE)
+ int* SIMULATION_TYPE,
+ int* ncuda_devices)
{
fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
exit(1);
}
-void FC_FUNC_(prepare_adjoint_sim2_or_3_constants_device,
- PREPARE_ADJOINT_SIM2_OR_3_CONSTANTS_DEVICE)(
- long* Mesh_pointer_f,
- int* islice_selected_rec,
- int* islice_selected_rec_size){}
+void FC_FUNC_(prepare_sim2_or_3_const_device,
+ PREPARE_SIM2_OR_3_CONST_DEVICE)(
+ long* Mesh_pointer_f,
+ int* islice_selected_rec,
+ int* islice_selected_rec_size){}
void FC_FUNC_(prepare_fields_acoustic_device,
PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
@@ -361,9 +380,6 @@
int* ABSORBING_CONDITIONS,
int* b_reclen_potential,
float* b_absorb_potential,
- int* SIMULATION_TYPE,
- float* rho_ac_kl,
- float* kappa_ac_kl,
int* ELASTIC_SIMULATION,
int* num_coupling_ac_el_faces,
int* coupling_ac_el_ispec,
@@ -371,44 +387,67 @@
float* coupling_ac_el_normal,
float* coupling_ac_el_jacobian2Dw
){}
+
+void FC_FUNC_(prepare_fields_acoustic_adj_dev,
+ PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
+ int* SIMULATION_TYPE,
+ float* rho_ac_kl,
+ float* kappa_ac_kl,
+ int* APPROXIMATE_HESS_KL) {}
void FC_FUNC_(prepare_fields_elastic_device,
- PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
+ PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
int* size,
float* rmass,
- float* rho_vp,
+ float* rho_vp,
float* rho_vs,
- int* num_phase_ispec_elastic,
+ int* num_phase_ispec_elastic,
int* phase_ispec_inner_elastic,
int* ispec_is_elastic,
int* ABSORBING_CONDITIONS,
float* h_b_absorb_field,
int* h_b_reclen_field,
- int* SIMULATION_TYPE,
- float* rho_kl,
- float* mu_kl,
- float* kappa_kl,
+ int* SIMULATION_TYPE,int* SAVE_FORWARD,
int* COMPUTE_AND_STORE_STRAIN,
float* epsilondev_xx,float* epsilondev_yy,float* epsilondev_xy,
float* epsilondev_xz,float* epsilondev_yz,
- float* epsilon_trace_over_3,
- float* b_epsilondev_xx,float* b_epsilondev_yy,float* b_epsilondev_xy,
- float* b_epsilondev_xz,float* b_epsilondev_yz,
- float* b_epsilon_trace_over_3,
- int* ATTENUATION, int* R_size,
+ int* ATTENUATION,
+ int* R_size,
float* R_xx,float* R_yy,float* R_xy,float* R_xz,float* R_yz,
- float* b_R_xx,float* b_R_yy,float* b_R_xy,float* b_R_xz,float* b_R_yz,
float* one_minus_sum_beta,float* factor_common,
float* alphaval,float* betaval,float* gammaval,
- float* b_alphaval,float* b_betaval,float* b_gammaval,
- int* OCEANS,float* rmass_ocean_load,
- float* free_surface_normal,int* num_free_surface_faces){}
+ int* OCEANS,
+ float* rmass_ocean_load,
+ int* NOISE_TOMOGRAPHY,
+ float* free_surface_normal,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* num_free_surface_faces){}
+
+void FC_FUNC_(prepare_fields_elastic_adj_dev,
+ PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
+ int* size,
+ int* SIMULATION_TYPE,
+ float* rho_kl,
+ float* mu_kl,
+ float* kappa_kl,
+ int* COMPUTE_AND_STORE_STRAIN,
+ float* epsilon_trace_over_3,
+ float* b_epsilondev_xx,float* b_epsilondev_yy,float* b_epsilondev_xy,
+ float* b_epsilondev_xz,float* b_epsilondev_yz,
+ float* b_epsilon_trace_over_3,
+ int* ATTENUATION,
+ int* R_size,
+ float* b_R_xx,float* b_R_yy,float* b_R_xy,float* b_R_xz,float* b_R_yz,
+ float* b_alphaval,float* b_betaval,float* b_gammaval,
+ int* APPROXIMATE_HESS_KL){}
+
void FC_FUNC_(prepare_fields_noise_device,
PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
int* NSPEC_AB, int* NGLOB_AB,
- int* free_surface_ispec,int* free_surface_ijk,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
int* num_free_surface_faces,
- int* size_free_surface_ijk,
int* SIMULATION_TYPE,
int* NOISE_TOMOGRAPHY,
int* NSTEP,
@@ -424,28 +463,31 @@
void FC_FUNC_(prepare_cleanup_device,
PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f,
int* SIMULATION_TYPE,
+ int* SAVE_FORWARD,
int* ACOUSTIC_SIMULATION,
int* ELASTIC_SIMULATION,
int* ABSORBING_CONDITIONS,
int* NOISE_TOMOGRAPHY,
int* COMPUTE_AND_STORE_STRAIN,
- int* ATTENUATION){}
+ int* ATTENUATION,
+ int* OCEANS,
+ int* APPROXIMATE_HESS_KL){}
/* from file transfer_fields_cuda.cu */
+void FC_FUNC_(transfer_fields_el_to_device,
+ TRANSFER_FIELDS_EL_TO_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f){}
+
+void FC_FUNC_(transfer_fields_el_from_device,
+ TRANSFER_FIELDS_EL_FROM_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f){}
+
void FC_FUNC_(transfer_b_fields_to_device,
TRANSFER_B_FIELDS_TO_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,
long* Mesh_pointer_f){}
-void FC_FUNC_(transfer_fields_to_device,
- TRANSFER_FIELDS_TO_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f){}
-
void FC_FUNC_(transfer_b_fields_from_device,
TRANSFER_B_FIELDS_FROM_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,long* Mesh_pointer_f){}
-void FC_FUNC_(transfer_fields_from_device,
- TRANSFER_FIELDS_FROM_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f){}
-
void FC_FUNC_(transfer_accel_to_device,
TRNASFER_ACCEL_TO_DEVICE)(int* size, float* accel,long* Mesh_pointer_f){}
@@ -463,12 +505,39 @@
void FC_FUNC_(transfer_displ_from_device,
TRANSFER_DISPL_FROM_DEVICE)(int* size, float* displ,long* Mesh_pointer_f){}
+/*
void FC_FUNC_(transfer_compute_kernel_answers_from_device,
TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
float* rho_kl,int* size_rho,
float* mu_kl, int* size_mu,
float* kappa_kl, int* size_kappa){}
+*/
+/*
+void FC_FUNC_(transfer_compute_kernel_fields_from_device,
+ TRANSFER_COMPUTE_KERNEL_FIELDS_FROM_DEVICE)(long* Mesh_pointer,
+ float* accel, int* size_accel,
+ float* b_displ, int* size_b_displ,
+ float* epsilondev_xx,
+ float* epsilondev_yy,
+ float* epsilondev_xy,
+ float* epsilondev_xz,
+ float* epsilondev_yz,
+ int* size_epsilondev,
+ float* b_epsilondev_xx,
+ float* b_epsilondev_yy,
+ float* b_epsilondev_xy,
+ float* b_epsilondev_xz,
+ float* b_epsilondev_yz,
+ int* size_b_epsilondev,
+ float* rho_kl,int* size_rho,
+ float* mu_kl, int* size_mu,
+ float* kappa_kl, int* size_kappa,
+ float* epsilon_trace_over_3,
+ float* b_epsilon_trace_over_3,
+ int* size_epsilon_trace_over_3) {}
+*/
+
void FC_FUNC_(transfer_b_fields_att_to_device,
TRANSFER_B_FIELDS_ATT_TO_DEVICE)(long* Mesh_pointer,
float* b_R_xx,float* b_R_yy,float* b_R_xy,float* b_R_xz,float* b_R_yz,
@@ -491,85 +560,94 @@
float* epsilondev_yz,
int* size_epsilondev){}
-void FC_FUNC_(transfer_sensitivity_kernels_to_host,
- TRANSFER_SENSITIVITY_KERNELS_TO_HOST)(long* Mesh_pointer,
+void FC_FUNC_(transfer_kernels_el_to_host,
+ TRANSFER_KERNELS_EL_TO_HOST)(long* Mesh_pointer,
float* h_rho_kl,
float* h_mu_kl,
float* h_kappa_kl,
int* NSPEC_AB){}
-void FC_FUNC_(transfer_sensitivity_kernels_noise_to_host,
- TRANSFER_SENSITIVITY_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
- float* h_Sigma_kl,
- int* NSPEC_AB){}
+void FC_FUNC_(transfer_kernels_noise_to_host,
+ TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
+ float* h_Sigma_kl,
+ int* NSPEC_AB){}
-void FC_FUNC_(transfer_fields_acoustic_to_device,
- TRANSFER_FIELDS_ACOUSTIC_TO_DEVICE)(
+void FC_FUNC_(transfer_fields_ac_to_device,
+ TRANSFER_FIELDS_AC_TO_DEVICE)(
int* size,
float* potential_acoustic,
float* potential_dot_acoustic,
float* potential_dot_dot_acoustic,
long* Mesh_pointer_f){}
-void FC_FUNC_(transfer_b_fields_acoustic_to_device,
- TRANSFER_B_FIELDS_ACOUSTIC_TO_DEVICE)(
+void FC_FUNC_(transfer_b_fields_ac_to_device,
+ TRANSFER_B_FIELDS_AC_TO_DEVICE)(
int* size,
float* b_potential_acoustic,
float* b_potential_dot_acoustic,
float* b_potential_dot_dot_acoustic,
long* Mesh_pointer_f){}
-void FC_FUNC_(transfer_fields_acoustic_from_device,TRANSFER_FIELDS_ACOUSTIC_FROM_DEVICE)(
- int* size,
- float* potential_acoustic,
- float* potential_dot_acoustic,
- float* potential_dot_dot_acoustic,
- long* Mesh_pointer_f){}
+void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
+ int* size,
+ float* potential_acoustic,
+ float* potential_dot_acoustic,
+ float* potential_dot_dot_acoustic,
+ long* Mesh_pointer_f){}
-void FC_FUNC_(transfer_b_fields_acoustic_from_device,
- TRANSFER_B_FIELDS_ACOUSTIC_FROM_DEVICE)(
+void FC_FUNC_(transfer_b_fields_ac_from_device,
+ TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
int* size,
float* b_potential_acoustic,
float* b_potential_dot_acoustic,
float* b_potential_dot_dot_acoustic,
long* Mesh_pointer_f){}
-void FC_FUNC_(transfer_potential_dot_dot_from_device,
- TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* potential_dot_dot_acoustic,long* Mesh_pointer_f){}
+void FC_FUNC_(transfer_dot_dot_from_device,
+ TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, float* potential_dot_dot_acoustic,long* Mesh_pointer_f){}
-void FC_FUNC_(transfer_b_potential_dot_dot_from_device,
- TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* b_potential_dot_dot_acoustic,long* Mesh_pointer_f){}
+void FC_FUNC_(transfer_b_dot_dot_from_device,
+ TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, float* b_potential_dot_dot_acoustic,long* Mesh_pointer_f){}
-void FC_FUNC_(transfer_sensitivity_kernels_acoustic_to_host,
- TRANSFER_SENSITIVITY_KERNELS_ACOUSTIC_TO_HOST)(long* Mesh_pointer,
+void FC_FUNC_(transfer_kernels_ac_to_host,
+ TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer,
float* h_rho_ac_kl,
float* h_kappa_ac_kl,
int* NSPEC_AB){}
+void FC_FUNC_(transfer_kernels_hess_el_tohost,
+ TRANSFER_KERNELS_HESS_TO_HOST)(long* Mesh_pointer,
+ float* h_hess_kl,
+ int* NSPEC_AB) {}
+void FC_FUNC_(transfer_kernels_hess_ac_tohost,
+ TRANSFER_KERNELS_HESS_TO_HOST)(long* Mesh_pointer,
+ float* h_hess_ac_kl,
+ int* NSPEC_AB) {}
+
/* from file write_seismograms_cuda.cu */
-void FC_FUNC_(transfer_station_fields_from_device,
- TRANSFER_STATION_FIELDS_FROM_DEVICE)(float* displ,float* veloc,float* accel,
+void FC_FUNC_(transfer_station_el_from_device,
+ TRANSFER_STATION_EL_FROM_DEVICE)(float* displ,float* veloc,float* accel,
float* b_displ, float* b_veloc, float* b_accel,
long* Mesh_pointer_f,int* number_receiver_global,
int* ispec_selected_rec,int* ispec_selected_source,
int* ibool,int* SIMULATION_TYPEf){}
-void FC_FUNC_(transfer_station_fields_acoustic_from_device,
- TRANSFER_STATION_FIELDS_ACOUSTIC_FROM_DEVICE)(
- float* potential_acoustic,
- float* potential_dot_acoustic,
- float* potential_dot_dot_acoustic,
- float* b_potential_acoustic,
- float* b_potential_dot_acoustic,
- float* b_potential_dot_dot_acoustic,
- long* Mesh_pointer_f,
- int* number_receiver_global,
- int* ispec_selected_rec,
- int* ispec_selected_source,
- int* ibool,
- int* SIMULATION_TYPEf){}
+void FC_FUNC_(transfer_station_ac_from_device,
+ TRANSFER_STATION_AC_FROM_DEVICE)(
+ float* potential_acoustic,
+ float* potential_dot_acoustic,
+ float* potential_dot_dot_acoustic,
+ float* b_potential_acoustic,
+ float* b_potential_dot_acoustic,
+ float* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f,
+ int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ispec_selected_source,
+ int* ibool,
+ int* SIMULATION_TYPEf){}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -53,75 +53,75 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_b_fields_to_device,
- TRANSFER_B_FIELDS_TO_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,
- long* Mesh_pointer_f) {
+void FC_FUNC_(transfer_fields_el_to_device,
+ TRANSFER_FIELDS_EL_TO_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
-TRACE("transfer_b_fields_to_device_");
+TRACE("transfer_fields_el_to_device_");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- cudaMemcpy(mp->d_b_displ,b_displ,sizeof(float)*(*size),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_veloc,b_veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_accel,b_accel,sizeof(float)*(*size),cudaMemcpyHostToDevice);
-
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_displ,displ,sizeof(float)*(*size),cudaMemcpyHostToDevice),40003);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_veloc,veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice),40004);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_accel,accel,sizeof(float)*(*size),cudaMemcpyHostToDevice),40005);
+
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_fields_to_device,
- TRANSFER_FIELDS_TO_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
+void FC_FUNC_(transfer_fields_el_from_device,
+ TRANSFER_FIELDS_EL_FROM_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
-TRACE("transfer_fields_to_device_");
-
+ TRACE("transfer_fields_el_from_device_");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- print_CUDA_error_if_any(cudaMemcpy(mp->d_displ,displ,sizeof(float)*(*size),cudaMemcpyHostToDevice),40003);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_veloc,veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice),40004);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_accel,accel,sizeof(float)*(*size),cudaMemcpyHostToDevice),40005);
-
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40006);
+ print_CUDA_error_if_any(cudaMemcpy(veloc,mp->d_veloc,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40007);
+ print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40008);
+
+ // printf("Transfered Fields From Device\n");
+ // int procid;
+ // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ // printf("Quick check of answer for p:%d in transfer_fields_el_from_device\n",procid);
+ // for(int i=0;i<5;i++) {
+ // printf("accel[%d]=%2.20e\n",i,accel[i]);
+ // }
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
+void FC_FUNC_(transfer_b_fields_to_device,
+ TRANSFER_B_FIELDS_TO_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,
+ long* Mesh_pointer_f) {
+
+ TRACE("transfer_b_fields_to_device_");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ cudaMemcpy(mp->d_b_displ,b_displ,sizeof(float)*(*size),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_veloc,b_veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_accel,b_accel,sizeof(float)*(*size),cudaMemcpyHostToDevice);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
void FC_FUNC_(transfer_b_fields_from_device,
TRANSFER_B_FIELDS_FROM_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,long* Mesh_pointer_f) {
TRACE("transfer_b_fields_from_device_");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
cudaMemcpy(b_displ,mp->d_b_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
cudaMemcpy(b_veloc,mp->d_b_veloc,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
cudaMemcpy(b_accel,mp->d_b_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
-
+
}
-/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(transfer_fields_from_device,
- TRANSFER_FIELDS_FROM_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
-
-TRACE("transfer_fields_from_device_");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40006);
- print_CUDA_error_if_any(cudaMemcpy(veloc,mp->d_veloc,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40007);
- print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40008);
-
- // printf("Transfered Fields From Device\n");
- // int procid;
- // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- // printf("Quick check of answer for p:%d in transfer_fields_from_device\n",procid);
- // for(int i=0;i<5;i++) {
- // printf("accel[%d]=%2.20e\n",i,accel[i]);
- // }
-
-}
-
/* ----------------------------------------------------------------------------------------------- */
extern "C"
@@ -129,9 +129,9 @@
TRNASFER_ACCEL_TO_DEVICE)(int* size, float* accel,long* Mesh_pointer_f) {
TRACE("transfer_accel_to_device");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(mp->d_accel,accel,sizeof(float)*(*size),cudaMemcpyHostToDevice),40016);
}
@@ -143,9 +143,9 @@
TRANSFER_ACCEL_FROM_DEVICE)(int* size, float* accel,long* Mesh_pointer_f) {
TRACE("transfer_accel_from_device");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40026);
}
@@ -157,9 +157,9 @@
TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* b_accel,long* Mesh_pointer_f) {
TRACE("transfer_b_accel_from_device");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40036);
}
@@ -171,9 +171,9 @@
TRANSFER_SIGMA_FROM_DEVICE)(int* size, float* sigma_kl,long* Mesh_pointer_f) {
TRACE("transfer_sigma_from_device");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(sigma_kl,mp->d_Sigma_kl,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40046);
}
@@ -185,9 +185,9 @@
TRANSFER_B_DISPL_FROM_DEVICE)(int* size, float* displ,long* Mesh_pointer_f) {
TRACE("transfer_b_displ_from_device");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40056);
}
@@ -199,15 +199,15 @@
TRANSFER_DISPL_FROM_DEVICE)(int* size, float* displ,long* Mesh_pointer_f) {
TRACE("transfer_displ_from_device");
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),40066);
}
/* ----------------------------------------------------------------------------------------------- */
-
+/*
extern "C"
void FC_FUNC_(transfer_compute_kernel_answers_from_device,
TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
@@ -219,9 +219,10 @@
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);
-
+ cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);
+
}
+*/
/* ----------------------------------------------------------------------------------------------- */
/*
@@ -265,12 +266,12 @@
cudaMemcpy(b_epsilondev_yz,mp->d_b_epsilondev_yz,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(epsilon_trace_over_3,mp->d_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(float),
- cudaMemcpyDeviceToHost);
+ cudaMemcpyDeviceToHost);
cudaMemcpy(b_epsilon_trace_over_3,mp->d_b_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(float),
- cudaMemcpyDeviceToHost);
-
+ cudaMemcpyDeviceToHost);
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("after transfer_compute_kernel_fields_from_device");
#endif
@@ -294,21 +295,21 @@
int* size_epsilondev) {
TRACE("transfer_b_fields_att_to_device");
//get mesh pointer out of fortran integer container
- Mesh* mp = (Mesh*)(*Mesh_pointer);
-
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+
cudaMemcpy(mp->d_b_R_xx,b_R_xx,*size_R*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(mp->d_b_R_yy,b_R_yy,*size_R*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(mp->d_b_R_xy,b_R_xy,*size_R*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(mp->d_b_R_xz,b_R_xz,*size_R*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(mp->d_b_R_yz,b_R_yz,*size_R*sizeof(float),cudaMemcpyHostToDevice);
-
+
cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,*size_epsilondev*sizeof(float),cudaMemcpyHostToDevice);
-
-
+
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("after transfer_b_fields_att_to_device");
#endif
@@ -331,21 +332,21 @@
int* size_epsilondev) {
TRACE("transfer_fields_att_from_device");
//get mesh pointer out of fortran integer container
- Mesh* mp = (Mesh*)(*Mesh_pointer);
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
cudaMemcpy(R_xx,mp->d_R_xx,*size_R*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(R_yy,mp->d_R_yy,*size_R*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(R_xy,mp->d_R_xy,*size_R*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(R_xz,mp->d_R_xz,*size_R*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(R_yz,mp->d_R_yz,*size_R*sizeof(float),cudaMemcpyDeviceToHost);
-
+
cudaMemcpy(epsilondev_xx,mp->d_epsilondev_xx,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(epsilondev_yy,mp->d_epsilondev_yy,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(epsilondev_xy,mp->d_epsilondev_xy,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(epsilondev_xz,mp->d_epsilondev_xz,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
cudaMemcpy(epsilondev_yz,mp->d_epsilondev_yz,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
-
-
+
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("after transfer_fields_att_from_device");
#endif
@@ -354,24 +355,24 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(transfer_sensitivity_kernels_to_host,
- TRANSFER_SENSITIVITY_KERNELS_TO_HOST)(long* Mesh_pointer,
+extern "C"
+void FC_FUNC_(transfer_kernels_el_to_host,
+ TRANSFER_KERNELS_EL_TO_HOST)(long* Mesh_pointer,
float* h_rho_kl,
- float* h_mu_kl,
+ float* h_mu_kl,
float* h_kappa_kl,
int* NSPEC_AB) {
-TRACE("transfer_sensitivity_kernels_to_host");
+TRACE("transfer_kernels_el_to_host");
//get mesh pointer out of fortran integer container
- Mesh* mp = (Mesh*)(*Mesh_pointer);
-
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+
print_CUDA_error_if_any(cudaMemcpy(h_rho_kl,mp->d_rho_kl,*NSPEC_AB*125*sizeof(float),
cudaMemcpyDeviceToHost),40101);
print_CUDA_error_if_any(cudaMemcpy(h_mu_kl,mp->d_mu_kl,*NSPEC_AB*125*sizeof(float),
cudaMemcpyDeviceToHost),40102);
print_CUDA_error_if_any(cudaMemcpy(h_kappa_kl,mp->d_kappa_kl,*NSPEC_AB*125*sizeof(float),
cudaMemcpyDeviceToHost),40103);
-
+
}
/* ----------------------------------------------------------------------------------------------- */
@@ -380,18 +381,18 @@
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(transfer_sensitivity_kernels_noise_to_host,
- TRANSFER_SENSITIVITY_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
+extern "C"
+void FC_FUNC_(transfer_kernels_noise_to_host,
+ TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
float* h_Sigma_kl,
int* NSPEC_AB) {
-TRACE("transfer_sensitivity_kernels_noise_to_host");
-
+TRACE("transfer_kernels_noise_to_host");
+
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(h_Sigma_kl,mp->d_Sigma_kl,125*(*NSPEC_AB)*sizeof(float),
- cudaMemcpyHostToDevice),40201);
-
+ cudaMemcpyDeviceToHost),40201);
+
}
@@ -402,52 +403,52 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_fields_acoustic_to_device,
- TRANSFER_FIELDS_ACOUSTIC_TO_DEVICE)(
- int* size,
- float* potential_acoustic,
- float* potential_dot_acoustic,
+void FC_FUNC_(transfer_fields_ac_to_device,
+ TRANSFER_FIELDS_AC_TO_DEVICE)(
+ int* size,
+ float* potential_acoustic,
+ float* potential_dot_acoustic,
float* potential_dot_dot_acoustic,
long* Mesh_pointer_f) {
-TRACE("transfer_fields_acoustic_to_device");
-
+TRACE("transfer_fields_ac_to_device");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_acoustic,potential_acoustic,
sizeof(float)*(*size),cudaMemcpyHostToDevice),50110);
print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_dot_acoustic,potential_dot_acoustic,
sizeof(float)*(*size),cudaMemcpyHostToDevice),50120);
print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
sizeof(float)*(*size),cudaMemcpyHostToDevice),50130);
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_fields_acoustic_to_device");
-#endif
+ exit_on_cuda_error("after transfer_fields_ac_to_device");
+#endif
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_b_fields_acoustic_to_device,
- TRANSFER_B_FIELDS_ACOUSTIC_TO_DEVICE)(
- int* size,
- float* b_potential_acoustic,
- float* b_potential_dot_acoustic,
+void FC_FUNC_(transfer_b_fields_ac_to_device,
+ TRANSFER_B_FIELDS_AC_TO_DEVICE)(
+ int* size,
+ float* b_potential_acoustic,
+ float* b_potential_dot_acoustic,
float* b_potential_dot_dot_acoustic,
long* Mesh_pointer_f) {
-TRACE("transfer_b_fields_acoustic_to_device");
-
+TRACE("transfer_b_fields_ac_to_device");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_acoustic,b_potential_acoustic,
sizeof(float)*(*size),cudaMemcpyHostToDevice),51110);
print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
sizeof(float)*(*size),cudaMemcpyHostToDevice),51120);
print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
sizeof(float)*(*size),cudaMemcpyHostToDevice),51130);
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_b_fields_acoustic_to_device");
+ exit_on_cuda_error("after transfer_b_fields_ac_to_device");
#endif
}
@@ -455,104 +456,139 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_fields_acoustic_from_device,TRANSFER_FIELDS_ACOUSTIC_FROM_DEVICE)(
- int* size,
- float* potential_acoustic,
- float* potential_dot_acoustic,
+void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
+ int* size,
+ float* potential_acoustic,
+ float* potential_dot_acoustic,
float* potential_dot_dot_acoustic,
long* Mesh_pointer_f) {
-TRACE("transfer_fields_acoustic_from_device");
-
+TRACE("transfer_fields_ac_from_device");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(potential_acoustic,mp->d_potential_acoustic,
sizeof(float)*(*size),cudaMemcpyDeviceToHost),52111);
print_CUDA_error_if_any(cudaMemcpy(potential_dot_acoustic,mp->d_potential_dot_acoustic,
sizeof(float)*(*size),cudaMemcpyDeviceToHost),52121);
print_CUDA_error_if_any(cudaMemcpy(potential_dot_dot_acoustic,mp->d_potential_dot_dot_acoustic,
sizeof(float)*(*size),cudaMemcpyDeviceToHost),52131);
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_fields_acoustic_from_device");
+ exit_on_cuda_error("after transfer_fields_ac_from_device");
#endif
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_b_fields_acoustic_from_device,
- TRANSFER_B_FIELDS_ACOUSTIC_FROM_DEVICE)(
- int* size,
- float* b_potential_acoustic,
- float* b_potential_dot_acoustic,
+void FC_FUNC_(transfer_b_fields_ac_from_device,
+ TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
+ int* size,
+ float* b_potential_acoustic,
+ float* b_potential_dot_acoustic,
float* b_potential_dot_dot_acoustic,
long* Mesh_pointer_f) {
-TRACE("transfer_b_fields_acoustic_from_device");
-
+TRACE("transfer_b_fields_ac_from_device");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(b_potential_acoustic,mp->d_b_potential_acoustic,
sizeof(float)*(*size),cudaMemcpyDeviceToHost),53111);
print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_acoustic,mp->d_b_potential_dot_acoustic,
sizeof(float)*(*size),cudaMemcpyDeviceToHost),53121);
print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_dot_acoustic,mp->d_b_potential_dot_dot_acoustic,
- sizeof(float)*(*size),cudaMemcpyDeviceToHost),53131);
-
+ sizeof(float)*(*size),cudaMemcpyDeviceToHost),53131);
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_b_fields_acoustic_from_device");
+ exit_on_cuda_error("after transfer_b_fields_ac_from_device");
#endif
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_potential_dot_dot_from_device,
- TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* potential_dot_dot_acoustic,long* Mesh_pointer_f) {
-
- TRACE("transfer_potential_dot_dot_from_device");
-
+void FC_FUNC_(transfer_dot_dot_from_device,
+ TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, float* potential_dot_dot_acoustic,long* Mesh_pointer_f) {
+
+ TRACE("transfer_dot_dot_from_device");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(potential_dot_dot_acoustic,mp->d_potential_dot_dot_acoustic,
sizeof(float)*(*size),cudaMemcpyDeviceToHost),50041);
-
+
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_b_potential_dot_dot_from_device,
- TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {
-
- TRACE("transfer_b_potential_dot_dot_from_device");
-
+void FC_FUNC_(transfer_b_dot_dot_from_device,
+ TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, float* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {
+
+ TRACE("transfer_b_dot_dot_from_device");
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
+
print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_dot_acoustic,mp->d_b_potential_dot_dot_acoustic,
sizeof(float)*(*size),cudaMemcpyDeviceToHost),50042);
-
+
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(transfer_sensitivity_kernels_acoustic_to_host,
- TRANSFER_SENSITIVITY_KERNELS_ACOUSTIC_TO_HOST)(long* Mesh_pointer,
+extern "C"
+void FC_FUNC_(transfer_kernels_ac_to_host,
+ TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer,
float* h_rho_ac_kl,
float* h_kappa_ac_kl,
int* NSPEC_AB) {
-
- TRACE("transfer_sensitivity_kernels_acoustic_to_host");
-
- //get mesh pointer out of fortran integer container
- Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+ TRACE("transfer_kernels_ac_to_host");
+
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
int size = *NSPEC_AB*125;
-
+
// copies kernel values over to CPU host
print_CUDA_error_if_any(cudaMemcpy(h_rho_ac_kl,mp->d_rho_ac_kl,size*sizeof(float),
cudaMemcpyDeviceToHost),54101);
print_CUDA_error_if_any(cudaMemcpy(h_kappa_ac_kl,mp->d_kappa_ac_kl,size*sizeof(float),
- cudaMemcpyDeviceToHost),54102);
+ cudaMemcpyDeviceToHost),54102);
}
+/* ----------------------------------------------------------------------------------------------- */
+
+// for Hess kernel calculations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_kernels_hess_el_tohost,
+ TRANSFER_KERNELS_HESS_EL_TOHOST)(long* Mesh_pointer,
+ float* h_hess_kl,
+ int* NSPEC_AB) {
+TRACE("transfer_kernels_hess_el_tohost");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(h_hess_kl,mp->d_hess_el_kl,125*(*NSPEC_AB)*sizeof(float),
+ cudaMemcpyDeviceToHost),70201);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_kernels_hess_ac_tohost,
+ TRANSFER_KERNELS_HESS_AC_TOHOST)(long* Mesh_pointer,
+ float* h_hess_ac_kl,
+ int* NSPEC_AB) {
+ TRACE("transfer_kernels_hess_ac_tohost");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(h_hess_ac_kl,mp->d_hess_ac_kl,125*(*NSPEC_AB)*sizeof(float),
+ cudaMemcpyDeviceToHost),70202);
+}
+
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu 2011-10-30 02:25:28 UTC (rev 19129)
@@ -52,7 +52,7 @@
int blockID = blockIdx.x + blockIdx.y*gridDim.x;
if(blockID<nrec_local) {
//int nodeID = threadIdx.x + blockID*blockDim.x;
- int irec = number_receiver_global[blockID]-1;
+ int irec = number_receiver_global[blockID]-1;
int ispec = ispec_selected_rec[irec]-1; // ispec==0 before -1???
// if(threadIdx.x==1 && blockID < 125) {
// // debug_index[threadIdx.x] = threadIdx.x + 125*ispec;
@@ -79,7 +79,12 @@
int* d_ispec_selected,
int* h_ispec_selected,
int* ibool) {
-
+
+TRACE("transfer_field_from_device");
+
+ // checks if anything to do
+ if( mp->nrec_local == 0 ) return;
+
int blocksize = 125;
int num_blocks_x = mp->nrec_local;
int num_blocks_y = 1;
@@ -89,7 +94,7 @@
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
@@ -98,42 +103,42 @@
//cudaMalloc((void**)&d_debug_index,125*sizeof(int));
//h_debug_index = (int*)calloc(125,sizeof(int));
//cudaMemcpy(d_debug_index,h_debug_index,125*sizeof(int),cudaMemcpyHostToDevice);
-
-
+
+
// prepare field transfer array on device
transfer_stations_fields_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
- d_ispec_selected,
- mp->d_ibool,
- mp->d_station_seismo_field,
- d_field,
- mp->nrec_local,d_debug_index);
+ d_ispec_selected,
+ mp->d_ibool,
+ mp->d_station_seismo_field,
+ d_field,
+ mp->nrec_local,d_debug_index);
//cudaMemcpy(h_debug_index,d_debug_index,125*sizeof(int),cudaMemcpyDeviceToHost);
-
+
// pause_for_debug(1);
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("transfer_stations_fields_from_device_kernel");
#endif
-
+
cudaMemcpy(mp->h_station_seismo_field,mp->d_station_seismo_field,
- (3*125)*(mp->nrec_local)*sizeof(float),cudaMemcpyDeviceToHost);
-
+ (3*125)*(mp->nrec_local)*sizeof(float),cudaMemcpyDeviceToHost);
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("transfer_stations_fields_from_device_kernel_memcpy");
#endif
-
+
// pause_for_debug(1);
int irec_local;
-
+
for(irec_local=0;irec_local<mp->nrec_local;irec_local++) {
- int irec = number_receiver_global[irec_local]-1;
- int ispec = h_ispec_selected[irec]-1;
-
+ int irec = number_receiver_global[irec_local] - 1;
+ int ispec = h_ispec_selected[irec] - 1;
+
for(int i=0;i<125;i++) {
- int iglob = ibool[i+125*ispec]-1;
+ int iglob = ibool[i+125*ispec] - 1;
h_field[0+3*iglob] = mp->h_station_seismo_field[0+3*i+irec_local*125*3];
h_field[1+3*iglob] = mp->h_station_seismo_field[1+3*i+irec_local*125*3];
- h_field[2+3*iglob] = mp->h_station_seismo_field[2+3*i+irec_local*125*3];
+ h_field[2+3*iglob] = mp->h_station_seismo_field[2+3*i+irec_local*125*3];
}
}
@@ -142,44 +147,45 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_station_fields_from_device,
- TRANSFER_STATION_FIELDS_FROM_DEVICE)(float* displ,float* veloc,float* accel,
+void FC_FUNC_(transfer_station_el_from_device,
+ TRANSFER_STATION_EL_FROM_DEVICE)(float* displ,float* veloc,float* accel,
float* b_displ, float* b_veloc, float* b_accel,
long* Mesh_pointer_f,int* number_receiver_global,
int* ispec_selected_rec,int* ispec_selected_source,
int* ibool,int* SIMULATION_TYPEf) {
-TRACE("transfer_station_fields_from_device");
+TRACE("transfer_station_el_from_device");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-
+ // checks if anything to do
+ if( mp->nrec_local == 0 ) return;
int SIMULATION_TYPE = *SIMULATION_TYPEf;
-
+
if(SIMULATION_TYPE == 1) {
transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
}
else if(SIMULATION_TYPE == 2) {
transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
}
else if(SIMULATION_TYPE == 3) {
transfer_field_from_device(mp,mp->d_b_displ,b_displ, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
transfer_field_from_device(mp,mp->d_b_veloc,b_veloc, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
transfer_field_from_device(mp,mp->d_b_accel,b_accel, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
}
-
+
}
/* ----------------------------------------------------------------------------------------------- */
@@ -193,36 +199,36 @@
int* ibool,
float* station_seismo_potential,
float* desired_potential) {
-
+
int blockID = blockIdx.x + blockIdx.y*gridDim.x;
int nodeID = threadIdx.x + blockID*blockDim.x;
-
- int irec = number_receiver_global[blockID]-1;
+
+ int irec = number_receiver_global[blockID]-1;
int ispec = ispec_selected_rec[irec]-1;
int iglob = ibool[threadIdx.x + 125*ispec]-1;
-
+
//if(threadIdx.x == 0 ) printf("node acoustic: %i %i %i %i %i %e \n",blockID,nodeID,irec,ispec,iglob,desired_potential[iglob]);
-
+
station_seismo_potential[nodeID] = desired_potential[iglob];
}
/* ----------------------------------------------------------------------------------------------- */
-void transfer_field_acoustic_from_device(Mesh* mp,
+void transfer_field_acoustic_from_device(Mesh* mp,
float* d_potential,
float* h_potential,
int* number_receiver_global,
int* d_ispec_selected,
int* h_ispec_selected,
int* ibool) {
-
+
TRACE("transfer_field_acoustic_from_device");
-
+
int irec_local,irec,ispec,iglob,j;
-
+
// checks if anything to do
if( mp->nrec_local == 0 ) return;
-
+
// sets up kernel dimensions
int blocksize = 125;
int num_blocks_x = mp->nrec_local;
@@ -231,107 +237,109 @@
num_blocks_x = ceil(num_blocks_x/2.0);
num_blocks_y = num_blocks_y*2;
}
-
+
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
-
+
// prepare field transfer array on device
transfer_stations_fields_acoustic_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
d_ispec_selected,
mp->d_ibool,
mp->d_station_seismo_potential,
d_potential);
-
-
+
+
print_CUDA_error_if_any(cudaMemcpy(mp->h_station_seismo_potential,mp->d_station_seismo_potential,
mp->nrec_local*125*sizeof(float),cudaMemcpyDeviceToHost),500);
-
- //printf("copy local receivers: %i \n",mp->nrec_local);
-
+
+ //printf("copy local receivers: %i \n",mp->nrec_local);
+
for(irec_local=0; irec_local < mp->nrec_local; irec_local++) {
irec = number_receiver_global[irec_local]-1;
ispec = h_ispec_selected[irec]-1;
-
+
// copy element values
// note: iglob may vary and can be irregularly accessing the h_potential array
for(j=0; j < 125; j++){
iglob = ibool[j+125*ispec]-1;
h_potential[iglob] = mp->h_station_seismo_potential[j+irec_local*125];
}
-
+
// copy each station element's points to working array
// note: this works if iglob values would be all aligned...
//memcpy(&(h_potential[iglob]),&(mp->h_station_seismo_potential[irec_local*125]),125*sizeof(float));
-
+
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("transfer_field_acoustic_from_device");
-#endif
+#endif
}
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(transfer_station_fields_acoustic_from_device,
- TRANSFER_STATION_FIELDS_ACOUSTIC_FROM_DEVICE)(
- float* potential_acoustic,
- float* potential_dot_acoustic,
- float* potential_dot_dot_acoustic,
- float* b_potential_acoustic,
- float* b_potential_dot_acoustic,
- float* b_potential_dot_dot_acoustic,
- long* Mesh_pointer_f,
- int* number_receiver_global,
- int* ispec_selected_rec,
- int* ispec_selected_source,
- int* ibool,
- int* SIMULATION_TYPEf) {
-
-TRACE("transfer_station_fields_acoustic_from_device");
+extern "C"
+void FC_FUNC_(transfer_station_ac_from_device,
+ TRANSFER_STATION_AC_FROM_DEVICE)(
+ float* potential_acoustic,
+ float* potential_dot_acoustic,
+ float* potential_dot_dot_acoustic,
+ float* b_potential_acoustic,
+ float* b_potential_dot_acoustic,
+ float* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f,
+ int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ispec_selected_source,
+ int* ibool,
+ int* SIMULATION_TYPEf) {
+
+TRACE("transfer_station_ac_from_device");
//double start_time = get_time();
-
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-
+ // checks if anything to do
+ if( mp->nrec_local == 0 ) return;
+
int SIMULATION_TYPE = *SIMULATION_TYPEf;
-
+
if(SIMULATION_TYPE == 1) {
transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
}
else if(SIMULATION_TYPE == 2) {
- transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
+ transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
number_receiver_global,
mp->d_ispec_selected_source, ispec_selected_source, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
number_receiver_global,
mp->d_ispec_selected_source, ispec_selected_source, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
number_receiver_global,
mp->d_ispec_selected_source, ispec_selected_source, ibool);
}
else if(SIMULATION_TYPE == 3) {
- transfer_field_acoustic_from_device(mp,mp->d_b_potential_acoustic,b_potential_acoustic,
+ transfer_field_acoustic_from_device(mp,mp->d_b_potential_acoustic,b_potential_acoustic,
number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
+ transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
+ transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
number_receiver_global,
mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
- exit_on_cuda_error("transfer_station_fields_acoustic_from_device");
+ exit_on_cuda_error("transfer_station_ac_from_device");
#endif
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/decompose_mesh_SCOTCH.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/decompose_mesh_SCOTCH.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/decompose_mesh_SCOTCH.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -107,6 +107,8 @@
integer :: aniso_flag,idomain_id
double precision :: vp,vs,rho,qmu
+ integer, parameter :: IIN_database = 15
+
contains
!----------------------------------------------------------------------------------------------
@@ -783,8 +785,8 @@
! opens output file
write(prname, "(i6.6,'_Database')") ipart
- open(unit=15,file=outputpath_name(1:len_trim(outputpath_name))//'/proc'//prname,&
- status='unknown', action='write', form='formatted', iostat = ier)
+ open(unit=IIN_database,file=outputpath_name(1:len_trim(outputpath_name))//'/proc'//prname,&
+ status='unknown', action='write', form='unformatted', iostat = ier)
if( ier /= 0 ) then
print*,'error file open:',outputpath_name(1:len_trim(outputpath_name))//'/proc'//prname
print*
@@ -793,34 +795,36 @@
endif
! gets number of nodes
- call write_glob2loc_nodes_database(15, ipart, nnodes_loc, nodes_coords, &
+ call write_glob2loc_nodes_database(IIN_database, ipart, nnodes_loc, nodes_coords, &
glob2loc_nodes_nparts, glob2loc_nodes_parts, &
glob2loc_nodes, nnodes, 1)
! gets number of spectral elements
- call write_partition_database(15, ipart, nspec_loc, nspec, elmnts, &
+ call write_partition_database(IIN_database, ipart, nspec_loc, nspec, elmnts, &
glob2loc_elmnts, glob2loc_nodes_nparts, &
glob2loc_nodes_parts, glob2loc_nodes, part, mat, ngnod, 1)
! writes out node coordinate locations
- write(15,*) nnodes_loc
+ !write(IIN_database,*) nnodes_loc
+ write(IIN_database) nnodes_loc
- call write_glob2loc_nodes_database(15, ipart, nnodes_loc, nodes_coords,&
+ call write_glob2loc_nodes_database(IIN_database, ipart, nnodes_loc, nodes_coords,&
glob2loc_nodes_nparts, glob2loc_nodes_parts, &
glob2loc_nodes, nnodes, 2)
- call write_material_props_database(15,count_def_mat,count_undef_mat, &
+ call write_material_props_database(IIN_database,count_def_mat,count_undef_mat, &
mat_prop, undef_mat_prop)
! writes out spectral element indices
- write(15,*) nspec_loc
+ !write(IIN_database,*) nspec_loc
+ write(IIN_database) nspec_loc
- call write_partition_database(15, ipart, nspec_loc, nspec, elmnts, &
+ call write_partition_database(IIN_database, ipart, nspec_loc, nspec, elmnts, &
glob2loc_elmnts, glob2loc_nodes_nparts, &
glob2loc_nodes_parts, glob2loc_nodes, part, mat, ngnod, 2)
! writes out absorbing/free-surface boundaries
- call write_boundaries_database(15, ipart, nspec, nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, &
+ call write_boundaries_database(IIN_database, ipart, nspec, nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, &
nspec2D_ymax, nspec2D_bottom, nspec2D_top, &
ibelm_xmin, ibelm_xmax, ibelm_ymin, &
ibelm_ymax, ibelm_bottom, ibelm_top, &
@@ -830,26 +834,33 @@
glob2loc_nodes_parts, glob2loc_nodes, part)
! gets number of MPI interfaces
- call Write_interfaces_database(15, tab_interfaces, tab_size_interfaces, ipart, ninterfaces, &
+ call Write_interfaces_database(IIN_database, tab_interfaces, tab_size_interfaces, ipart, ninterfaces, &
my_ninterface, my_interfaces, my_nb_interfaces, &
glob2loc_elmnts, glob2loc_nodes_nparts, glob2loc_nodes_parts, &
glob2loc_nodes, 1, nparts)
! writes out MPI interfaces elements
- write(15,*) my_ninterface, maxval(my_nb_interfaces)
+ !print*,' my interfaces:',my_ninterface,maxval(my_nb_interfaces)
+ if( my_ninterface == 0 ) then
+ !write(IIN_database,*) my_ninterface, 0 ! avoids problem with maxval for empty array my_nb_interfaces
+ write(IIN_database) my_ninterface, 0 ! avoids problem with maxval for empty array my_nb_interfaces
+ else
+ !write(IIN_database,*) my_ninterface, maxval(my_nb_interfaces)
+ write(IIN_database) my_ninterface, maxval(my_nb_interfaces)
+ endif
- call Write_interfaces_database(15, tab_interfaces, tab_size_interfaces, ipart, ninterfaces, &
+ call Write_interfaces_database(IIN_database, tab_interfaces, tab_size_interfaces, ipart, ninterfaces, &
my_ninterface, my_interfaces, my_nb_interfaces, &
glob2loc_elmnts, glob2loc_nodes_nparts, glob2loc_nodes_parts, &
glob2loc_nodes, 2, nparts)
! writes out moho surface (optional)
- call write_moho_surface_database(15, ipart, nspec, &
+ call write_moho_surface_database(IIN_database, ipart, nspec, &
glob2loc_elmnts, glob2loc_nodes_nparts, &
glob2loc_nodes_parts, glob2loc_nodes, part, &
nspec2D_moho,ibelm_moho,nodes_ibelm_moho)
- close(15)
+ close(IIN_database)
end do
print*, 'partitions: '
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/part_decompose_mesh_SCOTCH.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/part_decompose_mesh_SCOTCH.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/part_decompose_mesh_SCOTCH.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -589,7 +589,10 @@
do i = 0, nnodes-1
do j = glob2loc_nodes_nparts(i), glob2loc_nodes_nparts(i+1)-1
if ( glob2loc_nodes_parts(j) == iproc ) then
- write(IIN_database,*) glob2loc_nodes(j)+1, nodes_coords(1,i+1), &
+ !write(IIN_database,*) glob2loc_nodes(j)+1, nodes_coords(1,i+1), &
+ ! nodes_coords(2,i+1), nodes_coords(3,i+1)
+
+ write(IIN_database) glob2loc_nodes(j)+1, nodes_coords(1,i+1), &
nodes_coords(2,i+1), nodes_coords(3,i+1)
end if
end do
@@ -611,7 +614,8 @@
character (len=30), dimension(6,count_undef_mat) :: undef_mat_prop
integer :: i
- write(IIN_database,*) count_def_mat,count_undef_mat
+ !write(IIN_database,*) count_def_mat,count_undef_mat
+ write(IIN_database) count_def_mat,count_undef_mat
do i = 1, count_def_mat
! database material definition
!
@@ -619,13 +623,19 @@
!
! (note that this order of the properties is different than the input in nummaterial_velocity_file)
!
- write(IIN_database,*) mat_prop(1,i), mat_prop(2,i), mat_prop(3,i), &
- mat_prop(4,i), mat_prop(5,i), mat_prop(6,i)
+ !write(IIN_database,*) mat_prop(1,i), mat_prop(2,i), mat_prop(3,i), &
+ ! mat_prop(4,i), mat_prop(5,i), mat_prop(6,i)
+ write(IIN_database) mat_prop(1,i), mat_prop(2,i), mat_prop(3,i), &
+ mat_prop(4,i), mat_prop(5,i), mat_prop(6,i)
end do
do i = 1, count_undef_mat
- write(IIN_database,*) trim(undef_mat_prop(1,i)),' ',trim(undef_mat_prop(2,i)),' ', &
- trim(undef_mat_prop(3,i)),' ',trim(undef_mat_prop(4,i)),' ', &
- trim(undef_mat_prop(5,i)),' ',trim(undef_mat_prop(6,i))
+ !write(IIN_database,*) trim(undef_mat_prop(1,i)),' ',trim(undef_mat_prop(2,i)),' ', &
+ ! trim(undef_mat_prop(3,i)),' ',trim(undef_mat_prop(4,i)),' ', &
+ ! trim(undef_mat_prop(5,i)),' ',trim(undef_mat_prop(6,i))
+
+ write(IIN_database) undef_mat_prop(1,i),undef_mat_prop(2,i), &
+ undef_mat_prop(3,i),undef_mat_prop(4,i), &
+ undef_mat_prop(5,i),undef_mat_prop(6,i)
end do
end subroutine write_material_props_database
@@ -681,42 +691,53 @@
loc_nspec2D_xmin = loc_nspec2D_xmin + 1
end if
end do
- write(IIN_database,*) 1, loc_nspec2D_xmin
+ !write(IIN_database,*) 1, loc_nspec2D_xmin
+ write(IIN_database) 1, loc_nspec2D_xmin
+
loc_nspec2D_xmax = 0
do i=1,nspec2D_xmax
if(part(ibelm_xmax(i)) == iproc) then
loc_nspec2D_xmax = loc_nspec2D_xmax + 1
end if
end do
- write(IIN_database,*) 2, loc_nspec2D_xmax
+ !write(IIN_database,*) 2, loc_nspec2D_xmax
+ write(IIN_database) 2, loc_nspec2D_xmax
+
loc_nspec2D_ymin = 0
do i=1,nspec2D_ymin
if(part(ibelm_ymin(i)) == iproc) then
loc_nspec2D_ymin = loc_nspec2D_ymin + 1
end if
end do
- write(IIN_database,*) 3, loc_nspec2D_ymin
+ !write(IIN_database,*) 3, loc_nspec2D_ymin
+ write(IIN_database) 3, loc_nspec2D_ymin
+
loc_nspec2D_ymax = 0
do i=1,nspec2D_ymax
if(part(ibelm_ymax(i)) == iproc) then
loc_nspec2D_ymax = loc_nspec2D_ymax + 1
end if
end do
- write(IIN_database,*) 4, loc_nspec2D_ymax
+ !write(IIN_database,*) 4, loc_nspec2D_ymax
+ write(IIN_database) 4, loc_nspec2D_ymax
+
loc_nspec2D_bottom = 0
do i=1,nspec2D_bottom
if(part(ibelm_bottom(i)) == iproc) then
loc_nspec2D_bottom = loc_nspec2D_bottom + 1
end if
end do
- write(IIN_database,*) 5, loc_nspec2D_bottom
+ !write(IIN_database,*) 5, loc_nspec2D_bottom
+ write(IIN_database) 5, loc_nspec2D_bottom
+
loc_nspec2D_top = 0
do i=1,nspec2D_top
if(part(ibelm_top(i)) == iproc) then
loc_nspec2D_top = loc_nspec2D_top + 1
end if
end do
- write(IIN_database,*) 6, loc_nspec2D_top
+ !write(IIN_database,*) 6, loc_nspec2D_top
+ write(IIN_database) 6, loc_nspec2D_top
! outputs element index and element node indices
! note: assumes that element indices in ibelm_* arrays are in the range from 1 to nspec
@@ -749,7 +770,10 @@
loc_node4 = glob2loc_nodes(j)+1
end if
end do
- write(IIN_database,*) glob2loc_elmnts(ibelm_xmin(i)-1)+1, &
+ !write(IIN_database,*) glob2loc_elmnts(ibelm_xmin(i)-1)+1, &
+ ! loc_node1, loc_node2, loc_node3, loc_node4
+
+ write(IIN_database) glob2loc_elmnts(ibelm_xmin(i)-1)+1, &
loc_node1, loc_node2, loc_node3, loc_node4
end if
end do
@@ -780,7 +804,10 @@
loc_node4 = glob2loc_nodes(j)+1
end if
end do
- write(IIN_database,*) glob2loc_elmnts(ibelm_xmax(i)-1)+1, &
+ !write(IIN_database,*) glob2loc_elmnts(ibelm_xmax(i)-1)+1, &
+ ! loc_node1, loc_node2, loc_node3, loc_node4
+
+ write(IIN_database) glob2loc_elmnts(ibelm_xmax(i)-1)+1, &
loc_node1, loc_node2, loc_node3, loc_node4
end if
end do
@@ -811,7 +838,10 @@
loc_node4 = glob2loc_nodes(j)+1
end if
end do
- write(IIN_database,*) glob2loc_elmnts(ibelm_ymin(i)-1)+1, &
+ !write(IIN_database,*) glob2loc_elmnts(ibelm_ymin(i)-1)+1, &
+ ! loc_node1, loc_node2, loc_node3, loc_node4
+
+ write(IIN_database) glob2loc_elmnts(ibelm_ymin(i)-1)+1, &
loc_node1, loc_node2, loc_node3, loc_node4
end if
end do
@@ -842,7 +872,10 @@
loc_node4 = glob2loc_nodes(j)+1
end if
end do
- write(IIN_database,*) glob2loc_elmnts(ibelm_ymax(i)-1)+1, &
+ !write(IIN_database,*) glob2loc_elmnts(ibelm_ymax(i)-1)+1, &
+ ! loc_node1, loc_node2, loc_node3, loc_node4
+
+ write(IIN_database) glob2loc_elmnts(ibelm_ymax(i)-1)+1, &
loc_node1, loc_node2, loc_node3, loc_node4
end if
end do
@@ -873,7 +906,8 @@
loc_node4 = glob2loc_nodes(j)+1
end if
end do
- write(IIN_database,*) glob2loc_elmnts(ibelm_bottom(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
+ !write(IIN_database,*) glob2loc_elmnts(ibelm_bottom(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
+ write(IIN_database) glob2loc_elmnts(ibelm_bottom(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
end if
end do
@@ -899,7 +933,8 @@
loc_node4 = glob2loc_nodes(j)+1
end if
end do
- write(IIN_database,*) glob2loc_elmnts(ibelm_top(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
+ !write(IIN_database,*) glob2loc_elmnts(ibelm_top(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
+ write(IIN_database) glob2loc_elmnts(ibelm_top(i)-1)+1, loc_node1, loc_node2, loc_node3, loc_node4
end if
end do
@@ -959,7 +994,10 @@
! format:
! # ispec_local # material_index_1 # material_index_2 # corner_id1 # corner_id2 # ... # corner_id8
- write(IIN_database,*) glob2loc_elmnts(i)+1, num_modele(1,i+1), &
+ !write(IIN_database,*) glob2loc_elmnts(i)+1, num_modele(1,i+1), &
+ ! num_modele(2,i+1),(loc_nodes(k)+1, k=0,ngnod-1)
+
+ write(IIN_database) glob2loc_elmnts(i)+1, num_modele(1,i+1), &
num_modele(2,i+1),(loc_nodes(k)+1, k=0,ngnod-1)
end if
end do
@@ -1033,9 +1071,13 @@
do j = i+1, nparts-1
if ( my_interfaces(num_interface) == 1 ) then
if ( i == iproc ) then
- write(IIN_database,*) j, my_nb_interfaces(num_interface)
+ !write(IIN_database,*) j, my_nb_interfaces(num_interface)
+ write(IIN_database) j, my_nb_interfaces(num_interface)
+
else
- write(IIN_database,*) i, my_nb_interfaces(num_interface)
+ !write(IIN_database,*) i, my_nb_interfaces(num_interface)
+ write(IIN_database) i, my_nb_interfaces(num_interface)
+
end if
count_faces = 0
@@ -1083,8 +1125,11 @@
local_nodes(1) = glob2loc_nodes(l)+1
end if
end do
- write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+ !write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+ ! local_nodes(1), -1, -1, -1
+ write(IIN_database) local_elmnt, tab_interfaces(k*7+2), &
local_nodes(1), -1, -1, -1
+
case (2)
! edge element
do l = glob2loc_nodes_nparts(tab_interfaces(k*7+3)), &
@@ -1099,8 +1144,11 @@
local_nodes(2) = glob2loc_nodes(l)+1
end if
end do
- write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+ !write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+ ! local_nodes(1), local_nodes(2), -1, -1
+ write(IIN_database) local_elmnt, tab_interfaces(k*7+2), &
local_nodes(1), local_nodes(2), -1, -1
+
case (4)
! face element
count_faces = count_faces + 1
@@ -1128,8 +1176,11 @@
local_nodes(4) = glob2loc_nodes(l)+1
end if
end do
- write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+ !write(IIN_database,*) local_elmnt, tab_interfaces(k*7+2), &
+ ! local_nodes(1), local_nodes(2),local_nodes(3), local_nodes(4)
+ write(IIN_database) local_elmnt, tab_interfaces(k*7+2), &
local_nodes(1), local_nodes(2),local_nodes(3), local_nodes(4)
+
case default
print *, "error in write_interfaces_database!", tab_interfaces(k*7+2), iproc
end select
@@ -1188,7 +1239,8 @@
if( loc_nspec2D_moho == 0 ) return
! format: #surface_id, #number of elements
- write(IIN_database,*) 7, loc_nspec2D_moho
+ !write(IIN_database,*) 7, loc_nspec2D_moho
+ write(IIN_database) 7, loc_nspec2D_moho
! outputs element index and element node indices
! note: assumes that element indices in ibelm_* arrays are in the range from 1 to nspec
@@ -1219,7 +1271,9 @@
loc_node4 = glob2loc_nodes(j)+1
end if
end do
- write(IIN_database,*) glob2loc_elmnts(ibelm_moho(i)-1)+1, &
+ !write(IIN_database,*) glob2loc_elmnts(ibelm_moho(i)-1)+1, &
+ ! loc_node1, loc_node2, loc_node3, loc_node4
+ write(IIN_database) glob2loc_elmnts(ibelm_moho(i)-1)+1, &
loc_node1, loc_node2, loc_node3, loc_node4
end if
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/program_decompose_mesh_SCOTCH.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/program_decompose_mesh_SCOTCH.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/decompose_mesh_SCOTCH/program_decompose_mesh_SCOTCH.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -31,6 +31,9 @@
check_valence, &
scotch_partitioning, &
write_mesh_databases
+
+! daniel: ifort
+! USE IFPORT,only: getarg
implicit none
integer :: i
character(len=256) :: arg(3)
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/calc_jacobian.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/calc_jacobian.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/calc_jacobian.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,364 +1,364 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine calc_jacobian(myrank,xixstore,xiystore,xizstore, &
- etaxstore,etaystore,etazstore, &
- gammaxstore,gammaystore,gammazstore,jacobianstore, &
- xstore,ystore,zstore, &
- xelm,yelm,zelm,shape3D,dershape3D,ispec,nspec)
-
- implicit none
-
- include "constants.h"
-
- integer ispec,nspec,myrank
-
- double precision shape3D(NGNOD,NGLLX,NGLLY,NGLLZ)
- double precision dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ)
-
- double precision, dimension(NGNOD) :: xelm,yelm,zelm
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
- xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, &
- gammaxstore,gammaystore,gammazstore,jacobianstore
-
- double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-
- integer i,j,k,ia
- double precision xxi,xeta,xgamma,yxi,yeta,ygamma,zxi,zeta,zgamma
- double precision xmesh,ymesh,zmesh
- double precision xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
- double precision jacobian
-
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
-
- xxi = ZERO
- xeta = ZERO
- xgamma = ZERO
- yxi = ZERO
- yeta = ZERO
- ygamma = ZERO
- zxi = ZERO
- zeta = ZERO
- zgamma = ZERO
- xmesh = ZERO
- ymesh = ZERO
- zmesh = ZERO
-
- do ia=1,NGNOD
- xxi = xxi + dershape3D(1,ia,i,j,k)*xelm(ia)
- xeta = xeta + dershape3D(2,ia,i,j,k)*xelm(ia)
- xgamma = xgamma + dershape3D(3,ia,i,j,k)*xelm(ia)
- yxi = yxi + dershape3D(1,ia,i,j,k)*yelm(ia)
- yeta = yeta + dershape3D(2,ia,i,j,k)*yelm(ia)
- ygamma = ygamma + dershape3D(3,ia,i,j,k)*yelm(ia)
- zxi = zxi + dershape3D(1,ia,i,j,k)*zelm(ia)
- zeta = zeta + dershape3D(2,ia,i,j,k)*zelm(ia)
- zgamma = zgamma + dershape3D(3,ia,i,j,k)*zelm(ia)
- xmesh = xmesh + shape3D(ia,i,j,k)*xelm(ia)
- ymesh = ymesh + shape3D(ia,i,j,k)*yelm(ia)
- zmesh = zmesh + shape3D(ia,i,j,k)*zelm(ia)
- enddo
-
- jacobian = xxi*(yeta*zgamma-ygamma*zeta) - &
- xeta*(yxi*zgamma-ygamma*zxi) + &
- xgamma*(yxi*zeta-yeta*zxi)
-
-! can ignore negative jacobian in mesher if needed when debugging code
- if(jacobian <= ZERO) call exit_MPI(myrank,'3D Jacobian undefined')
-
-! invert the relation (Fletcher p. 50 vol. 2)
- xix = (yeta*zgamma-ygamma*zeta) / jacobian
- xiy = (xgamma*zeta-xeta*zgamma) / jacobian
- xiz = (xeta*ygamma-xgamma*yeta) / jacobian
- etax = (ygamma*zxi-yxi*zgamma) / jacobian
- etay = (xxi*zgamma-xgamma*zxi) / jacobian
- etaz = (xgamma*yxi-xxi*ygamma) / jacobian
- gammax = (yxi*zeta-yeta*zxi) / jacobian
- gammay = (xeta*zxi-xxi*zeta) / jacobian
- gammaz = (xxi*yeta-xeta*yxi) / jacobian
-
-! compute and store the jacobian for the solver
- jacobian = 1. / (xix*(etay*gammaz-etaz*gammay) &
- -xiy*(etax*gammaz-etaz*gammax) &
- +xiz*(etax*gammay-etay*gammax))
-
-! save the derivatives and the jacobian
-
-! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- xixstore(i,j,k,ispec) = sngl(xix)
- xiystore(i,j,k,ispec) = sngl(xiy)
- xizstore(i,j,k,ispec) = sngl(xiz)
- etaxstore(i,j,k,ispec) = sngl(etax)
- etaystore(i,j,k,ispec) = sngl(etay)
- etazstore(i,j,k,ispec) = sngl(etaz)
- gammaxstore(i,j,k,ispec) = sngl(gammax)
- gammaystore(i,j,k,ispec) = sngl(gammay)
- gammazstore(i,j,k,ispec) = sngl(gammaz)
- jacobianstore(i,j,k,ispec) = sngl(jacobian)
- else
- xixstore(i,j,k,ispec) = xix
- xiystore(i,j,k,ispec) = xiy
- xizstore(i,j,k,ispec) = xiz
- etaxstore(i,j,k,ispec) = etax
- etaystore(i,j,k,ispec) = etay
- etazstore(i,j,k,ispec) = etaz
- gammaxstore(i,j,k,ispec) = gammax
- gammaystore(i,j,k,ispec) = gammay
- gammazstore(i,j,k,ispec) = gammaz
- jacobianstore(i,j,k,ispec) = jacobian
- endif
-
- xstore(i,j,k,ispec) = xmesh
- ystore(i,j,k,ispec) = ymesh
- zstore(i,j,k,ispec) = zmesh
-
- enddo
- enddo
- enddo
-
- end subroutine calc_jacobian
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! This subroutine recomputes the 3D jacobian for one element
-! based upon all GLL points
-! Hejun Zhu OCT16,2009
-
-! input: myrank,
-! xstore,ystore,zstore ----- input position
-! xigll,yigll,zigll ----- gll points position
-! ispec,nspec ----- element number
-! ACTUALLY_STORE_ARRAYS ------ save array or not
-
-! output: xixstore,xiystore,xizstore,
-! etaxstore,etaystore,etazstore,
-! gammaxstore,gammaystore,gammazstore ------ parameters used for calculating jacobian
-!
-!
-! subroutine recalc_jacobian_gll3D(myrank,xixstore,xiystore,xizstore, &
-! etaxstore,etaystore,etazstore, &
-! gammaxstore,gammaystore,gammazstore,jacobianstore, &
-! xstore,ystore,zstore, &
-! ispec,nspec, &
-! xigll,yigll,zigll, &
-! ACTUALLY_STORE_ARRAYS)
-!
-! implicit none
-!
-! include "constants.h"
-!
-! ! input parameter
-! integer::myrank,ispec,nspec
-! double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-! double precision, dimension(NGLLX):: xigll
-! double precision, dimension(NGLLY):: yigll
-! double precision, dimension(NGLLZ):: zigll
-! logical::ACTUALLY_STORE_ARRAYS
-!
-!
-! ! output results
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
-! xixstore,xiystore,xizstore,&
-! etaxstore,etaystore,etazstore,&
-! gammaxstore,gammaystore,gammazstore,&
-! jacobianstore
-!
-!
-! ! other parameters for this subroutine
-! integer:: i,j,k,i1,j1,k1
-! double precision:: xxi,xeta,xgamma,yxi,yeta,ygamma,zxi,zeta,zgamma
-! double precision:: xi,eta,gamma
-! double precision,dimension(NGLLX):: hxir,hpxir
-! double precision,dimension(NGLLY):: hetar,hpetar
-! double precision,dimension(NGLLZ):: hgammar,hpgammar
-! double precision:: hlagrange,hlagrange_xi,hlagrange_eta,hlagrange_gamma
-! double precision:: jacobian
-! double precision:: xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-!
-!
-!
-! ! test parameters which can be deleted
-! double precision:: xmesh,ymesh,zmesh
-! double precision:: sumshape,sumdershapexi,sumdershapeeta,sumdershapegamma
-!
-! ! first go over all 125 gll points
-! do k=1,NGLLZ
-! do j=1,NGLLY
-! do i=1,NGLLX
-!
-! xxi = 0.0
-! xeta = 0.0
-! xgamma = 0.0
-! yxi = 0.0
-! yeta = 0.0
-! ygamma = 0.0
-! zxi = 0.0
-! zeta = 0.0
-! zgamma = 0.0
-!
-! xi = xigll(i)
-! eta = yigll(j)
-! gamma = zigll(k)
-!
-! ! calculate lagrange polynomial and its derivative
-! call lagrange_any(xi,NGLLX,xigll,hxir,hpxir)
-! call lagrange_any(eta,NGLLY,yigll,hetar,hpetar)
-! call lagrange_any(gamma,NGLLZ,zigll,hgammar,hpgammar)
-!
-! ! test parameters
-! sumshape = 0.0
-! sumdershapexi = 0.0
-! sumdershapeeta = 0.0
-! sumdershapegamma = 0.0
-! xmesh = 0.0
-! ymesh = 0.0
-! zmesh = 0.0
-!
-!
-! do k1 = 1,NGLLZ
-! do j1 = 1,NGLLY
-! do i1 = 1,NGLLX
-! hlagrange = hxir(i1)*hetar(j1)*hgammar(k1)
-! hlagrange_xi = hpxir(i1)*hetar(j1)*hgammar(k1)
-! hlagrange_eta = hxir(i1)*hpetar(j1)*hgammar(k1)
-! hlagrange_gamma = hxir(i1)*hetar(j1)*hpgammar(k1)
-!
-!
-! xxi = xxi + xstore(i1,j1,k1,ispec)*hlagrange_xi
-! xeta = xeta + xstore(i1,j1,k1,ispec)*hlagrange_eta
-! xgamma = xgamma + xstore(i1,j1,k1,ispec)*hlagrange_gamma
-!
-! yxi = yxi + ystore(i1,j1,k1,ispec)*hlagrange_xi
-! yeta = yeta + ystore(i1,j1,k1,ispec)*hlagrange_eta
-! ygamma = ygamma + ystore(i1,j1,k1,ispec)*hlagrange_gamma
-!
-! zxi = zxi + zstore(i1,j1,k1,ispec)*hlagrange_xi
-! zeta = zeta + zstore(i1,j1,k1,ispec)*hlagrange_eta
-! zgamma = zgamma + zstore(i1,j1,k1,ispec)*hlagrange_gamma
-!
-! ! test the lagrange polynomial and its derivate
-! xmesh = xmesh + xstore(i1,j1,k1,ispec)*hlagrange
-! ymesh = ymesh + ystore(i1,j1,k1,ispec)*hlagrange
-! zmesh = zmesh + zstore(i1,j1,k1,ispec)*hlagrange
-! sumshape = sumshape + hlagrange
-! sumdershapexi = sumdershapexi + hlagrange_xi
-! sumdershapeeta = sumdershapeeta + hlagrange_eta
-! sumdershapegamma = sumdershapegamma + hlagrange_gamma
-!
-! end do
-! end do
-! end do
-!
-! ! Check the lagrange polynomial and its derivative
-! if (xmesh /=xstore(i,j,k,ispec).or.ymesh/=ystore(i,j,k,ispec).or.zmesh/=zstore(i,j,k,ispec)) then
-! call exit_MPI(myrank,'new mesh positions are wrong in recalc_jacobian_gall3D.f90')
-! end if
-! if(abs(sumshape-one) > TINYVAL) then
-! call exit_MPI(myrank,'error shape functions in recalc_jacobian_gll3D.f90')
-! end if
-! if(abs(sumdershapexi) > TINYVAL) then
-! call exit_MPI(myrank,'error derivative xi shape functions in recalc_jacobian_gll3D.f90')
-! end if
-! if(abs(sumdershapeeta) > TINYVAL) then
-! call exit_MPI(myrank,'error derivative eta shape functions in recalc_jacobian_gll3D.f90')
-! end if
-! if(abs(sumdershapegamma) > TINYVAL) then
-! call exit_MPI(myrank,'error derivative gamma shape functions in recalc_jacobian_gll3D.f90')
-! end if
-!
-!
-! jacobian = xxi*(yeta*zgamma-ygamma*zeta) - &
-! xeta*(yxi*zgamma-ygamma*zxi) + &
-! xgamma*(yxi*zeta-yeta*zxi)
-!
-! ! Check the jacobian
-! if(jacobian <= ZERO) then
-! call exit_MPI(myrank,'3D Jacobian undefined in recalc_jacobian_gll3D.f90')
-! end if
-!
-! ! invert the relation (Fletcher p. 50 vol. 2)
-! xix = (yeta*zgamma-ygamma*zeta) / jacobian
-! xiy = (xgamma*zeta-xeta*zgamma) / jacobian
-! xiz = (xeta*ygamma-xgamma*yeta) / jacobian
-! etax = (ygamma*zxi-yxi*zgamma) / jacobian
-! etay = (xxi*zgamma-xgamma*zxi) / jacobian
-! etaz = (xgamma*yxi-xxi*ygamma) / jacobian
-! gammax = (yxi*zeta-yeta*zxi) / jacobian
-! gammay = (xeta*zxi-xxi*zeta) / jacobian
-! gammaz = (xxi*yeta-xeta*yxi) / jacobian
-!
-!
-! ! compute and store the jacobian for the solver
-! jacobian = 1. / (xix*(etay*gammaz-etaz*gammay) &
-! -xiy*(etax*gammaz-etaz*gammax) &
-! +xiz*(etax*gammay-etay*gammax))
-!
-! ! resave the derivatives and the jacobian
-! ! distinguish between single and double precision for reals
-! if (ACTUALLY_STORE_ARRAYS) then
-!
-! if (myrank == 0) then
-! print*,'xix before',xixstore(i,j,k,ispec),'after',xix
-! print*,'etax before',etaxstore(i,j,k,ispec),'after',etax
-! print*,'gammax before',gammaxstore(i,j,k,ispec),'after',gammax
-! end if
-!
-! if(CUSTOM_REAL == SIZE_REAL) then
-! xixstore(i,j,k,ispec) = sngl(xix)
-! xiystore(i,j,k,ispec) = sngl(xiy)
-! xizstore(i,j,k,ispec) = sngl(xiz)
-! etaxstore(i,j,k,ispec) = sngl(etax)
-! etaystore(i,j,k,ispec) = sngl(etay)
-! etazstore(i,j,k,ispec) = sngl(etaz)
-! gammaxstore(i,j,k,ispec) = sngl(gammax)
-! gammaystore(i,j,k,ispec) = sngl(gammay)
-! gammazstore(i,j,k,ispec) = sngl(gammaz)
-! jacobianstore(i,j,k,ispec) = sngl(jacobian)
-! else
-! xixstore(i,j,k,ispec) = xix
-! xiystore(i,j,k,ispec) = xiy
-! xizstore(i,j,k,ispec) = xiz
-! etaxstore(i,j,k,ispec) = etax
-! etaystore(i,j,k,ispec) = etay
-! etazstore(i,j,k,ispec) = etaz
-! gammaxstore(i,j,k,ispec) = gammax
-! gammaystore(i,j,k,ispec) = gammay
-! gammazstore(i,j,k,ispec) = gammaz
-! jacobianstore(i,j,k,ispec) = jacobian
-! endif
-! end if
-! enddo
-! enddo
-! enddo
-!
-! end subroutine recalc_jacobian_gll3D
-!
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine calc_jacobian(myrank,xixstore,xiystore,xizstore, &
+ etaxstore,etaystore,etazstore, &
+ gammaxstore,gammaystore,gammazstore,jacobianstore, &
+ xstore,ystore,zstore, &
+ xelm,yelm,zelm,shape3D,dershape3D,ispec,nspec)
+
+ implicit none
+
+ include "constants.h"
+
+ integer ispec,nspec,myrank
+
+ double precision shape3D(NGNOD,NGLLX,NGLLY,NGLLZ)
+ double precision dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ)
+
+ double precision, dimension(NGNOD) :: xelm,yelm,zelm
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
+ xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, &
+ gammaxstore,gammaystore,gammazstore,jacobianstore
+
+ double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+
+ integer i,j,k,ia
+ double precision xxi,xeta,xgamma,yxi,yeta,ygamma,zxi,zeta,zgamma
+ double precision xmesh,ymesh,zmesh
+ double precision xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+ double precision jacobian
+
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+
+ xxi = ZERO
+ xeta = ZERO
+ xgamma = ZERO
+ yxi = ZERO
+ yeta = ZERO
+ ygamma = ZERO
+ zxi = ZERO
+ zeta = ZERO
+ zgamma = ZERO
+ xmesh = ZERO
+ ymesh = ZERO
+ zmesh = ZERO
+
+ do ia=1,NGNOD
+ xxi = xxi + dershape3D(1,ia,i,j,k)*xelm(ia)
+ xeta = xeta + dershape3D(2,ia,i,j,k)*xelm(ia)
+ xgamma = xgamma + dershape3D(3,ia,i,j,k)*xelm(ia)
+ yxi = yxi + dershape3D(1,ia,i,j,k)*yelm(ia)
+ yeta = yeta + dershape3D(2,ia,i,j,k)*yelm(ia)
+ ygamma = ygamma + dershape3D(3,ia,i,j,k)*yelm(ia)
+ zxi = zxi + dershape3D(1,ia,i,j,k)*zelm(ia)
+ zeta = zeta + dershape3D(2,ia,i,j,k)*zelm(ia)
+ zgamma = zgamma + dershape3D(3,ia,i,j,k)*zelm(ia)
+ xmesh = xmesh + shape3D(ia,i,j,k)*xelm(ia)
+ ymesh = ymesh + shape3D(ia,i,j,k)*yelm(ia)
+ zmesh = zmesh + shape3D(ia,i,j,k)*zelm(ia)
+ enddo
+
+ jacobian = xxi*(yeta*zgamma-ygamma*zeta) - &
+ xeta*(yxi*zgamma-ygamma*zxi) + &
+ xgamma*(yxi*zeta-yeta*zxi)
+
+! can ignore negative jacobian in mesher if needed when debugging code
+ if(jacobian <= ZERO) call exit_MPI(myrank,'3D Jacobian undefined')
+
+! invert the relation (Fletcher p. 50 vol. 2)
+ xix = (yeta*zgamma-ygamma*zeta) / jacobian
+ xiy = (xgamma*zeta-xeta*zgamma) / jacobian
+ xiz = (xeta*ygamma-xgamma*yeta) / jacobian
+ etax = (ygamma*zxi-yxi*zgamma) / jacobian
+ etay = (xxi*zgamma-xgamma*zxi) / jacobian
+ etaz = (xgamma*yxi-xxi*ygamma) / jacobian
+ gammax = (yxi*zeta-yeta*zxi) / jacobian
+ gammay = (xeta*zxi-xxi*zeta) / jacobian
+ gammaz = (xxi*yeta-xeta*yxi) / jacobian
+
+! compute and store the jacobian for the solver
+ jacobian = 1. / (xix*(etay*gammaz-etaz*gammay) &
+ -xiy*(etax*gammaz-etaz*gammax) &
+ +xiz*(etax*gammay-etay*gammax))
+
+! save the derivatives and the jacobian
+
+! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ xixstore(i,j,k,ispec) = sngl(xix)
+ xiystore(i,j,k,ispec) = sngl(xiy)
+ xizstore(i,j,k,ispec) = sngl(xiz)
+ etaxstore(i,j,k,ispec) = sngl(etax)
+ etaystore(i,j,k,ispec) = sngl(etay)
+ etazstore(i,j,k,ispec) = sngl(etaz)
+ gammaxstore(i,j,k,ispec) = sngl(gammax)
+ gammaystore(i,j,k,ispec) = sngl(gammay)
+ gammazstore(i,j,k,ispec) = sngl(gammaz)
+ jacobianstore(i,j,k,ispec) = sngl(jacobian)
+ else
+ xixstore(i,j,k,ispec) = xix
+ xiystore(i,j,k,ispec) = xiy
+ xizstore(i,j,k,ispec) = xiz
+ etaxstore(i,j,k,ispec) = etax
+ etaystore(i,j,k,ispec) = etay
+ etazstore(i,j,k,ispec) = etaz
+ gammaxstore(i,j,k,ispec) = gammax
+ gammaystore(i,j,k,ispec) = gammay
+ gammazstore(i,j,k,ispec) = gammaz
+ jacobianstore(i,j,k,ispec) = jacobian
+ endif
+
+ xstore(i,j,k,ispec) = xmesh
+ ystore(i,j,k,ispec) = ymesh
+ zstore(i,j,k,ispec) = zmesh
+
+ enddo
+ enddo
+ enddo
+
+ end subroutine calc_jacobian
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! This subroutine recomputes the 3D jacobian for one element
+! based upon all GLL points
+! Hejun Zhu OCT16,2009
+
+! input: myrank,
+! xstore,ystore,zstore ----- input position
+! xigll,yigll,zigll ----- gll points position
+! ispec,nspec ----- element number
+! ACTUALLY_STORE_ARRAYS ------ save array or not
+
+! output: xixstore,xiystore,xizstore,
+! etaxstore,etaystore,etazstore,
+! gammaxstore,gammaystore,gammazstore ------ parameters used for calculating jacobian
+!
+!
+! subroutine recalc_jacobian_gll3D(myrank,xixstore,xiystore,xizstore, &
+! etaxstore,etaystore,etazstore, &
+! gammaxstore,gammaystore,gammazstore,jacobianstore, &
+! xstore,ystore,zstore, &
+! ispec,nspec, &
+! xigll,yigll,zigll, &
+! ACTUALLY_STORE_ARRAYS)
+!
+! implicit none
+!
+! include "constants.h"
+!
+! ! input parameter
+! integer::myrank,ispec,nspec
+! double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+! double precision, dimension(NGLLX):: xigll
+! double precision, dimension(NGLLY):: yigll
+! double precision, dimension(NGLLZ):: zigll
+! logical::ACTUALLY_STORE_ARRAYS
+!
+!
+! ! output results
+! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
+! xixstore,xiystore,xizstore,&
+! etaxstore,etaystore,etazstore,&
+! gammaxstore,gammaystore,gammazstore,&
+! jacobianstore
+!
+!
+! ! other parameters for this subroutine
+! integer:: i,j,k,i1,j1,k1
+! double precision:: xxi,xeta,xgamma,yxi,yeta,ygamma,zxi,zeta,zgamma
+! double precision:: xi,eta,gamma
+! double precision,dimension(NGLLX):: hxir,hpxir
+! double precision,dimension(NGLLY):: hetar,hpetar
+! double precision,dimension(NGLLZ):: hgammar,hpgammar
+! double precision:: hlagrange,hlagrange_xi,hlagrange_eta,hlagrange_gamma
+! double precision:: jacobian
+! double precision:: xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+!
+!
+!
+! ! test parameters which can be deleted
+! double precision:: xmesh,ymesh,zmesh
+! double precision:: sumshape,sumdershapexi,sumdershapeeta,sumdershapegamma
+!
+! ! first go over all 125 gll points
+! do k=1,NGLLZ
+! do j=1,NGLLY
+! do i=1,NGLLX
+!
+! xxi = 0.0
+! xeta = 0.0
+! xgamma = 0.0
+! yxi = 0.0
+! yeta = 0.0
+! ygamma = 0.0
+! zxi = 0.0
+! zeta = 0.0
+! zgamma = 0.0
+!
+! xi = xigll(i)
+! eta = yigll(j)
+! gamma = zigll(k)
+!
+! ! calculate lagrange polynomial and its derivative
+! call lagrange_any(xi,NGLLX,xigll,hxir,hpxir)
+! call lagrange_any(eta,NGLLY,yigll,hetar,hpetar)
+! call lagrange_any(gamma,NGLLZ,zigll,hgammar,hpgammar)
+!
+! ! test parameters
+! sumshape = 0.0
+! sumdershapexi = 0.0
+! sumdershapeeta = 0.0
+! sumdershapegamma = 0.0
+! xmesh = 0.0
+! ymesh = 0.0
+! zmesh = 0.0
+!
+!
+! do k1 = 1,NGLLZ
+! do j1 = 1,NGLLY
+! do i1 = 1,NGLLX
+! hlagrange = hxir(i1)*hetar(j1)*hgammar(k1)
+! hlagrange_xi = hpxir(i1)*hetar(j1)*hgammar(k1)
+! hlagrange_eta = hxir(i1)*hpetar(j1)*hgammar(k1)
+! hlagrange_gamma = hxir(i1)*hetar(j1)*hpgammar(k1)
+!
+!
+! xxi = xxi + xstore(i1,j1,k1,ispec)*hlagrange_xi
+! xeta = xeta + xstore(i1,j1,k1,ispec)*hlagrange_eta
+! xgamma = xgamma + xstore(i1,j1,k1,ispec)*hlagrange_gamma
+!
+! yxi = yxi + ystore(i1,j1,k1,ispec)*hlagrange_xi
+! yeta = yeta + ystore(i1,j1,k1,ispec)*hlagrange_eta
+! ygamma = ygamma + ystore(i1,j1,k1,ispec)*hlagrange_gamma
+!
+! zxi = zxi + zstore(i1,j1,k1,ispec)*hlagrange_xi
+! zeta = zeta + zstore(i1,j1,k1,ispec)*hlagrange_eta
+! zgamma = zgamma + zstore(i1,j1,k1,ispec)*hlagrange_gamma
+!
+! ! test the lagrange polynomial and its derivate
+! xmesh = xmesh + xstore(i1,j1,k1,ispec)*hlagrange
+! ymesh = ymesh + ystore(i1,j1,k1,ispec)*hlagrange
+! zmesh = zmesh + zstore(i1,j1,k1,ispec)*hlagrange
+! sumshape = sumshape + hlagrange
+! sumdershapexi = sumdershapexi + hlagrange_xi
+! sumdershapeeta = sumdershapeeta + hlagrange_eta
+! sumdershapegamma = sumdershapegamma + hlagrange_gamma
+!
+! end do
+! end do
+! end do
+!
+! ! Check the lagrange polynomial and its derivative
+! if (xmesh /=xstore(i,j,k,ispec).or.ymesh/=ystore(i,j,k,ispec).or.zmesh/=zstore(i,j,k,ispec)) then
+! call exit_MPI(myrank,'new mesh positions are wrong in recalc_jacobian_gall3D.f90')
+! end if
+! if(abs(sumshape-one) > TINYVAL) then
+! call exit_MPI(myrank,'error shape functions in recalc_jacobian_gll3D.f90')
+! end if
+! if(abs(sumdershapexi) > TINYVAL) then
+! call exit_MPI(myrank,'error derivative xi shape functions in recalc_jacobian_gll3D.f90')
+! end if
+! if(abs(sumdershapeeta) > TINYVAL) then
+! call exit_MPI(myrank,'error derivative eta shape functions in recalc_jacobian_gll3D.f90')
+! end if
+! if(abs(sumdershapegamma) > TINYVAL) then
+! call exit_MPI(myrank,'error derivative gamma shape functions in recalc_jacobian_gll3D.f90')
+! end if
+!
+!
+! jacobian = xxi*(yeta*zgamma-ygamma*zeta) - &
+! xeta*(yxi*zgamma-ygamma*zxi) + &
+! xgamma*(yxi*zeta-yeta*zxi)
+!
+! ! Check the jacobian
+! if(jacobian <= ZERO) then
+! call exit_MPI(myrank,'3D Jacobian undefined in recalc_jacobian_gll3D.f90')
+! end if
+!
+! ! invert the relation (Fletcher p. 50 vol. 2)
+! xix = (yeta*zgamma-ygamma*zeta) / jacobian
+! xiy = (xgamma*zeta-xeta*zgamma) / jacobian
+! xiz = (xeta*ygamma-xgamma*yeta) / jacobian
+! etax = (ygamma*zxi-yxi*zgamma) / jacobian
+! etay = (xxi*zgamma-xgamma*zxi) / jacobian
+! etaz = (xgamma*yxi-xxi*ygamma) / jacobian
+! gammax = (yxi*zeta-yeta*zxi) / jacobian
+! gammay = (xeta*zxi-xxi*zeta) / jacobian
+! gammaz = (xxi*yeta-xeta*yxi) / jacobian
+!
+!
+! ! compute and store the jacobian for the solver
+! jacobian = 1. / (xix*(etay*gammaz-etaz*gammay) &
+! -xiy*(etax*gammaz-etaz*gammax) &
+! +xiz*(etax*gammay-etay*gammax))
+!
+! ! resave the derivatives and the jacobian
+! ! distinguish between single and double precision for reals
+! if (ACTUALLY_STORE_ARRAYS) then
+!
+! if (myrank == 0) then
+! print*,'xix before',xixstore(i,j,k,ispec),'after',xix
+! print*,'etax before',etaxstore(i,j,k,ispec),'after',etax
+! print*,'gammax before',gammaxstore(i,j,k,ispec),'after',gammax
+! end if
+!
+! if(CUSTOM_REAL == SIZE_REAL) then
+! xixstore(i,j,k,ispec) = sngl(xix)
+! xiystore(i,j,k,ispec) = sngl(xiy)
+! xizstore(i,j,k,ispec) = sngl(xiz)
+! etaxstore(i,j,k,ispec) = sngl(etax)
+! etaystore(i,j,k,ispec) = sngl(etay)
+! etazstore(i,j,k,ispec) = sngl(etaz)
+! gammaxstore(i,j,k,ispec) = sngl(gammax)
+! gammaystore(i,j,k,ispec) = sngl(gammay)
+! gammazstore(i,j,k,ispec) = sngl(gammaz)
+! jacobianstore(i,j,k,ispec) = sngl(jacobian)
+! else
+! xixstore(i,j,k,ispec) = xix
+! xiystore(i,j,k,ispec) = xiy
+! xizstore(i,j,k,ispec) = xiz
+! etaxstore(i,j,k,ispec) = etax
+! etaystore(i,j,k,ispec) = etay
+! etazstore(i,j,k,ispec) = etaz
+! gammaxstore(i,j,k,ispec) = gammax
+! gammaystore(i,j,k,ispec) = gammay
+! gammazstore(i,j,k,ispec) = gammaz
+! jacobianstore(i,j,k,ispec) = jacobian
+! endif
+! end if
+! enddo
+! enddo
+! enddo
+!
+! end subroutine recalc_jacobian_gll3D
+!
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,1091 +1,1094 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-module create_regions_mesh_ext_par
-
- include 'constants.h'
-
-! global point coordinates
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: xstore_dummy
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: ystore_dummy
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: zstore_dummy
-
-! Gauss-Lobatto-Legendre points and weights of integration
- double precision, dimension(:), allocatable :: xigll,yigll,zigll,wxgll,wygll,wzgll
-
-! 3D shape functions and their derivatives
- double precision, dimension(:,:,:,:), allocatable :: shape3D
- double precision, dimension(:,:,:,:,:), allocatable :: dershape3D
-
- double precision, dimension(:), allocatable :: xelm,yelm,zelm
-
-! arrays with mesh parameters
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: xixstore,xiystore,xizstore, &
- etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore,jacobianstore
-
-! for model density, kappa, mu
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rhostore,kappastore,mustore
-
-! mass matrix
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass,rmass_acoustic,&
- rmass_solid_poroelastic,rmass_fluid_poroelastic
-
-! ocean load
- integer :: NGLOB_OCEAN
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_ocean_load
-
-! attenuation
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: qmu_attenuation_store
-
-! 2D shape functions and their derivatives, weights
- double precision, dimension(:,:,:), allocatable :: shape2D_x,shape2D_y,shape2D_bottom,shape2D_top
- double precision, dimension(:,:,:,:), allocatable :: dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top
- double precision, dimension(:,:), allocatable :: wgllwgll_xy,wgllwgll_xz,wgllwgll_yz
-
-! absorbing boundary arrays (for all boundaries) - keeps all infos, allowing for irregular surfaces
- real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: abs_boundary_normal
- real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: abs_boundary_jacobian2Dw
- integer, dimension(:,:,:), allocatable :: abs_boundary_ijk
- integer, dimension(:), allocatable :: abs_boundary_ispec
- integer :: num_abs_boundary_faces
-
-! free surface arrays
- real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: free_surface_normal
- real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: free_surface_jacobian2Dw
- integer, dimension(:,:,:), allocatable :: free_surface_ijk
- integer, dimension(:), allocatable :: free_surface_ispec
- integer :: num_free_surface_faces
-
-! acoustic-elastic coupling surface
- real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: coupling_ac_el_normal
- real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: coupling_ac_el_jacobian2Dw
- integer, dimension(:,:,:), allocatable :: coupling_ac_el_ijk
- integer, dimension(:), allocatable :: coupling_ac_el_ispec
- integer :: num_coupling_ac_el_faces
-
-! for stacey
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rho_vp,rho_vs
-
-! anisotropy
- integer :: NSPEC_ANISO
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store
-
-! material domain flags
- logical, dimension(:), allocatable :: ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic
-
-! name of the database file
- character(len=256) prname
-
-end module create_regions_mesh_ext_par
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! main routine
-
-subroutine create_regions_mesh_ext(ibool, &
- xstore,ystore,zstore,nspec,npointot,myrank,LOCAL_PATH, &
- nnodes_ext_mesh,nelmnts_ext_mesh, &
- nodes_coords_ext_mesh, elmnts_ext_mesh, &
- max_static_memory_size, mat_ext_mesh, materials_ext_mesh, &
- nmat_ext_mesh, undef_mat_prop, nundefMat_ext_mesh, &
- num_interfaces_ext_mesh, max_interface_size_ext_mesh, &
- my_neighbours_ext_mesh, my_nelmnts_neighbours_ext_mesh, &
- my_interfaces_ext_mesh, &
- ibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh, &
- nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, &
- NSPEC2D_BOTTOM, NSPEC2D_TOP,&
- ibelm_xmin, ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top, &
- nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax,&
- nodes_ibelm_bottom,nodes_ibelm_top, &
- SAVE_MESH_FILES,nglob, &
- ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
- ATTENUATION,USE_OLSEN_ATTENUATION, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
- ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
- itopo_bathy)
-
-! create the different regions of the mesh
-
- use create_regions_mesh_ext_par
- implicit none
- !include "constants.h"
-
-! number of spectral elements in each block
- integer :: nspec
-
-! arrays with the mesh
- integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
- double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-
- integer :: npointot
-
-! proc numbers for MPI
- integer :: myrank
- integer :: NPROC
-
- character(len=256) :: LOCAL_PATH
-
-! data from the external mesh
- integer :: nnodes_ext_mesh,nelmnts_ext_mesh
- double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
- integer, dimension(ESIZE,nelmnts_ext_mesh) :: elmnts_ext_mesh
-
-! static memory size needed by the solver
- double precision :: max_static_memory_size
-
- integer, dimension(2,nelmnts_ext_mesh) :: mat_ext_mesh
-
-! material properties
- integer :: nmat_ext_mesh,nundefMat_ext_mesh
- double precision, dimension(6,nmat_ext_mesh) :: materials_ext_mesh
- character (len=30), dimension(6,nundefMat_ext_mesh):: undef_mat_prop
-
-! double precision, external :: materials_ext_mesh
-
-! MPI communication
- integer :: num_interfaces_ext_mesh,max_interface_size_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: my_nelmnts_neighbours_ext_mesh
- integer, dimension(6,max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: my_interfaces_ext_mesh
- integer, dimension(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
-
-! absorbing boundaries
- integer :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, NSPEC2D_BOTTOM, NSPEC2D_TOP
- integer, dimension(nspec2D_xmin) :: ibelm_xmin
- integer, dimension(nspec2D_xmax) :: ibelm_xmax
- integer, dimension(nspec2D_ymin) :: ibelm_ymin
- integer, dimension(nspec2D_ymax) :: ibelm_ymax
- integer, dimension(NSPEC2D_BOTTOM) :: ibelm_bottom
- integer, dimension(NSPEC2D_TOP) :: ibelm_top
- ! node indices of boundary faces
- integer, dimension(4,nspec2D_xmin) :: nodes_ibelm_xmin
- integer, dimension(4,nspec2D_xmax) :: nodes_ibelm_xmax
- integer, dimension(4,nspec2D_ymin) :: nodes_ibelm_ymin
- integer, dimension(4,nspec2D_ymax) :: nodes_ibelm_ymax
- integer, dimension(4,NSPEC2D_BOTTOM) :: nodes_ibelm_bottom
- integer, dimension(4,NSPEC2D_TOP) :: nodes_ibelm_top
-
- integer :: nglob
-
- logical :: SAVE_MESH_FILES
- logical :: ANISOTROPY
- logical :: OCEANS,TOPOGRAPHY
- logical :: ATTENUATION,USE_OLSEN_ATTENUATION
-
-! use integer array to store topography values
- integer :: UTM_PROJECTION_ZONE
- logical :: SUPPRESS_UTM_PROJECTION
- integer :: NX_TOPO,NY_TOPO
- double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
- integer, dimension(NX_TOPO,NY_TOPO) :: itopo_bathy
-
-! local parameters
-! static memory size needed by the solver
- double precision :: static_memory_size
- real(kind=CUSTOM_REAL) :: model_speed_max,min_resolved_period
-
-! for vtk output
-! character(len=256) prname_file
-! integer,dimension(:),allocatable :: itest_flag
-! integer, dimension(:), allocatable :: elem_flag
-
-! For Piero Basini :
-! integer :: doubling_value_found_for_Piero
-! double precision :: xmesh,ymesh,zmesh
-! double precision :: rho,vp,vs
-
-! integer,dimension(nspec) :: idoubling
-! integer :: doubling_value_found_for_Piero
-! integer, parameter :: NUMBER_OF_STATIONS = 6
-! double precision, parameter :: RADIUS_TO_EXCLUDE = 250.d0
-! double precision, dimension(NUMBER_OF_STATIONS) :: utm_x_station,utm_y_station
-
-! logical :: is_around_a_station
-! integer :: istation
-
-! ! store bedrock values
-! integer :: icornerlat,icornerlong
-! double precision :: lat,long,elevation_bedrock
-! double precision :: lat_corner,long_corner,ratio_xi,ratio_eta
-!real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: ibedrock
-
-! initializes arrays
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) ' ...allocating arrays '
- endif
- call crm_ext_allocate_arrays(nspec,LOCAL_PATH,myrank, &
- nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
- nspec2D_bottom,nspec2D_top,ANISOTROPY)
-
-
-! fills location and weights for Gauss-Lobatto-Legendre points, shape and derivations,
-! returns jacobianstore,xixstore,...gammazstore
-! and GLL-point locations in xstore,ystore,zstore
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*) ' ...setting up jacobian '
- endif
- call crm_ext_setup_jacobian(myrank, &
- xstore,ystore,zstore,nspec, &
- nodes_coords_ext_mesh,nnodes_ext_mesh,&
- elmnts_ext_mesh,nelmnts_ext_mesh)
-
-! creates ibool index array for projection from local to global points
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*) ' ...indexing global points'
- endif
- call crm_ext_setup_indexing(ibool, &
- xstore,ystore,zstore,nspec,nglob,npointot, &
- nnodes_ext_mesh,nodes_coords_ext_mesh,myrank)
-
-! sets up MPI interfaces between partitions
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*) ' ...preparing MPI interfaces '
- endif
- call get_MPI(myrank,nglob,nspec,ibool, &
- nelmnts_ext_mesh,elmnts_ext_mesh, &
- my_nelmnts_neighbours_ext_mesh, my_interfaces_ext_mesh, &
- ibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh, &
- num_interfaces_ext_mesh,max_interface_size_ext_mesh,&
- my_neighbours_ext_mesh,NPROC)
-
-! sets material velocities
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*) ' ...determining velocity model'
- endif
- ! Default model. Using PREM model instead
- ! call get_model(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
- ! materials_ext_mesh,nmat_ext_mesh, &
- ! undef_mat_prop,nundefMat_ext_mesh, &
- ! ANISOTROPY,LOCAL_PATH)
-
- call get_model_PREM(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
- materials_ext_mesh,nmat_ext_mesh, &
- undef_mat_prop,nundefMat_ext_mesh, &
- ANISOTROPY,LOCAL_PATH)
-
-! sets up absorbing/free surface boundaries
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*) ' ...setting up absorbing boundaries '
- endif
- call get_absorbing_boundary(myrank,nspec,nglob,ibool, &
- nodes_coords_ext_mesh,nnodes_ext_mesh, &
- ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
- nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax, &
- nodes_ibelm_bottom,nodes_ibelm_top, &
- nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
- nspec2D_bottom,nspec2D_top)
-
-! sets up acoustic-elastic coupling surfaces
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*) ' ...detecting acoustic-elastic surfaces '
- endif
- call get_coupling_surfaces(myrank, &
- nspec,nglob,ibool,NPROC, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
- num_interfaces_ext_mesh,max_interface_size_ext_mesh, &
- my_neighbours_ext_mesh)
-
-! creates mass matrix
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*) ' ...creating mass matrix '
- endif
- call create_mass_matrices(nglob,nspec,ibool)
-
-! creates ocean load mass matrix
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*) ' ...creating ocean load mass matrix '
- endif
- call create_mass_matrices_ocean_load(nglob,nspec,ibool,OCEANS,TOPOGRAPHY, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
- ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
- itopo_bathy)
-
-! saves the binary mesh files
- call sync_all()
- if( myrank == 0) then
- write(IMAIN,*) ' ...saving databases'
- endif
- !call create_name_database(prname,myrank,LOCAL_PATH)
- call save_arrays_solver_ext_mesh(nspec,nglob, &
- xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore,&
- gammaxstore,gammaystore,gammazstore, &
- jacobianstore, rho_vp,rho_vs,qmu_attenuation_store, &
- rhostore,kappastore,mustore, &
- rmass,rmass_acoustic,rmass_solid_poroelastic,rmass_fluid_poroelastic, &
- OCEANS,rmass_ocean_load,NGLOB_OCEAN,ibool,xstore_dummy,ystore_dummy,zstore_dummy, &
- abs_boundary_normal,abs_boundary_jacobian2Dw, &
- abs_boundary_ijk,abs_boundary_ispec,num_abs_boundary_faces, &
- free_surface_normal,free_surface_jacobian2Dw, &
- free_surface_ijk,free_surface_ispec,num_free_surface_faces, &
- coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
- coupling_ac_el_ijk,coupling_ac_el_ispec,num_coupling_ac_el_faces, &
- num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
- max_interface_size_ext_mesh,ibool_interfaces_ext_mesh, &
- prname,SAVE_MESH_FILES,ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store, &
- c22store,c23store,c24store,c25store,c26store,c33store, &
- c34store,c35store,c36store,c44store,c45store,c46store, &
- c55store,c56store,c66store, &
- ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
-
-! computes the approximate amount of static memory needed to run the solver
- call memory_eval(nspec,nglob,maxval(nibool_interfaces_ext_mesh),num_interfaces_ext_mesh, &
- OCEANS,static_memory_size)
- call max_all_dp(static_memory_size, max_static_memory_size)
-
-! checks the mesh, stability and resolved period
- call sync_all()
- call check_mesh_resolution(myrank,nspec,nglob,ibool,&
- xstore_dummy,ystore_dummy,zstore_dummy, &
- kappastore,mustore,rho_vp,rho_vs, &
- -1.0d0, model_speed_max,min_resolved_period )
-
-! saves binary mesh files for attenuation
- if( ATTENUATION ) then
- call get_attenuation_model(myrank,nspec,USE_OLSEN_ATTENUATION, &
- mustore,rho_vs,qmu_attenuation_store, &
- ispec_is_elastic,min_resolved_period,prname)
- endif
-
-
-! VTK file output
-! if( SAVE_MESH_FILES ) then
-! ! saves material flag assigned for each spectral element into a vtk file
-! prname_file = prname(1:len_trim(prname))//'material_flag'
-! allocate(elem_flag(nspec))
-! elem_flag(:) = mat_ext_mesh(1,:)
-! call write_VTK_data_elem_i(nspec,nglob, &
-! xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-! elem_flag,prname_file)
-! deallocate(elem_flag)
-!
-! !plotting abs boundaries
-! ! allocate(itest_flag(nspec))
-! ! itest_flag(:) = 0
-! ! do ispec=1,nspec
-! ! if( iboun(1,ispec) ) itest_flag(ispec) = 1
-! ! enddo
-! ! prname_file = prname(1:len_trim(prname))//'iboundary1_flag'
-! ! call write_VTK_data_elem_i(nspec,nglob, &
-! ! xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-! ! itest_flag,prname_file)
-! ! deallocate(itest_flag)
-! endif
-
-! cleanup
- if( .not. SAVE_MOHO_MESH ) deallocate(xstore_dummy,ystore_dummy,zstore_dummy)
- deallocate(xixstore,xiystore,xizstore,&
- etaxstore,etaystore,etazstore,&
- gammaxstore,gammaystore,gammazstore)
- deallocate(jacobianstore,qmu_attenuation_store)
- deallocate(kappastore,mustore,rho_vp,rho_vs)
-
-end subroutine create_regions_mesh_ext
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-subroutine crm_ext_allocate_arrays(nspec,LOCAL_PATH,myrank, &
- nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
- nspec2D_bottom,nspec2D_top,ANISOTROPY)
-
- use create_regions_mesh_ext_par
- implicit none
-
- integer :: nspec,myrank
- integer :: nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
- nspec2D_bottom,nspec2D_top
-
- character(len=256) :: LOCAL_PATH
-
- logical :: ANISOTROPY
-
-! local parameters
- integer :: ier
-
-! memory test
-! logical,dimension(:),allocatable :: test_mem
-!
-! tests memory availability (including some small buffer of 10*1024 byte)
-! allocate( test_mem(int(max_static_memory_size)+10*1024),stat=ier)
-! if(ier /= 0) then
-! write(IMAIN,*) 'error: try to increase the available process stack size by'
-! write(IMAIN,*) ' ulimit -s **** '
-! call exit_MPI(myrank,'not enough memory to allocate arrays')
-! endif
-! test_mem(:) = .true.
-! deallocate( test_mem, stat=ier)
-! if(ier /= 0) call exit_MPI(myrank,'error to allocate arrays')
-! call sync_all()
-
- allocate( xelm(NGNOD),yelm(NGNOD),zelm(NGNOD),stat=ier)
- if( ier /= 0 ) stop 'error allocating array xelm etc.'
-
- allocate( qmu_attenuation_store(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! create the name for the database of the current slide and region
- call create_name_database(prname,myrank,LOCAL_PATH)
-
-! Gauss-Lobatto-Legendre points of integration
- allocate(xigll(NGLLX),yigll(NGLLY),zigll(NGLLZ),stat=ier)
- if( ier /= 0 ) stop 'error allocating array xigll etc.'
-
-! Gauss-Lobatto-Legendre weights of integration
- allocate(wxgll(NGLLX),wygll(NGLLY),wzgll(NGLLZ),stat=ier)
- if( ier /= 0 ) stop 'error allocating array wxgll etc.'
-
-! 3D shape functions and their derivatives
- allocate(shape3D(NGNOD,NGLLX,NGLLY,NGLLZ), &
- dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ),stat=ier)
- if( ier /= 0 ) stop 'error allocating array shape3D etc.'
-
-! 2D shape functions and their derivatives
- allocate(shape2D_x(NGNOD2D,NGLLY,NGLLZ), &
- shape2D_y(NGNOD2D,NGLLX,NGLLZ), &
- shape2D_bottom(NGNOD2D,NGLLX,NGLLY), &
- shape2D_top(NGNOD2D,NGLLX,NGLLY),stat=ier)
- if( ier /= 0 ) stop 'error allocating array shape2D_x etc.'
-
- allocate(dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ), &
- dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ), &
- dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY), &
- dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY),stat=ier)
- if( ier /= 0 ) stop 'error allocating array dershape2D_x etc.'
-
- allocate(wgllwgll_xy(NGLLX,NGLLY), &
- wgllwgll_xz(NGLLX,NGLLZ), &
- wgllwgll_yz(NGLLY,NGLLZ),stat=ier)
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! Stacey
- allocate(rho_vp(NGLLX,NGLLY,NGLLZ,nspec), &
- rho_vs(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! array with model density
- allocate(rhostore(NGLLX,NGLLY,NGLLZ,nspec), &
- kappastore(NGLLX,NGLLY,NGLLZ,nspec), &
- mustore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
- !vpstore(NGLLX,NGLLY,NGLLZ,nspec), &
- !vsstore(NGLLX,NGLLY,NGLLZ,nspec),
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! arrays with mesh parameters
- allocate(xixstore(NGLLX,NGLLY,NGLLZ,nspec), &
- xiystore(NGLLX,NGLLY,NGLLZ,nspec), &
- xizstore(NGLLX,NGLLY,NGLLZ,nspec), &
- etaxstore(NGLLX,NGLLY,NGLLZ,nspec), &
- etaystore(NGLLX,NGLLY,NGLLZ,nspec), &
- etazstore(NGLLX,NGLLY,NGLLZ,nspec), &
- gammaxstore(NGLLX,NGLLY,NGLLZ,nspec), &
- gammaystore(NGLLX,NGLLY,NGLLZ,nspec), &
- gammazstore(NGLLX,NGLLY,NGLLZ,nspec), &
- jacobianstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! absorbing boundary
- ! absorbing faces
- num_abs_boundary_faces = nspec2D_xmin + nspec2D_xmax + nspec2D_ymin + nspec2D_ymax + nspec2D_bottom
- ! adds faces of free surface if it also absorbs
- if( ABSORB_FREE_SURFACE ) num_abs_boundary_faces = num_abs_boundary_faces + nspec2D_top
-
- ! allocates arrays to store info for each face (assumes NGLLX=NGLLY=NGLLZ)
- allocate( abs_boundary_ispec(num_abs_boundary_faces), &
- abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces), &
- abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces), &
- abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces),stat=ier)
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
- ! free surface faces
- num_free_surface_faces = nspec2D_top
-
- ! allocates arrays to store info for each face (assumes NGLLX=NGLLY=NGLLZ)
- allocate( free_surface_ispec(num_free_surface_faces), &
- free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces), &
- free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces), &
- free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces),stat=ier)
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! array with anisotropy
- if( ANISOTROPY ) then
- NSPEC_ANISO = nspec
- else
- NSPEC_ANISO = 1
- endif
- allocate(c11store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c12store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c13store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c14store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c15store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c16store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c22store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c23store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c24store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c25store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c26store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c33store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c34store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c35store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c36store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c44store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c45store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c46store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c55store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c56store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
- c66store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO),stat=ier)
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! material flags
- allocate( ispec_is_acoustic(nspec), &
- ispec_is_elastic(nspec), &
- ispec_is_poroelastic(nspec), stat=ier)
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-end subroutine crm_ext_allocate_arrays
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-subroutine crm_ext_setup_jacobian(myrank, &
- xstore,ystore,zstore,nspec, &
- nodes_coords_ext_mesh,nnodes_ext_mesh,&
- elmnts_ext_mesh,nelmnts_ext_mesh)
-
- use create_regions_mesh_ext_par
- implicit none
-
-! number of spectral elements in each block
- integer :: nspec
-
- double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-
-! data from the external mesh
- integer :: nnodes_ext_mesh,nelmnts_ext_mesh
- double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
- integer, dimension(ESIZE,nelmnts_ext_mesh) :: elmnts_ext_mesh
-
-! proc numbers for MPI
- integer :: myrank
-
-! local parameters
- integer :: ispec,ia,i,j,k
-
-! set up coordinates of the Gauss-Lobatto-Legendre points
- call zwgljd(xigll,wxgll,NGLLX,GAUSSALPHA,GAUSSBETA)
- call zwgljd(yigll,wygll,NGLLY,GAUSSALPHA,GAUSSBETA)
- call zwgljd(zigll,wzgll,NGLLZ,GAUSSALPHA,GAUSSBETA)
-
-! if number of points is odd, the middle abscissa is exactly zero
- if(mod(NGLLX,2) /= 0) xigll((NGLLX-1)/2+1) = ZERO
- if(mod(NGLLY,2) /= 0) yigll((NGLLY-1)/2+1) = ZERO
- if(mod(NGLLZ,2) /= 0) zigll((NGLLZ-1)/2+1) = ZERO
-
-! get the 3-D shape functions
- call get_shape3D(myrank,shape3D,dershape3D,xigll,yigll,zigll)
-
-! get the 2-D shape functions
- call get_shape2D(myrank,shape2D_x,dershape2D_x,yigll,zigll,NGLLY,NGLLZ)
- call get_shape2D(myrank,shape2D_y,dershape2D_y,xigll,zigll,NGLLX,NGLLZ)
- call get_shape2D(myrank,shape2D_bottom,dershape2D_bottom,xigll,yigll,NGLLX,NGLLY)
- call get_shape2D(myrank,shape2D_top,dershape2D_top,xigll,yigll,NGLLX,NGLLY)
-
-! 2D weights
- do j=1,NGLLY
- do i=1,NGLLX
- wgllwgll_xy(i,j) = wxgll(i)*wygll(j)
- enddo
- enddo
- do k=1,NGLLZ
- do i=1,NGLLX
- wgllwgll_xz(i,k) = wxgll(i)*wzgll(k)
- enddo
- enddo
- do k=1,NGLLZ
- do j=1,NGLLY
- wgllwgll_yz(j,k) = wygll(j)*wzgll(k)
- enddo
- enddo
-
-! point locations
- xstore(:,:,:,:) = 0.d0
- ystore(:,:,:,:) = 0.d0
- zstore(:,:,:,:) = 0.d0
-
- do ispec = 1, nspec
- !call get_xyzelm(xelm, yelm, zelm, ispec, elmnts_ext_mesh, nodes_coords_ext_mesh, nspec, nnodes_ext_mesh)
- do ia = 1,NGNOD
- xelm(ia) = nodes_coords_ext_mesh(1,elmnts_ext_mesh(ia,ispec))
- yelm(ia) = nodes_coords_ext_mesh(2,elmnts_ext_mesh(ia,ispec))
- zelm(ia) = nodes_coords_ext_mesh(3,elmnts_ext_mesh(ia,ispec))
- enddo
-
- ! CUBIT should provide a mesh ordering such that the 3D jacobian is defined
- ! (otherwise mesh would be degenerated)
- call calc_jacobian(myrank,xixstore,xiystore,xizstore, &
- etaxstore,etaystore,etazstore, &
- gammaxstore,gammaystore,gammazstore,jacobianstore, &
- xstore,ystore,zstore, &
- xelm,yelm,zelm,shape3D,dershape3D,ispec,nspec)
-
- enddo
-
-end subroutine crm_ext_setup_jacobian
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-subroutine crm_ext_setup_indexing(ibool, &
- xstore,ystore,zstore,nspec,nglob,npointot, &
- nnodes_ext_mesh,nodes_coords_ext_mesh,myrank)
-
-! creates global indexing array ibool
-
- use create_regions_mesh_ext_par
- implicit none
-
-! number of spectral elements in each block
- integer :: nspec,nglob,npointot,myrank
-
-! arrays with the mesh
- integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
- double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-
-! data from the external mesh
- integer :: nnodes_ext_mesh
- double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
-
-! local parameters
-! variables for creating array ibool
- double precision, dimension(:), allocatable :: xp,yp,zp
- integer, dimension(:), allocatable :: locval
- logical, dimension(:), allocatable :: ifseg
-
- integer :: ieoff,ilocnum,ier
- integer :: i,j,k,ispec,iglobnum
-
-! allocate memory for arrays
- allocate(locval(npointot), &
- ifseg(npointot), &
- xp(npointot), &
- yp(npointot), &
- zp(npointot),stat=ier)
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
-! creates temporary global point arrays
- locval = 0
- ifseg = .false.
- xp = 0.d0
- yp = 0.d0
- zp = 0.d0
-
- do ispec=1,nspec
- ieoff = NGLLX * NGLLY * NGLLZ * (ispec-1)
- ilocnum = 0
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- ilocnum = ilocnum + 1
- xp(ilocnum+ieoff) = xstore(i,j,k,ispec)
- yp(ilocnum+ieoff) = ystore(i,j,k,ispec)
- zp(ilocnum+ieoff) = zstore(i,j,k,ispec)
- enddo
- enddo
- enddo
- enddo
-
-! gets ibool indexing from local (GLL points) to global points
- call get_global(nspec,xp,yp,zp,ibool,locval,ifseg,nglob,npointot, &
- minval(nodes_coords_ext_mesh(1,:)),maxval(nodes_coords_ext_mesh(1,:)))
-
-!- we can create a new indirect addressing to reduce cache misses
- call get_global_indirect_addressing(nspec,nglob,ibool)
-
-!cleanup
- deallocate(xp,stat=ier); if(ier /= 0) stop 'error in deallocate'
- deallocate(yp,stat=ier); if(ier /= 0) stop 'error in deallocate'
- deallocate(zp,stat=ier); if(ier /= 0) stop 'error in deallocate'
- deallocate(locval,stat=ier); if(ier /= 0) stop 'error in deallocate'
- deallocate(ifseg,stat=ier); if(ier /= 0) stop 'error in deallocate'
-
-! unique global point locations
- allocate(xstore_dummy(nglob), &
- ystore_dummy(nglob), &
- zstore_dummy(nglob),stat=ier)
- if(ier /= 0) stop 'error in allocate'
- do ispec = 1, nspec
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglobnum = ibool(i,j,k,ispec)
- xstore_dummy(iglobnum) = xstore(i,j,k,ispec)
- ystore_dummy(iglobnum) = ystore(i,j,k,ispec)
- zstore_dummy(iglobnum) = zstore(i,j,k,ispec)
- enddo
- enddo
- enddo
- enddo
-
- end subroutine crm_ext_setup_indexing
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
- subroutine create_regions_mesh_save_moho( myrank,nglob,nspec, &
- nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
- nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
-
- use create_regions_mesh_ext_par
- implicit none
-
- integer :: nspec2D_moho_ext
- integer, dimension(nspec2D_moho_ext) :: ibelm_moho
- integer, dimension(4,nspec2D_moho_ext) :: nodes_ibelm_moho
-
- integer :: myrank,nglob,nspec
-
- ! data from the external mesh
- integer :: nnodes_ext_mesh
- double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-
-! local parameters
- ! Moho mesh
- real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_top
- real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_bot
- integer,dimension(:,:,:),allocatable :: ijk_moho_top, ijk_moho_bot
- integer,dimension(:),allocatable :: ibelm_moho_top, ibelm_moho_bot
- integer :: NSPEC2D_MOHO
- logical, dimension(:),allocatable :: is_moho_top, is_moho_bot
-
- real(kind=CUSTOM_REAL),dimension(NGNOD2D) :: xcoord,ycoord,zcoord
- real(kind=CUSTOM_REAL) :: jacobian2Dw_face(NGLLX,NGLLY)
- real(kind=CUSTOM_REAL) :: normal_face(NDIM,NGLLX,NGLLY)
- real(kind=CUSTOM_REAL),dimension(NDIM):: normal
- integer :: ijk_face(3,NGLLX,NGLLY)
-
- real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: iglob_normals
- integer,dimension(:),allocatable:: iglob_is_surface
-
- integer :: imoho_bot,imoho_top
- integer :: ispec2D,ispec,icorner,iface,i,j,k,igll,iglob,ier
- integer :: iglob_midpoint,idirect,counter
- integer :: imoho_top_all,imoho_bot_all,imoho_all
-
- ! corners indices of reference cube faces
- integer,dimension(3,4),parameter :: iface1_corner_ijk = &
- reshape( (/ 1,1,1, 1,NGLLY,1, 1,NGLLY,NGLLZ, 1,1,NGLLZ /),(/3,4/)) ! xmin
- integer,dimension(3,4),parameter :: iface2_corner_ijk = &
- reshape( (/ NGLLX,1,1, NGLLX,NGLLY,1, NGLLX,NGLLY,NGLLZ, NGLLX,1,NGLLZ /),(/3,4/)) ! xmax
- integer,dimension(3,4),parameter :: iface3_corner_ijk = &
- reshape( (/ 1,1,1, 1,1,NGLLZ, NGLLX,1,NGLLZ, NGLLX,1,1 /),(/3,4/)) ! ymin
- integer,dimension(3,4),parameter :: iface4_corner_ijk = &
- reshape( (/ 1,NGLLY,1, NGLLX,NGLLY,1, NGLLX,NGLLY,NGLLZ, 1,NGLLY,NGLLZ /),(/3,4/)) ! ymax
- integer,dimension(3,4),parameter :: iface5_corner_ijk = &
- reshape( (/ 1,1,1, 1,NGLLY,1, NGLLX,NGLLY,1, NGLLX,1,1 /),(/3,4/)) ! bottom
- integer,dimension(3,4),parameter :: iface6_corner_ijk = &
- reshape( (/ 1,1,NGLLZ, NGLLX,1,NGLLZ, NGLLX,NGLLY,NGLLZ, 1,NGLLY,NGLLZ /),(/3,4/)) ! top
- integer,dimension(3,4,6),parameter :: iface_all_corner_ijk = &
- reshape( (/ iface1_corner_ijk,iface2_corner_ijk, &
- iface3_corner_ijk,iface4_corner_ijk, &
- iface5_corner_ijk,iface6_corner_ijk /),(/3,4,6/)) ! all faces
- ! midpoint indices for each face (xmin,xmax,ymin,ymax,zmin,zmax)
- integer,dimension(3,6),parameter :: iface_all_midpointijk = &
- reshape( (/ 1,2,2, NGLLX,2,2, 2,1,2, 2,NGLLY,2, 2,2,1, 2,2,NGLLZ /),(/3,6/)) ! top
-
- ! temporary arrays for passing information
- allocate(iglob_is_surface(nglob), &
- iglob_normals(NDIM,nglob),stat=ier)
- if( ier /= 0 ) stop 'error allocating array iglob_is_surface'
-
- iglob_is_surface = 0
- iglob_normals = 0._CUSTOM_REAL
-
- ! loops over given moho surface elements
- do ispec2D=1, nspec2D_moho_ext
-
- ! gets element id
- ispec = ibelm_moho(ispec2D)
-
- ! looks for i,j,k indices of GLL points on boundary face
- ! determines element face by given CUBIT corners
- ! (note: uses point locations rather than point indices to find the element face,
- ! because the indices refer no more to the newly indexed ibool array )
- do icorner=1,NGNOD2D
- xcoord(icorner) = nodes_coords_ext_mesh(1,nodes_ibelm_moho(icorner,ispec2D))
- ycoord(icorner) = nodes_coords_ext_mesh(2,nodes_ibelm_moho(icorner,ispec2D))
- zcoord(icorner) = nodes_coords_ext_mesh(3,nodes_ibelm_moho(icorner,ispec2D))
- enddo
-
- ! sets face id of reference element associated with this face
- call get_element_face_id(ispec,xcoord,ycoord,zcoord, &
- ibool,nspec,nglob, &
- xstore_dummy,ystore_dummy,zstore_dummy, &
- iface)
-
- ! ijk indices of GLL points for face id
- call get_element_face_gll_indices(iface,ijk_face,NGLLX,NGLLZ)
-
- ! weighted jacobian and normal
- call get_jacobian_boundary_face(myrank,nspec, &
- xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
- dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
- ispec,iface,jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
-
- ! normal convention: points away from element
- ! switch normal direction if necessary
- do j=1,NGLLY
- do i=1,NGLLX
- call get_element_face_normal(ispec,iface,xcoord,ycoord,zcoord, &
- ibool,nspec,nglob, &
- xstore_dummy,ystore_dummy,zstore_dummy, &
- normal_face(:,i,j) )
- enddo
- enddo
-
- ! stores information on global points on moho surface
- igll = 0
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(ijk_face(1,i,j),ijk_face(2,i,j),ijk_face(3,i,j),ispec)
- ! sets flag
- iglob_is_surface(iglob) = ispec2D
- ! sets normals
- iglob_normals(:,iglob) = normal_face(:,i,j)
- enddo
- enddo
- enddo
-
- ! stores moho elements
- NSPEC2D_MOHO = nspec2D_moho_ext
-
- allocate(ibelm_moho_bot(NSPEC2D_MOHO), &
- ibelm_moho_top(NSPEC2D_MOHO), &
- normal_moho_top(NDIM,NGLLSQUARE,NSPEC2D_MOHO), &
- normal_moho_bot(NDIM,NGLLSQUARE,NSPEC2D_MOHO), &
- ijk_moho_bot(3,NGLLSQUARE,NSPEC2D_MOHO), &
- ijk_moho_top(3,NGLLSQUARE,NSPEC2D_MOHO),stat=ier)
- if( ier /= 0 ) stop 'error allocating ibelm_moho_bot'
-
- ibelm_moho_bot = 0
- ibelm_moho_top = 0
-
- ! element flags
- allocate(is_moho_top(nspec), &
- is_moho_bot(nspec),stat=ier)
- if( ier /= 0 ) stop 'error allocating is_moho_top'
- is_moho_top = .false.
- is_moho_bot = .false.
-
- ! finds spectral elements with moho surface
- imoho_top = 0
- imoho_bot = 0
- do ispec=1,nspec
-
- ! loops over each face
- do iface = 1,6
- ! checks if corners of face on surface
- counter = 0
- do icorner = 1,NGNOD2D
- i = iface_all_corner_ijk(1,icorner,iface)
- j = iface_all_corner_ijk(2,icorner,iface)
- k = iface_all_corner_ijk(3,icorner,iface)
- iglob = ibool(i,j,k,ispec)
-
- ! checks if point on surface
- if( iglob_is_surface(iglob) > 0 ) then
- counter = counter+1
-
- ! reference corner coordinates
- xcoord(icorner) = xstore_dummy(iglob)
- ycoord(icorner) = ystore_dummy(iglob)
- zcoord(icorner) = zstore_dummy(iglob)
- endif
- enddo
-
- ! stores moho informations
- if( counter == NGNOD2D ) then
-
- ! gets face GLL points i,j,k indices from element face
- call get_element_face_gll_indices(iface,ijk_face,NGLLX,NGLLY)
-
- ! re-computes face infos
- ! weighted jacobian and normal
- call get_jacobian_boundary_face(myrank,nspec, &
- xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
- dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
- ispec,iface,jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
-
- ! normal convention: points away from element
- ! switch normal direction if necessary
- do j=1,NGLLZ
- do i=1,NGLLX
- call get_element_face_normal(ispec,iface,xcoord,ycoord,zcoord, &
- ibool,nspec,nglob, &
- xstore_dummy,ystore_dummy,zstore_dummy, &
- normal_face(:,i,j) )
- enddo
- enddo
-
- ! takes normal stored temporary on a face midpoint
- i = iface_all_midpointijk(1,iface)
- j = iface_all_midpointijk(2,iface)
- k = iface_all_midpointijk(3,iface)
- iglob_midpoint = ibool(i,j,k,ispec)
- normal(:) = iglob_normals(:,iglob_midpoint)
-
- ! determines whether normal points into element or not (top/bottom distinction)
- call get_element_face_normal_idirect(ispec,iface,xcoord,ycoord,zcoord, &
- ibool,nspec,nglob, &
- xstore_dummy,ystore_dummy,zstore_dummy, &
- normal,idirect )
-
- ! takes moho surface element id given by id on midpoint
- ispec2D = iglob_is_surface(iglob_midpoint)
-
- ! sets face infos for bottom (normal points away from element)
- if( idirect == 1 ) then
-
- ! checks validity
- if( is_moho_bot( ispec) .eqv. .true. ) then
- print*,'error: moho surface geometry bottom'
- print*,' does not allow for mulitple element faces in kernel computation'
- call exit_mpi(myrank,'error moho bottom elements')
- endif
-
- imoho_bot = imoho_bot + 1
- is_moho_bot(ispec) = .true.
- ibelm_moho_bot(ispec2D) = ispec
-
- ! stores on surface gll points (assuming NGLLX = NGLLY = NGLLZ)
- igll = 0
- do j=1,NGLLZ
- do i=1,NGLLX
- igll = igll+1
- ijk_moho_bot(:,igll,ispec2D) = ijk_face(:,i,j)
- normal_moho_bot(:,igll,ispec2D) = normal_face(:,i,j)
- enddo
- enddo
-
- ! sets face infos for top element
- else if( idirect == 2 ) then
-
- ! checks validity
- if( is_moho_top( ispec) .eqv. .true. ) then
- print*,'error: moho surface geometry top'
- print*,' does not allow for mulitple element faces kernel computation'
- call exit_mpi(myrank,'error moho top elements')
- endif
-
- imoho_top = imoho_top + 1
- is_moho_top(ispec) = .true.
- ibelm_moho_top(ispec2D) = ispec
-
- ! gll points
- igll = 0
- do j=1,NGLLZ
- do i=1,NGLLX
- igll = igll+1
- ijk_moho_top(:,igll,ispec) = ijk_face(:,i,j)
- ! note: top elements have normal pointing into element
- normal_moho_top(:,igll,ispec) = - normal_face(:,i,j)
- enddo
- enddo
- endif
-
- endif ! counter
-
- enddo ! iface
-
- ! checks validity of top/bottom distinction
- if( is_moho_top(ispec) .and. is_moho_bot(ispec) ) then
- print*,'error: moho surface elements confusing'
- print*,' element:',ispec,'has top and bottom surface'
- call exit_mpi(myrank,'error moho surface element')
- endif
-
- enddo ! ispec2D
-
- ! note: surface e.g. could be at the free-surface and have no top elements etc...
- ! user output
- call sum_all_i( imoho_top, imoho_top_all )
- call sum_all_i( imoho_bot, imoho_bot_all )
- call sum_all_i( NSPEC2D_MOHO, imoho_all )
- if( myrank == 0 ) then
- write(IMAIN,*) '********'
- write(IMAIN,*) 'Moho surface:'
- write(IMAIN,*) ' total surface elements: ',imoho_all
- write(IMAIN,*) ' top elements :',imoho_top_all
- write(IMAIN,*) ' bottom elements:',imoho_bot_all
- write(IMAIN,*) '********'
- endif
-
- ! saves moho files: total number of elements, corner points, all points
- open(unit=27,file=prname(1:len_trim(prname))//'ibelm_moho.bin',status='unknown',form='unformatted')
- write(27) NSPEC2D_MOHO
- write(27) ibelm_moho_top
- write(27) ibelm_moho_bot
- write(27) ijk_moho_top
- write(27) ijk_moho_bot
- close(27)
- open(unit=27,file=prname(1:len_trim(prname))//'normal_moho.bin',status='unknown',form='unformatted')
- write(27) normal_moho_top
- write(27) normal_moho_bot
- close(27)
- open(unit=27,file=prname(1:len_trim(prname))//'is_moho.bin',status='unknown',form='unformatted')
- write(27) is_moho_top
- write(27) is_moho_bot
- close(27)
-
- end subroutine create_regions_mesh_save_moho
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+module create_regions_mesh_ext_par
+
+ include 'constants.h'
+
+! global point coordinates
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: xstore_dummy
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: ystore_dummy
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: zstore_dummy
+
+! Gauss-Lobatto-Legendre points and weights of integration
+ double precision, dimension(:), allocatable :: xigll,yigll,zigll,wxgll,wygll,wzgll
+
+! 3D shape functions and their derivatives
+ double precision, dimension(:,:,:,:), allocatable :: shape3D
+ double precision, dimension(:,:,:,:,:), allocatable :: dershape3D
+
+ double precision, dimension(:), allocatable :: xelm,yelm,zelm
+
+! arrays with mesh parameters
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: xixstore,xiystore,xizstore, &
+ etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore,jacobianstore
+
+! for model density, kappa, mu
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rhostore,kappastore,mustore
+
+! mass matrix
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass,rmass_acoustic,&
+ rmass_solid_poroelastic,rmass_fluid_poroelastic
+
+! ocean load
+ integer :: NGLOB_OCEAN
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_ocean_load
+
+! attenuation
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: qmu_attenuation_store
+
+! 2D shape functions and their derivatives, weights
+ double precision, dimension(:,:,:), allocatable :: shape2D_x,shape2D_y,shape2D_bottom,shape2D_top
+ double precision, dimension(:,:,:,:), allocatable :: dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top
+ double precision, dimension(:,:), allocatable :: wgllwgll_xy,wgllwgll_xz,wgllwgll_yz
+
+! absorbing boundary arrays (for all boundaries) - keeps all infos, allowing for irregular surfaces
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: abs_boundary_normal
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: abs_boundary_jacobian2Dw
+ integer, dimension(:,:,:), allocatable :: abs_boundary_ijk
+ integer, dimension(:), allocatable :: abs_boundary_ispec
+ integer :: num_abs_boundary_faces
+
+! free surface arrays
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: free_surface_normal
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: free_surface_jacobian2Dw
+ integer, dimension(:,:,:), allocatable :: free_surface_ijk
+ integer, dimension(:), allocatable :: free_surface_ispec
+ integer :: num_free_surface_faces
+
+! acoustic-elastic coupling surface
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: coupling_ac_el_normal
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: coupling_ac_el_jacobian2Dw
+ integer, dimension(:,:,:), allocatable :: coupling_ac_el_ijk
+ integer, dimension(:), allocatable :: coupling_ac_el_ispec
+ integer :: num_coupling_ac_el_faces
+
+! for stacey
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rho_vp,rho_vs
+
+! anisotropy
+ integer :: NSPEC_ANISO
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store
+
+! material domain flags
+ logical, dimension(:), allocatable :: ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic
+
+! name of the database file
+ character(len=256) prname
+
+end module create_regions_mesh_ext_par
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! main routine
+
+subroutine create_regions_mesh_ext(ibool, &
+ xstore,ystore,zstore,nspec,npointot,myrank,LOCAL_PATH, &
+ nnodes_ext_mesh,nelmnts_ext_mesh, &
+ nodes_coords_ext_mesh, elmnts_ext_mesh, &
+ max_static_memory_size, mat_ext_mesh, materials_ext_mesh, &
+ nmat_ext_mesh, undef_mat_prop, nundefMat_ext_mesh, &
+ num_interfaces_ext_mesh, max_interface_size_ext_mesh, &
+ my_neighbours_ext_mesh, my_nelmnts_neighbours_ext_mesh, &
+ my_interfaces_ext_mesh, &
+ ibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh, &
+ nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, &
+ NSPEC2D_BOTTOM, NSPEC2D_TOP,&
+ ibelm_xmin, ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top, &
+ nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax,&
+ nodes_ibelm_bottom,nodes_ibelm_top, &
+ SAVE_MESH_FILES,nglob, &
+ ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
+ ATTENUATION,USE_OLSEN_ATTENUATION, &
+ UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
+ ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
+ itopo_bathy)
+
+! create the different regions of the mesh
+
+ use create_regions_mesh_ext_par
+ implicit none
+ !include "constants.h"
+
+! number of spectral elements in each block
+ integer :: nspec
+
+! arrays with the mesh
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+ double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+
+ integer :: npointot
+
+! proc numbers for MPI
+ integer :: myrank
+ integer :: NPROC
+
+ character(len=256) :: LOCAL_PATH
+
+! data from the external mesh
+ integer :: nnodes_ext_mesh,nelmnts_ext_mesh
+ double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
+ integer, dimension(ESIZE,nelmnts_ext_mesh) :: elmnts_ext_mesh
+
+! static memory size needed by the solver
+ double precision :: max_static_memory_size
+
+ integer, dimension(2,nelmnts_ext_mesh) :: mat_ext_mesh
+
+! material properties
+ integer :: nmat_ext_mesh,nundefMat_ext_mesh
+ double precision, dimension(6,nmat_ext_mesh) :: materials_ext_mesh
+ character (len=30), dimension(6,nundefMat_ext_mesh):: undef_mat_prop
+
+! double precision, external :: materials_ext_mesh
+
+! MPI communication
+ integer :: num_interfaces_ext_mesh,max_interface_size_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: my_nelmnts_neighbours_ext_mesh
+ integer, dimension(6,max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: my_interfaces_ext_mesh
+ integer, dimension(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+
+! absorbing boundaries
+ integer :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, NSPEC2D_BOTTOM, NSPEC2D_TOP
+ integer, dimension(nspec2D_xmin) :: ibelm_xmin
+ integer, dimension(nspec2D_xmax) :: ibelm_xmax
+ integer, dimension(nspec2D_ymin) :: ibelm_ymin
+ integer, dimension(nspec2D_ymax) :: ibelm_ymax
+ integer, dimension(NSPEC2D_BOTTOM) :: ibelm_bottom
+ integer, dimension(NSPEC2D_TOP) :: ibelm_top
+ ! node indices of boundary faces
+ integer, dimension(4,nspec2D_xmin) :: nodes_ibelm_xmin
+ integer, dimension(4,nspec2D_xmax) :: nodes_ibelm_xmax
+ integer, dimension(4,nspec2D_ymin) :: nodes_ibelm_ymin
+ integer, dimension(4,nspec2D_ymax) :: nodes_ibelm_ymax
+ integer, dimension(4,NSPEC2D_BOTTOM) :: nodes_ibelm_bottom
+ integer, dimension(4,NSPEC2D_TOP) :: nodes_ibelm_top
+
+ integer :: nglob
+
+ logical :: SAVE_MESH_FILES
+ logical :: ANISOTROPY
+ logical :: OCEANS,TOPOGRAPHY
+ logical :: ATTENUATION,USE_OLSEN_ATTENUATION
+
+! use integer array to store topography values
+ integer :: UTM_PROJECTION_ZONE
+ logical :: SUPPRESS_UTM_PROJECTION
+ integer :: NX_TOPO,NY_TOPO
+ double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
+ integer, dimension(NX_TOPO,NY_TOPO) :: itopo_bathy
+
+! local parameters
+! static memory size needed by the solver
+ double precision :: static_memory_size
+ real(kind=CUSTOM_REAL) :: model_speed_max,min_resolved_period
+
+! for vtk output
+! character(len=256) prname_file
+! integer,dimension(:),allocatable :: itest_flag
+! integer, dimension(:), allocatable :: elem_flag
+
+! For Piero Basini :
+! integer :: doubling_value_found_for_Piero
+! double precision :: xmesh,ymesh,zmesh
+! double precision :: rho,vp,vs
+
+! integer,dimension(nspec) :: idoubling
+! integer :: doubling_value_found_for_Piero
+! integer, parameter :: NUMBER_OF_STATIONS = 6
+! double precision, parameter :: RADIUS_TO_EXCLUDE = 250.d0
+! double precision, dimension(NUMBER_OF_STATIONS) :: utm_x_station,utm_y_station
+
+! logical :: is_around_a_station
+! integer :: istation
+
+! ! store bedrock values
+! integer :: icornerlat,icornerlong
+! double precision :: lat,long,elevation_bedrock
+! double precision :: lat_corner,long_corner,ratio_xi,ratio_eta
+!real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: ibedrock
+
+! initializes arrays
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) ' ...allocating arrays '
+ endif
+ call crm_ext_allocate_arrays(nspec,LOCAL_PATH,myrank, &
+ nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
+ nspec2D_bottom,nspec2D_top,ANISOTROPY)
+
+
+! fills location and weights for Gauss-Lobatto-Legendre points, shape and derivations,
+! returns jacobianstore,xixstore,...gammazstore
+! and GLL-point locations in xstore,ystore,zstore
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...setting up jacobian '
+ endif
+ call crm_ext_setup_jacobian(myrank, &
+ xstore,ystore,zstore,nspec, &
+ nodes_coords_ext_mesh,nnodes_ext_mesh,&
+ elmnts_ext_mesh,nelmnts_ext_mesh)
+
+! creates ibool index array for projection from local to global points
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...indexing global points'
+ endif
+ call crm_ext_setup_indexing(ibool, &
+ xstore,ystore,zstore,nspec,nglob,npointot, &
+ nnodes_ext_mesh,nodes_coords_ext_mesh,myrank)
+
+! sets up MPI interfaces between partitions
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...preparing MPI interfaces '
+ endif
+ call get_MPI(myrank,nglob,nspec,ibool, &
+ nelmnts_ext_mesh,elmnts_ext_mesh, &
+ my_nelmnts_neighbours_ext_mesh, my_interfaces_ext_mesh, &
+ ibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, &
+ num_interfaces_ext_mesh,max_interface_size_ext_mesh,&
+ my_neighbours_ext_mesh,NPROC)
+
+! sets material velocities
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...determining velocity model'
+ endif
+ ! Default model. Using PREM model instead
+ ! call get_model(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
+ ! materials_ext_mesh,nmat_ext_mesh, &
+ ! undef_mat_prop,nundefMat_ext_mesh, &
+ ! ANISOTROPY,LOCAL_PATH)
+
+ call get_model_PREM(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
+ materials_ext_mesh,nmat_ext_mesh, &
+ undef_mat_prop,nundefMat_ext_mesh, &
+ ANISOTROPY,LOCAL_PATH)
+
+! sets up absorbing/free surface boundaries
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...setting up absorbing boundaries '
+ endif
+ call get_absorbing_boundary(myrank,nspec,nglob,ibool, &
+ nodes_coords_ext_mesh,nnodes_ext_mesh, &
+ ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
+ nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax, &
+ nodes_ibelm_bottom,nodes_ibelm_top, &
+ nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
+ nspec2D_bottom,nspec2D_top)
+
+! sets up acoustic-elastic coupling surfaces
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...detecting acoustic-elastic surfaces '
+ endif
+ call get_coupling_surfaces(myrank, &
+ nspec,nglob,ibool,NPROC, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+ num_interfaces_ext_mesh,max_interface_size_ext_mesh, &
+ my_neighbours_ext_mesh)
+
+! creates mass matrix
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...creating mass matrix '
+ endif
+ call create_mass_matrices(nglob,nspec,ibool)
+
+! creates ocean load mass matrix
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...creating ocean load mass matrix '
+ endif
+ call create_mass_matrices_ocean_load(nglob,nspec,ibool,OCEANS,TOPOGRAPHY, &
+ UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
+ ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
+ itopo_bathy)
+
+! saves the binary mesh files
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...saving databases'
+ endif
+ !call create_name_database(prname,myrank,LOCAL_PATH)
+ call save_arrays_solver_ext_mesh(nspec,nglob, &
+ xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore,&
+ gammaxstore,gammaystore,gammazstore, &
+ jacobianstore, rho_vp,rho_vs,qmu_attenuation_store, &
+ rhostore,kappastore,mustore, &
+ rmass,rmass_acoustic,rmass_solid_poroelastic,rmass_fluid_poroelastic, &
+ OCEANS,rmass_ocean_load,NGLOB_OCEAN, &
+ ibool, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ abs_boundary_normal,abs_boundary_jacobian2Dw, &
+ abs_boundary_ijk,abs_boundary_ispec,num_abs_boundary_faces, &
+ free_surface_normal,free_surface_jacobian2Dw, &
+ free_surface_ijk,free_surface_ispec,num_free_surface_faces, &
+ coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
+ coupling_ac_el_ijk,coupling_ac_el_ispec,num_coupling_ac_el_faces, &
+ num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
+ max_interface_size_ext_mesh,ibool_interfaces_ext_mesh, &
+ prname,SAVE_MESH_FILES, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store, &
+ c22store,c23store,c24store,c25store,c26store,c33store, &
+ c34store,c35store,c36store,c44store,c45store,c46store, &
+ c55store,c56store,c66store, &
+ ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
+
+! computes the approximate amount of static memory needed to run the solver
+ call memory_eval(nspec,nglob,maxval(nibool_interfaces_ext_mesh),num_interfaces_ext_mesh, &
+ OCEANS,static_memory_size)
+ call max_all_dp(static_memory_size, max_static_memory_size)
+
+! checks the mesh, stability and resolved period
+ call sync_all()
+ call check_mesh_resolution(myrank,nspec,nglob,ibool,&
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ kappastore,mustore,rho_vp,rho_vs, &
+ -1.0d0, model_speed_max,min_resolved_period )
+
+! saves binary mesh files for attenuation
+ if( ATTENUATION ) then
+ call get_attenuation_model(myrank,nspec,USE_OLSEN_ATTENUATION, &
+ mustore,rho_vs,qmu_attenuation_store, &
+ ispec_is_elastic,min_resolved_period,prname)
+ endif
+
+
+! VTK file output
+! if( SAVE_MESH_FILES ) then
+! ! saves material flag assigned for each spectral element into a vtk file
+! prname_file = prname(1:len_trim(prname))//'material_flag'
+! allocate(elem_flag(nspec))
+! elem_flag(:) = mat_ext_mesh(1,:)
+! call write_VTK_data_elem_i(nspec,nglob, &
+! xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+! elem_flag,prname_file)
+! deallocate(elem_flag)
+!
+! !plotting abs boundaries
+! ! allocate(itest_flag(nspec))
+! ! itest_flag(:) = 0
+! ! do ispec=1,nspec
+! ! if( iboun(1,ispec) ) itest_flag(ispec) = 1
+! ! enddo
+! ! prname_file = prname(1:len_trim(prname))//'iboundary1_flag'
+! ! call write_VTK_data_elem_i(nspec,nglob, &
+! ! xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+! ! itest_flag,prname_file)
+! ! deallocate(itest_flag)
+! endif
+
+! cleanup
+ if( .not. SAVE_MOHO_MESH ) deallocate(xstore_dummy,ystore_dummy,zstore_dummy)
+ deallocate(xixstore,xiystore,xizstore,&
+ etaxstore,etaystore,etazstore,&
+ gammaxstore,gammaystore,gammazstore)
+ deallocate(jacobianstore,qmu_attenuation_store)
+ deallocate(kappastore,mustore,rho_vp,rho_vs)
+
+end subroutine create_regions_mesh_ext
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine crm_ext_allocate_arrays(nspec,LOCAL_PATH,myrank, &
+ nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
+ nspec2D_bottom,nspec2D_top,ANISOTROPY)
+
+ use create_regions_mesh_ext_par
+ implicit none
+
+ integer :: nspec,myrank
+ integer :: nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
+ nspec2D_bottom,nspec2D_top
+
+ character(len=256) :: LOCAL_PATH
+
+ logical :: ANISOTROPY
+
+! local parameters
+ integer :: ier
+
+! memory test
+! logical,dimension(:),allocatable :: test_mem
+!
+! tests memory availability (including some small buffer of 10*1024 byte)
+! allocate( test_mem(int(max_static_memory_size)+10*1024),stat=ier)
+! if(ier /= 0) then
+! write(IMAIN,*) 'error: try to increase the available process stack size by'
+! write(IMAIN,*) ' ulimit -s **** '
+! call exit_MPI(myrank,'not enough memory to allocate arrays')
+! endif
+! test_mem(:) = .true.
+! deallocate( test_mem, stat=ier)
+! if(ier /= 0) call exit_MPI(myrank,'error to allocate arrays')
+! call sync_all()
+
+ allocate( xelm(NGNOD),yelm(NGNOD),zelm(NGNOD),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array xelm etc.'
+
+ allocate( qmu_attenuation_store(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! create the name for the database of the current slide and region
+ call create_name_database(prname,myrank,LOCAL_PATH)
+
+! Gauss-Lobatto-Legendre points of integration
+ allocate(xigll(NGLLX),yigll(NGLLY),zigll(NGLLZ),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array xigll etc.'
+
+! Gauss-Lobatto-Legendre weights of integration
+ allocate(wxgll(NGLLX),wygll(NGLLY),wzgll(NGLLZ),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array wxgll etc.'
+
+! 3D shape functions and their derivatives
+ allocate(shape3D(NGNOD,NGLLX,NGLLY,NGLLZ), &
+ dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array shape3D etc.'
+
+! 2D shape functions and their derivatives
+ allocate(shape2D_x(NGNOD2D,NGLLY,NGLLZ), &
+ shape2D_y(NGNOD2D,NGLLX,NGLLZ), &
+ shape2D_bottom(NGNOD2D,NGLLX,NGLLY), &
+ shape2D_top(NGNOD2D,NGLLX,NGLLY),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array shape2D_x etc.'
+
+ allocate(dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ), &
+ dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ), &
+ dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY), &
+ dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array dershape2D_x etc.'
+
+ allocate(wgllwgll_xy(NGLLX,NGLLY), &
+ wgllwgll_xz(NGLLX,NGLLZ), &
+ wgllwgll_yz(NGLLY,NGLLZ),stat=ier)
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! Stacey
+ allocate(rho_vp(NGLLX,NGLLY,NGLLZ,nspec), &
+ rho_vs(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! array with model density
+ allocate(rhostore(NGLLX,NGLLY,NGLLZ,nspec), &
+ kappastore(NGLLX,NGLLY,NGLLZ,nspec), &
+ mustore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ !vpstore(NGLLX,NGLLY,NGLLZ,nspec), &
+ !vsstore(NGLLX,NGLLY,NGLLZ,nspec),
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! arrays with mesh parameters
+ allocate(xixstore(NGLLX,NGLLY,NGLLZ,nspec), &
+ xiystore(NGLLX,NGLLY,NGLLZ,nspec), &
+ xizstore(NGLLX,NGLLY,NGLLZ,nspec), &
+ etaxstore(NGLLX,NGLLY,NGLLZ,nspec), &
+ etaystore(NGLLX,NGLLY,NGLLZ,nspec), &
+ etazstore(NGLLX,NGLLY,NGLLZ,nspec), &
+ gammaxstore(NGLLX,NGLLY,NGLLZ,nspec), &
+ gammaystore(NGLLX,NGLLY,NGLLZ,nspec), &
+ gammazstore(NGLLX,NGLLY,NGLLZ,nspec), &
+ jacobianstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! absorbing boundary
+ ! absorbing faces
+ num_abs_boundary_faces = nspec2D_xmin + nspec2D_xmax + nspec2D_ymin + nspec2D_ymax + nspec2D_bottom
+ ! adds faces of free surface if it also absorbs
+ if( ABSORB_FREE_SURFACE ) num_abs_boundary_faces = num_abs_boundary_faces + nspec2D_top
+
+ ! allocates arrays to store info for each face (assumes NGLLX=NGLLY=NGLLZ)
+ allocate( abs_boundary_ispec(num_abs_boundary_faces), &
+ abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces), &
+ abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces), &
+ abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces),stat=ier)
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+ ! free surface faces
+ num_free_surface_faces = nspec2D_top
+
+ ! allocates arrays to store info for each face (assumes NGLLX=NGLLY=NGLLZ)
+ allocate( free_surface_ispec(num_free_surface_faces), &
+ free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces), &
+ free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces), &
+ free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces),stat=ier)
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! array with anisotropy
+ if( ANISOTROPY ) then
+ NSPEC_ANISO = nspec
+ else
+ NSPEC_ANISO = 1
+ endif
+ allocate(c11store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c12store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c13store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c14store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c15store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c16store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c22store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c23store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c24store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c25store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c26store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c33store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c34store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c35store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c36store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c44store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c45store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c46store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c55store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c56store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO), &
+ c66store(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO),stat=ier)
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! material flags
+ allocate( ispec_is_acoustic(nspec), &
+ ispec_is_elastic(nspec), &
+ ispec_is_poroelastic(nspec), stat=ier)
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+end subroutine crm_ext_allocate_arrays
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine crm_ext_setup_jacobian(myrank, &
+ xstore,ystore,zstore,nspec, &
+ nodes_coords_ext_mesh,nnodes_ext_mesh,&
+ elmnts_ext_mesh,nelmnts_ext_mesh)
+
+ use create_regions_mesh_ext_par
+ implicit none
+
+! number of spectral elements in each block
+ integer :: nspec
+
+ double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+
+! data from the external mesh
+ integer :: nnodes_ext_mesh,nelmnts_ext_mesh
+ double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
+ integer, dimension(ESIZE,nelmnts_ext_mesh) :: elmnts_ext_mesh
+
+! proc numbers for MPI
+ integer :: myrank
+
+! local parameters
+ integer :: ispec,ia,i,j,k
+
+! set up coordinates of the Gauss-Lobatto-Legendre points
+ call zwgljd(xigll,wxgll,NGLLX,GAUSSALPHA,GAUSSBETA)
+ call zwgljd(yigll,wygll,NGLLY,GAUSSALPHA,GAUSSBETA)
+ call zwgljd(zigll,wzgll,NGLLZ,GAUSSALPHA,GAUSSBETA)
+
+! if number of points is odd, the middle abscissa is exactly zero
+ if(mod(NGLLX,2) /= 0) xigll((NGLLX-1)/2+1) = ZERO
+ if(mod(NGLLY,2) /= 0) yigll((NGLLY-1)/2+1) = ZERO
+ if(mod(NGLLZ,2) /= 0) zigll((NGLLZ-1)/2+1) = ZERO
+
+! get the 3-D shape functions
+ call get_shape3D(myrank,shape3D,dershape3D,xigll,yigll,zigll)
+
+! get the 2-D shape functions
+ call get_shape2D(myrank,shape2D_x,dershape2D_x,yigll,zigll,NGLLY,NGLLZ)
+ call get_shape2D(myrank,shape2D_y,dershape2D_y,xigll,zigll,NGLLX,NGLLZ)
+ call get_shape2D(myrank,shape2D_bottom,dershape2D_bottom,xigll,yigll,NGLLX,NGLLY)
+ call get_shape2D(myrank,shape2D_top,dershape2D_top,xigll,yigll,NGLLX,NGLLY)
+
+! 2D weights
+ do j=1,NGLLY
+ do i=1,NGLLX
+ wgllwgll_xy(i,j) = wxgll(i)*wygll(j)
+ enddo
+ enddo
+ do k=1,NGLLZ
+ do i=1,NGLLX
+ wgllwgll_xz(i,k) = wxgll(i)*wzgll(k)
+ enddo
+ enddo
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ wgllwgll_yz(j,k) = wygll(j)*wzgll(k)
+ enddo
+ enddo
+
+! point locations
+ xstore(:,:,:,:) = 0.d0
+ ystore(:,:,:,:) = 0.d0
+ zstore(:,:,:,:) = 0.d0
+
+ do ispec = 1, nspec
+ !call get_xyzelm(xelm, yelm, zelm, ispec, elmnts_ext_mesh, nodes_coords_ext_mesh, nspec, nnodes_ext_mesh)
+ do ia = 1,NGNOD
+ xelm(ia) = nodes_coords_ext_mesh(1,elmnts_ext_mesh(ia,ispec))
+ yelm(ia) = nodes_coords_ext_mesh(2,elmnts_ext_mesh(ia,ispec))
+ zelm(ia) = nodes_coords_ext_mesh(3,elmnts_ext_mesh(ia,ispec))
+ enddo
+
+ ! CUBIT should provide a mesh ordering such that the 3D jacobian is defined
+ ! (otherwise mesh would be degenerated)
+ call calc_jacobian(myrank,xixstore,xiystore,xizstore, &
+ etaxstore,etaystore,etazstore, &
+ gammaxstore,gammaystore,gammazstore,jacobianstore, &
+ xstore,ystore,zstore, &
+ xelm,yelm,zelm,shape3D,dershape3D,ispec,nspec)
+
+ enddo
+
+end subroutine crm_ext_setup_jacobian
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine crm_ext_setup_indexing(ibool, &
+ xstore,ystore,zstore,nspec,nglob,npointot, &
+ nnodes_ext_mesh,nodes_coords_ext_mesh,myrank)
+
+! creates global indexing array ibool
+
+ use create_regions_mesh_ext_par
+ implicit none
+
+! number of spectral elements in each block
+ integer :: nspec,nglob,npointot,myrank
+
+! arrays with the mesh
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+ double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+
+! data from the external mesh
+ integer :: nnodes_ext_mesh
+ double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
+
+! local parameters
+! variables for creating array ibool
+ double precision, dimension(:), allocatable :: xp,yp,zp
+ integer, dimension(:), allocatable :: locval
+ logical, dimension(:), allocatable :: ifseg
+
+ integer :: ieoff,ilocnum,ier
+ integer :: i,j,k,ispec,iglobnum
+
+! allocate memory for arrays
+ allocate(locval(npointot), &
+ ifseg(npointot), &
+ xp(npointot), &
+ yp(npointot), &
+ zp(npointot),stat=ier)
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+! creates temporary global point arrays
+ locval = 0
+ ifseg = .false.
+ xp = 0.d0
+ yp = 0.d0
+ zp = 0.d0
+
+ do ispec=1,nspec
+ ieoff = NGLLX * NGLLY * NGLLZ * (ispec-1)
+ ilocnum = 0
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ ilocnum = ilocnum + 1
+ xp(ilocnum+ieoff) = xstore(i,j,k,ispec)
+ yp(ilocnum+ieoff) = ystore(i,j,k,ispec)
+ zp(ilocnum+ieoff) = zstore(i,j,k,ispec)
+ enddo
+ enddo
+ enddo
+ enddo
+
+! gets ibool indexing from local (GLL points) to global points
+ call get_global(nspec,xp,yp,zp,ibool,locval,ifseg,nglob,npointot, &
+ minval(nodes_coords_ext_mesh(1,:)),maxval(nodes_coords_ext_mesh(1,:)))
+
+!- we can create a new indirect addressing to reduce cache misses
+ call get_global_indirect_addressing(nspec,nglob,ibool)
+
+!cleanup
+ deallocate(xp,stat=ier); if(ier /= 0) stop 'error in deallocate'
+ deallocate(yp,stat=ier); if(ier /= 0) stop 'error in deallocate'
+ deallocate(zp,stat=ier); if(ier /= 0) stop 'error in deallocate'
+ deallocate(locval,stat=ier); if(ier /= 0) stop 'error in deallocate'
+ deallocate(ifseg,stat=ier); if(ier /= 0) stop 'error in deallocate'
+
+! unique global point locations
+ allocate(xstore_dummy(nglob), &
+ ystore_dummy(nglob), &
+ zstore_dummy(nglob),stat=ier)
+ if(ier /= 0) stop 'error in allocate'
+ do ispec = 1, nspec
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglobnum = ibool(i,j,k,ispec)
+ xstore_dummy(iglobnum) = xstore(i,j,k,ispec)
+ ystore_dummy(iglobnum) = ystore(i,j,k,ispec)
+ zstore_dummy(iglobnum) = zstore(i,j,k,ispec)
+ enddo
+ enddo
+ enddo
+ enddo
+
+ end subroutine crm_ext_setup_indexing
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine create_regions_mesh_save_moho( myrank,nglob,nspec, &
+ nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
+ nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
+
+ use create_regions_mesh_ext_par
+ implicit none
+
+ integer :: nspec2D_moho_ext
+ integer, dimension(nspec2D_moho_ext) :: ibelm_moho
+ integer, dimension(4,nspec2D_moho_ext) :: nodes_ibelm_moho
+
+ integer :: myrank,nglob,nspec
+
+ ! data from the external mesh
+ integer :: nnodes_ext_mesh
+ double precision, dimension(NDIM,nnodes_ext_mesh) :: nodes_coords_ext_mesh
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+! local parameters
+ ! Moho mesh
+ real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_top
+ real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_bot
+ integer,dimension(:,:,:),allocatable :: ijk_moho_top, ijk_moho_bot
+ integer,dimension(:),allocatable :: ibelm_moho_top, ibelm_moho_bot
+ integer :: NSPEC2D_MOHO
+ logical, dimension(:),allocatable :: is_moho_top, is_moho_bot
+
+ real(kind=CUSTOM_REAL),dimension(NGNOD2D) :: xcoord,ycoord,zcoord
+ real(kind=CUSTOM_REAL) :: jacobian2Dw_face(NGLLX,NGLLY)
+ real(kind=CUSTOM_REAL) :: normal_face(NDIM,NGLLX,NGLLY)
+ real(kind=CUSTOM_REAL),dimension(NDIM):: normal
+ integer :: ijk_face(3,NGLLX,NGLLY)
+
+ real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: iglob_normals
+ integer,dimension(:),allocatable:: iglob_is_surface
+
+ integer :: imoho_bot,imoho_top
+ integer :: ispec2D,ispec,icorner,iface,i,j,k,igll,iglob,ier
+ integer :: iglob_midpoint,idirect,counter
+ integer :: imoho_top_all,imoho_bot_all,imoho_all
+
+ ! corners indices of reference cube faces
+ integer,dimension(3,4),parameter :: iface1_corner_ijk = &
+ reshape( (/ 1,1,1, 1,NGLLY,1, 1,NGLLY,NGLLZ, 1,1,NGLLZ /),(/3,4/)) ! xmin
+ integer,dimension(3,4),parameter :: iface2_corner_ijk = &
+ reshape( (/ NGLLX,1,1, NGLLX,NGLLY,1, NGLLX,NGLLY,NGLLZ, NGLLX,1,NGLLZ /),(/3,4/)) ! xmax
+ integer,dimension(3,4),parameter :: iface3_corner_ijk = &
+ reshape( (/ 1,1,1, 1,1,NGLLZ, NGLLX,1,NGLLZ, NGLLX,1,1 /),(/3,4/)) ! ymin
+ integer,dimension(3,4),parameter :: iface4_corner_ijk = &
+ reshape( (/ 1,NGLLY,1, NGLLX,NGLLY,1, NGLLX,NGLLY,NGLLZ, 1,NGLLY,NGLLZ /),(/3,4/)) ! ymax
+ integer,dimension(3,4),parameter :: iface5_corner_ijk = &
+ reshape( (/ 1,1,1, 1,NGLLY,1, NGLLX,NGLLY,1, NGLLX,1,1 /),(/3,4/)) ! bottom
+ integer,dimension(3,4),parameter :: iface6_corner_ijk = &
+ reshape( (/ 1,1,NGLLZ, NGLLX,1,NGLLZ, NGLLX,NGLLY,NGLLZ, 1,NGLLY,NGLLZ /),(/3,4/)) ! top
+ integer,dimension(3,4,6),parameter :: iface_all_corner_ijk = &
+ reshape( (/ iface1_corner_ijk,iface2_corner_ijk, &
+ iface3_corner_ijk,iface4_corner_ijk, &
+ iface5_corner_ijk,iface6_corner_ijk /),(/3,4,6/)) ! all faces
+ ! midpoint indices for each face (xmin,xmax,ymin,ymax,zmin,zmax)
+ integer,dimension(3,6),parameter :: iface_all_midpointijk = &
+ reshape( (/ 1,2,2, NGLLX,2,2, 2,1,2, 2,NGLLY,2, 2,2,1, 2,2,NGLLZ /),(/3,6/)) ! top
+
+ ! temporary arrays for passing information
+ allocate(iglob_is_surface(nglob), &
+ iglob_normals(NDIM,nglob),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array iglob_is_surface'
+
+ iglob_is_surface = 0
+ iglob_normals = 0._CUSTOM_REAL
+
+ ! loops over given moho surface elements
+ do ispec2D=1, nspec2D_moho_ext
+
+ ! gets element id
+ ispec = ibelm_moho(ispec2D)
+
+ ! looks for i,j,k indices of GLL points on boundary face
+ ! determines element face by given CUBIT corners
+ ! (note: uses point locations rather than point indices to find the element face,
+ ! because the indices refer no more to the newly indexed ibool array )
+ do icorner=1,NGNOD2D
+ xcoord(icorner) = nodes_coords_ext_mesh(1,nodes_ibelm_moho(icorner,ispec2D))
+ ycoord(icorner) = nodes_coords_ext_mesh(2,nodes_ibelm_moho(icorner,ispec2D))
+ zcoord(icorner) = nodes_coords_ext_mesh(3,nodes_ibelm_moho(icorner,ispec2D))
+ enddo
+
+ ! sets face id of reference element associated with this face
+ call get_element_face_id(ispec,xcoord,ycoord,zcoord, &
+ ibool,nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ iface)
+
+ ! ijk indices of GLL points for face id
+ call get_element_face_gll_indices(iface,ijk_face,NGLLX,NGLLZ)
+
+ ! weighted jacobian and normal
+ call get_jacobian_boundary_face(myrank,nspec, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
+ dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
+ ispec,iface,jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
+
+ ! normal convention: points away from element
+ ! switch normal direction if necessary
+ do j=1,NGLLY
+ do i=1,NGLLX
+ call get_element_face_normal(ispec,iface,xcoord,ycoord,zcoord, &
+ ibool,nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ normal_face(:,i,j) )
+ enddo
+ enddo
+
+ ! stores information on global points on moho surface
+ igll = 0
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(ijk_face(1,i,j),ijk_face(2,i,j),ijk_face(3,i,j),ispec)
+ ! sets flag
+ iglob_is_surface(iglob) = ispec2D
+ ! sets normals
+ iglob_normals(:,iglob) = normal_face(:,i,j)
+ enddo
+ enddo
+ enddo
+
+ ! stores moho elements
+ NSPEC2D_MOHO = nspec2D_moho_ext
+
+ allocate(ibelm_moho_bot(NSPEC2D_MOHO), &
+ ibelm_moho_top(NSPEC2D_MOHO), &
+ normal_moho_top(NDIM,NGLLSQUARE,NSPEC2D_MOHO), &
+ normal_moho_bot(NDIM,NGLLSQUARE,NSPEC2D_MOHO), &
+ ijk_moho_bot(3,NGLLSQUARE,NSPEC2D_MOHO), &
+ ijk_moho_top(3,NGLLSQUARE,NSPEC2D_MOHO),stat=ier)
+ if( ier /= 0 ) stop 'error allocating ibelm_moho_bot'
+
+ ibelm_moho_bot = 0
+ ibelm_moho_top = 0
+
+ ! element flags
+ allocate(is_moho_top(nspec), &
+ is_moho_bot(nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating is_moho_top'
+ is_moho_top = .false.
+ is_moho_bot = .false.
+
+ ! finds spectral elements with moho surface
+ imoho_top = 0
+ imoho_bot = 0
+ do ispec=1,nspec
+
+ ! loops over each face
+ do iface = 1,6
+ ! checks if corners of face on surface
+ counter = 0
+ do icorner = 1,NGNOD2D
+ i = iface_all_corner_ijk(1,icorner,iface)
+ j = iface_all_corner_ijk(2,icorner,iface)
+ k = iface_all_corner_ijk(3,icorner,iface)
+ iglob = ibool(i,j,k,ispec)
+
+ ! checks if point on surface
+ if( iglob_is_surface(iglob) > 0 ) then
+ counter = counter+1
+
+ ! reference corner coordinates
+ xcoord(icorner) = xstore_dummy(iglob)
+ ycoord(icorner) = ystore_dummy(iglob)
+ zcoord(icorner) = zstore_dummy(iglob)
+ endif
+ enddo
+
+ ! stores moho informations
+ if( counter == NGNOD2D ) then
+
+ ! gets face GLL points i,j,k indices from element face
+ call get_element_face_gll_indices(iface,ijk_face,NGLLX,NGLLY)
+
+ ! re-computes face infos
+ ! weighted jacobian and normal
+ call get_jacobian_boundary_face(myrank,nspec, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
+ dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
+ ispec,iface,jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
+
+ ! normal convention: points away from element
+ ! switch normal direction if necessary
+ do j=1,NGLLZ
+ do i=1,NGLLX
+ call get_element_face_normal(ispec,iface,xcoord,ycoord,zcoord, &
+ ibool,nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ normal_face(:,i,j) )
+ enddo
+ enddo
+
+ ! takes normal stored temporary on a face midpoint
+ i = iface_all_midpointijk(1,iface)
+ j = iface_all_midpointijk(2,iface)
+ k = iface_all_midpointijk(3,iface)
+ iglob_midpoint = ibool(i,j,k,ispec)
+ normal(:) = iglob_normals(:,iglob_midpoint)
+
+ ! determines whether normal points into element or not (top/bottom distinction)
+ call get_element_face_normal_idirect(ispec,iface,xcoord,ycoord,zcoord, &
+ ibool,nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ normal,idirect )
+
+ ! takes moho surface element id given by id on midpoint
+ ispec2D = iglob_is_surface(iglob_midpoint)
+
+ ! sets face infos for bottom (normal points away from element)
+ if( idirect == 1 ) then
+
+ ! checks validity
+ if( is_moho_bot( ispec) .eqv. .true. ) then
+ print*,'error: moho surface geometry bottom'
+ print*,' does not allow for mulitple element faces in kernel computation'
+ call exit_mpi(myrank,'error moho bottom elements')
+ endif
+
+ imoho_bot = imoho_bot + 1
+ is_moho_bot(ispec) = .true.
+ ibelm_moho_bot(ispec2D) = ispec
+
+ ! stores on surface gll points (assuming NGLLX = NGLLY = NGLLZ)
+ igll = 0
+ do j=1,NGLLZ
+ do i=1,NGLLX
+ igll = igll+1
+ ijk_moho_bot(:,igll,ispec2D) = ijk_face(:,i,j)
+ normal_moho_bot(:,igll,ispec2D) = normal_face(:,i,j)
+ enddo
+ enddo
+
+ ! sets face infos for top element
+ else if( idirect == 2 ) then
+
+ ! checks validity
+ if( is_moho_top( ispec) .eqv. .true. ) then
+ print*,'error: moho surface geometry top'
+ print*,' does not allow for mulitple element faces kernel computation'
+ call exit_mpi(myrank,'error moho top elements')
+ endif
+
+ imoho_top = imoho_top + 1
+ is_moho_top(ispec) = .true.
+ ibelm_moho_top(ispec2D) = ispec
+
+ ! gll points
+ igll = 0
+ do j=1,NGLLZ
+ do i=1,NGLLX
+ igll = igll+1
+ ijk_moho_top(:,igll,ispec) = ijk_face(:,i,j)
+ ! note: top elements have normal pointing into element
+ normal_moho_top(:,igll,ispec) = - normal_face(:,i,j)
+ enddo
+ enddo
+ endif
+
+ endif ! counter
+
+ enddo ! iface
+
+ ! checks validity of top/bottom distinction
+ if( is_moho_top(ispec) .and. is_moho_bot(ispec) ) then
+ print*,'error: moho surface elements confusing'
+ print*,' element:',ispec,'has top and bottom surface'
+ call exit_mpi(myrank,'error moho surface element')
+ endif
+
+ enddo ! ispec2D
+
+ ! note: surface e.g. could be at the free-surface and have no top elements etc...
+ ! user output
+ call sum_all_i( imoho_top, imoho_top_all )
+ call sum_all_i( imoho_bot, imoho_bot_all )
+ call sum_all_i( NSPEC2D_MOHO, imoho_all )
+ if( myrank == 0 ) then
+ write(IMAIN,*) '********'
+ write(IMAIN,*) 'Moho surface:'
+ write(IMAIN,*) ' total surface elements: ',imoho_all
+ write(IMAIN,*) ' top elements :',imoho_top_all
+ write(IMAIN,*) ' bottom elements:',imoho_bot_all
+ write(IMAIN,*) '********'
+ endif
+
+ ! saves moho files: total number of elements, corner points, all points
+ open(unit=27,file=prname(1:len_trim(prname))//'ibelm_moho.bin',status='unknown',form='unformatted')
+ write(27) NSPEC2D_MOHO
+ write(27) ibelm_moho_top
+ write(27) ibelm_moho_bot
+ write(27) ijk_moho_top
+ write(27) ijk_moho_bot
+ close(27)
+ open(unit=27,file=prname(1:len_trim(prname))//'normal_moho.bin',status='unknown',form='unformatted')
+ write(27) normal_moho_top
+ write(27) normal_moho_bot
+ close(27)
+ open(unit=27,file=prname(1:len_trim(prname))//'is_moho.bin',status='unknown',form='unformatted')
+ write(27) is_moho_top
+ write(27) is_moho_bot
+ close(27)
+
+ end subroutine create_regions_mesh_save_moho
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,1006 +1,1068 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-!
-! United States and French Government Sponsorship Acknowledged.
-!
-!=============================================================================!
-! !
-! generate_databases produces a spectral element grid !
-! for a local or regional model. !
-! The mesher uses the UTM projection !
-! !
-!=============================================================================!
-!
-! If you use this code for your own research, please cite at least one article
-! written by the developers of the package, for instance:
-!
-! @ARTICLE{TrKoLi08,
-! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
-! title = {Spectral-Element and Adjoint Methods in Seismology},
-! journal = {Communications in Computational Physics},
-! year = {2008},
-! volume = {3},
-! pages = {1-32},
-! number = {1}}
-!
-! or
-!
-! @ARTICLE{LiPoKoTr04,
-! author = {Qinya Liu and Jascha Polet and Dimitri Komatitsch and Jeroen Tromp},
-! title = {Spectral-element moment tensor inversions for earthquakes in {S}outhern {C}alifornia},
-! journal={Bull. Seismol. Soc. Am.},
-! year = {2004},
-! volume = {94},
-! pages = {1748-1761},
-! number = {5},
-! doi = {10.1785/012004038}}
-!
-! @INCOLLECTION{ChKoViCaVaFe07,
-! author = {Emmanuel Chaljub and Dimitri Komatitsch and Jean-Pierre Vilotte and
-! Yann Capdeville and Bernard Valette and Gaetano Festa},
-! title = {Spectral Element Analysis in Seismology},
-! booktitle = {Advances in Wave Propagation in Heterogeneous Media},
-! publisher = {Elsevier - Academic Press},
-! year = {2007},
-! editor = {Ru-Shan Wu and Val\'erie Maupin},
-! volume = {48},
-! series = {Advances in Geophysics},
-! pages = {365-419}}
-!
-! @ARTICLE{KoVi98,
-! author={D. Komatitsch and J. P. Vilotte},
-! title={The spectral-element method: an efficient tool to simulate the seismic response of 2{D} and 3{D} geological structures},
-! journal={Bull. Seismol. Soc. Am.},
-! year=1998,
-! volume=88,
-! number=2,
-! pages={368-392}}
-!
-! @ARTICLE{KoTr99,
-! author={D. Komatitsch and J. Tromp},
-! year=1999,
-! title={Introduction to the spectral-element method for 3-{D} seismic wave propagation},
-! journal={Geophys. J. Int.},
-! volume=139,
-! number=3,
-! pages={806-822},
-! doi={10.1046/j.1365-246x.1999.00967.x}}
-!
-! @ARTICLE{KoLiTrSuStSh04,
-! author={Dimitri Komatitsch and Qinya Liu and Jeroen Tromp and Peter S\"{u}ss
-! and Christiane Stidham and John H. Shaw},
-! year=2004,
-! title={Simulations of Ground Motion in the {L}os {A}ngeles {B}asin
-! based upon the Spectral-Element Method},
-! journal={Bull. Seism. Soc. Am.},
-! volume=94,
-! number=1,
-! pages={187-206}}
-!
-! and/or another article from http://web.univ-pau.fr/~dkomati1/publications.html
-!
-!
-! If you use the kernel capabilities of the code, please cite at least one article
-! written by the developers of the package, for instance:
-!
-! @ARTICLE{TrKoLi08,
-! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
-! title = {Spectral-Element and Adjoint Methods in Seismology},
-! journal = {Communications in Computational Physics},
-! year = {2008},
-! volume = {3},
-! pages = {1-32},
-! number = {1}}
-!
-! or
-!
-! @ARTICLE{LiTr06,
-! author={Qinya Liu and Jeroen Tromp},
-! title={Finite-frequency kernels based on adjoint methods},
-! journal={Bull. Seismol. Soc. Am.},
-! year=2006,
-! volume=96,
-! number=6,
-! pages={2383-2397},
-! doi={10.1785/0120060041}}
-!
-!
-! Reference frame - convention:
-! ----------------------------
-!
-! The code uses the following convention for the reference frame:
-!
-! - X axis is East
-! - Y axis is North
-! - Z axis is up
-!
-! Note that this convention is different from both the Aki-Richards convention
-! and the Harvard CMT convention.
-!
-! Let us recall that the Aki-Richards convention is:
-!
-! - X axis is North
-! - Y axis is East
-! - Z axis is down
-!
-! and that the Harvard CMT convention is:
-!
-! - X axis is South
-! - Y axis is East
-! - Z axis is up
-!
-! To report bugs or suggest improvements to the code, please send an email
-! to Jeroen Tromp <jtromp AT princeton.edu> and/or use our online
-! bug tracking system at http://www.geodynamics.org/roundup .
-!
-! Evolution of the code:
-! ---------------------
-!
-! MPI v. 2.0 "Sesame" (Spectral ElementS on Any MEsh), November 2010:
-! Dimitri Komatitsch, Nicolas Le Goff, Roland Martin and Pieyre Le Loher, University of Pau, France,
-! Jeroen Tromp and the Princeton group of developers, Princeton University, USA,
-! and Emanuele Casarotti, INGV Roma, Italy:
-! support for CUBIT meshes decomposed by SCOTCH, METIS or ZOLTAN;
-! much faster solver using Michel Deville's inlined matrix products.
-!
-! MPI v. 1.4 Dimitri Komatitsch, University of Pau, Qinya Liu and others, Caltech, September 2006:
-! better adjoint and kernel calculations, faster and better I/Os
-! on very large systems, many small improvements and bug fixes
-!
-! MPI v. 1.3 Dimitri Komatitsch, University of Pau, and Qinya Liu, Caltech, July 2005:
-! serial version, regular mesh, adjoint and kernel calculations, ParaView support
-!
-! MPI v. 1.2 Min Chen and Dimitri Komatitsch, Caltech, July 2004:
-! full anisotropy, volume movie
-!
-! MPI v. 1.1 Dimitri Komatitsch, Caltech, October 2002: Zhu's Moho map, scaling
-! of Vs with depth, Hauksson's regional model, attenuation, oceans, movies
-!
-! MPI v. 1.0 Dimitri Komatitsch, Caltech, May 2002: first MPI version based on global code
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- module generate_databases_par
-
- implicit none
-
- include "constants.h"
-
-! number of spectral elements in each block
- integer nspec,npointot
-
-! local to global indexing array
- integer, dimension(:,:,:,:), allocatable :: ibool
-
-! arrays with the mesh in double precision
- double precision, dimension(:,:,:,:), allocatable :: xstore,ystore,zstore
-
-! proc numbers for MPI
- integer :: myrank,sizeprocs,ier
-
-! use integer array to store topography values
- integer :: UTM_PROJECTION_ZONE
- logical :: SUPPRESS_UTM_PROJECTION
- integer :: NX_TOPO,NY_TOPO
- double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
- character(len=100) :: topo_file
- integer, dimension(:,:), allocatable :: itopo_bathy
-
-! timer MPI
- double precision, external :: wtime
- double precision :: time_start,tCPU
-
-! parameters read from parameter file
- integer :: NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,SIMULATION_TYPE
- integer :: NSOURCES
-
- double precision :: DT,HDUR_MOVIE
-
- logical :: ATTENUATION,USE_OLSEN_ATTENUATION, &
- OCEANS, TOPOGRAPHY, SAVE_FORWARD
- logical :: ANISOTROPY,ABSORBING_CONDITIONS,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
-
- logical :: MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
- USE_HIGHRES_FOR_MOVIES
- integer :: NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO,NTSTEP_BETWEEN_READ_ADJSRC
-
- character(len=256) OUTPUT_FILES,LOCAL_PATH
-
-! parameters deduced from parameters read from file
- integer :: NPROC
-
-! static memory size that will be needed by the solver
- double precision :: max_static_memory_size,max_static_memory_size_request
-
-! this for all the regions
- integer NSPEC_AB,NGLOB_AB
-
- integer NSPEC2D_BOTTOM,NSPEC2D_TOP
-
- double precision min_elevation,max_elevation
- double precision min_elevation_all,max_elevation_all
-
-! for Databases of external meshes
- character(len=256) prname
- integer :: dummy_node
- integer :: dummy_elmnt
- integer :: ispec, inode, num_interface,ie,imat,iface,icorner
- integer :: nnodes_ext_mesh, nelmnts_ext_mesh
- integer :: num_interfaces_ext_mesh
- integer :: max_interface_size_ext_mesh
- integer :: nmat_ext_mesh, nundefMat_ext_mesh
- integer, dimension(:), allocatable :: my_neighbours_ext_mesh
- integer, dimension(:), allocatable :: my_nelmnts_neighbours_ext_mesh
- integer, dimension(:,:,:), allocatable :: my_interfaces_ext_mesh
- integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh
- integer, dimension(:), allocatable :: nibool_interfaces_ext_mesh
- double precision, dimension(:,:), allocatable :: nodes_coords_ext_mesh
- integer, dimension(:,:), allocatable :: elmnts_ext_mesh
- integer, dimension(:,:), allocatable :: mat_ext_mesh
- integer :: max_nibool_interfaces_ext_mesh
- integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh_dummy
-
-! boundaries and materials
- integer :: ispec2D, boundary_number
- integer :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, nspec2D_bottom_ext, nspec2D_top_ext
- character (len=30), dimension(:,:), allocatable :: undef_mat_prop
- integer, dimension(:), allocatable :: ibelm_xmin,ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top
- integer, dimension(:,:), allocatable :: nodes_ibelm_xmin,nodes_ibelm_xmax, &
- nodes_ibelm_ymin, nodes_ibelm_ymax, nodes_ibelm_bottom, nodes_ibelm_top
- double precision, dimension(:,:), allocatable :: materials_ext_mesh
-
-! moho (optional)
- integer :: nspec2D_moho_ext
- integer, dimension(:), allocatable :: ibelm_moho
- integer, dimension(:,:), allocatable :: nodes_ibelm_moho
-
- integer :: nglob,nglob_total,nspec_total
-
- logical,dimension(:),allocatable :: ispec_is_surface_external_mesh,iglob_is_surface_external_mesh
- integer :: nfaces_surface_ext_mesh,nfaces_surface_glob_ext_mesh
-
-! flag for noise simulation
- integer :: NOISE_TOMOGRAPHY
-
- end module generate_databases_par
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine generate_databases
-
- use generate_databases_par
- implicit none
-
-! sizeprocs returns number of processes started (should be equal to NPROC).
-! myrank is the rank of each process, between 0 and NPROC-1.
-! as usual in MPI, process 0 is in charge of coordinating everything
-! and also takes care of the main output
- call world_size(sizeprocs)
- call world_rank(myrank)
-
-! get the base pathname for output files
- call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
-
-! open main output file, only written to by process 0
- if(myrank == 0 .and. IMAIN /= ISTANDARD_OUTPUT) &
- open(unit=IMAIN,file=trim(OUTPUT_FILES)//'/output_mesher.txt',status='unknown')
-
-! get MPI starting time
- time_start = wtime()
-
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) '******************************************'
- write(IMAIN,*) '*** Specfem3D MPI Mesher - f90 version ***'
- write(IMAIN,*) '******************************************'
- write(IMAIN,*)
- endif
-
-! read the parameter file
- call gd_read_parameters()
-
-! makes sure processes are synchronized
- call sync_all()
-
-! reads topography and bathymetry file
- call gd_read_topography()
-
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) '**************************'
- write(IMAIN,*) 'creating mesh in the model'
- write(IMAIN,*) '**************************'
- write(IMAIN,*)
- endif
-
-! reads Databases files
- call gd_read_partition_files()
-
-! external mesh creation
- call gd_setup_mesh()
-
-! finalize mesher
- call gd_finalize()
-
- end subroutine generate_databases
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine gd_read_parameters
-
-! reads and checks user input parameters
-
- use generate_databases_par
- implicit none
-
-! reads Par_file
- call read_parameter_file( NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
- ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
- OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
- MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
- NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
- SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION, &
- NTSTEP_BETWEEN_OUTPUT_INFO,SIMULATION_TYPE,SAVE_FORWARD, &
- NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
-
-! check that the code is running with the requested nb of processes
- if(sizeprocs /= NPROC) then
- if( myrank == 0 ) then
- write(IMAIN,*) 'error: number of processors supposed to run on: ',NPROC
- write(IMAIN,*) 'error: number of processors actually run on: ',sizeprocs
- endif
- call exit_MPI(myrank,'wrong number of MPI processes')
- endif
-
-! there would be a problem with absorbing boundaries for different NGLLX,NGLLY,NGLLZ values
-! just to be sure for now..
- if( ABSORBING_CONDITIONS ) then
- if( NGLLX /= NGLLY .and. NGLLY /= NGLLZ ) &
- call exit_MPI(myrank,'must have NGLLX = NGLLY = NGLLZ for external meshes')
- endif
-
-! info about external mesh simulation
- if(myrank == 0) then
- write(IMAIN,*) 'This is process ',myrank
- write(IMAIN,*) 'There are ',sizeprocs,' MPI processes'
- write(IMAIN,*) 'Processes are numbered from 0 to ',sizeprocs-1
- write(IMAIN,*)
- write(IMAIN,*) 'There is a total of ',NPROC,' slices'
- write(IMAIN,*)
- write(IMAIN,*) 'NGLLX = ',NGLLX
- write(IMAIN,*) 'NGLLY = ',NGLLY
- write(IMAIN,*) 'NGLLZ = ',NGLLZ
-
- write(IMAIN,*)
- write(IMAIN,*) 'Shape functions defined by NGNOD = ',NGNOD,' control nodes'
- write(IMAIN,*) 'Surface shape functions defined by NGNOD2D = ',NGNOD2D,' control nodes'
- write(IMAIN,*)
- endif
-
-! check that reals are either 4 or 8 bytes
- if(CUSTOM_REAL /= SIZE_REAL .and. CUSTOM_REAL /= SIZE_DOUBLE) &
- call exit_MPI(myrank,'wrong size of CUSTOM_REAL for reals')
-
- if(NGNOD /= 8) call exit_MPI(myrank,'number of control nodes must be 8')
- if(NGNOD2D /= 4) call exit_MPI(myrank,'elements with 8 points should have NGNOD2D = 4')
-
-! for the number of standard linear solids for attenuation
- if(N_SLS /= 3) call exit_MPI(myrank,'number of SLS must be 3')
-
- ! exclusive movie flags
- if( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP ) then
- MOVIE_SURFACE = .false.
- CREATE_SHAKEMAP = .false.
- endif
-
- ! for noise simulations, we need to save movies at the surface (where the noise is generated)
- ! and thus we force MOVIE_SURFACE to be .true., in order to use variables defined for surface movies later
- if ( NOISE_TOMOGRAPHY /= 0 ) then
- MOVIE_SURFACE = .true.
- CREATE_SHAKEMAP = .false.
- if( ( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP ) .and. myrank == 0 ) then
- write(IMAIN,*) 'error: when running noise simulations ( NOISE_TOMOGRAPHY /= 0 ),'
- write(IMAIN,*) ' we can NOT use EXTERNAL_MESH_MOVIE_SURFACE or EXTERNAL_MESH_CREATE_SHAKEMAP'
- write(IMAIN,*) ' change EXTERNAL_MESH_MOVIE_SURFACE & EXTERNAL_MESH_CREATE_SHAKEMAP in constant.h'
- call exit_MPI(myrank,'incompatible NOISE_TOMOGRAPHY, EXTERNAL_MESH_MOVIE_SURFACE, EXTERNAL_MESH_CREATE_SHAKEMAP')
- endif
- endif
-
-
- if(myrank == 0) then
-! chris: I am not sure if we should suppress the following. topography should appear in the external mesh
-! leave it for now
-
- write(IMAIN,*)
- if(SUPPRESS_UTM_PROJECTION) then
- write(IMAIN,*) 'suppressing UTM projection'
- else
- write(IMAIN,*) 'using UTM projection in region ',UTM_PROJECTION_ZONE
- endif
-
- write(IMAIN,*)
- if(ATTENUATION) then
- write(IMAIN,*) 'incorporating attenuation using ',N_SLS,' standard linear solids'
- if(USE_OLSEN_ATTENUATION) then
- write(IMAIN,*) 'using Olsen''s attenuation'
- else
- write(IMAIN,*) 'not using Olsen''s attenuation'
- endif
- else
- write(IMAIN,*) 'no attenuation'
- endif
-
- write(IMAIN,*)
- if(ANISOTROPY) then
- write(IMAIN,*) 'incorporating anisotropy'
- else
- write(IMAIN,*) 'no anisotropy'
- endif
-
- write(IMAIN,*)
- if(OCEANS) then
- write(IMAIN,*) 'incorporating the oceans using equivalent load'
- if( TOPOGRAPHY ) write(IMAIN,*) ' with elevation from topography file'
- else
- write(IMAIN,*) 'no oceans'
- endif
-
- write(IMAIN,*)
-
- endif
-
- end subroutine gd_read_parameters
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine gd_read_topography
-
-! reads in topography files
-
- use generate_databases_par
- implicit none
-
- if( OCEANS .and. TOPOGRAPHY ) then
-
- ! for Southern California
- NX_TOPO = NX_TOPO_SOCAL
- NY_TOPO = NY_TOPO_SOCAL
- ORIG_LAT_TOPO = ORIG_LAT_TOPO_SOCAL
- ORIG_LONG_TOPO = ORIG_LONG_TOPO_SOCAL
- DEGREES_PER_CELL_TOPO = DEGREES_PER_CELL_TOPO_SOCAL
- topo_file = TOPO_FILE_SOCAL
-
- allocate(itopo_bathy(NX_TOPO,NY_TOPO),stat=ier)
- if( ier /= 0 ) stop 'error allocating array itopo_bathy'
-
- call read_topo_bathy_file(itopo_bathy,NX_TOPO,NY_TOPO,topo_file)
-
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) 'regional topography file read ranges in m from ',minval(itopo_bathy),' to ',maxval(itopo_bathy)
- write(IMAIN,*)
- endif
- else
- NX_TOPO = 1
- NY_TOPO = 1
- allocate(itopo_bathy(NX_TOPO,NY_TOPO),stat=ier)
- if( ier /= 0 ) stop 'error allocating dummy array itopo_bathy'
-
- endif
-
-!! read basement map
-! if(BASEMENT_MAP) then
-! call get_value_string(BASEMENT_MAP_FILE,'model.BASEMENT_MAP_FILE', &
-! '../in_data_files/la_basement/reggridbase2_filtered_ascii.dat')
-! open(unit=55,file=BASEMENT_MAP_FILE,status='old',action='read')
-! do ix=1,NX_BASEMENT
-! do iy=1,NY_BASEMENT
-! read(55,*) iz_basement
-! z_basement(ix,iy) = dble(iz_basement)
-! enddo
-! enddo
-! close(55)
-! endif
-
- end subroutine gd_read_topography
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine gd_read_partition_files
-
-! reads in proc***_Databases files
-
- use generate_databases_par
- implicit none
-
- integer :: num_xmin,num_xmax,num_ymin,num_ymax,num_top,num_bottom,num
- integer :: num_moho
- integer :: j
- character(len=128) :: line
-
-! read databases about external mesh simulation
-! global node coordinates
- call create_name_database(prname,myrank,LOCAL_PATH)
- open(unit=IIN,file=prname(1:len_trim(prname))//'Database',status='old',action='read',form='formatted',iostat=ier)
- if( ier /= 0 ) then
- write(IMAIN,*) 'error opening file: ',prname(1:len_trim(prname))//'Database'
- write(IMAIN,*) 'make sure file exists'
- call exit_mpi(myrank,'error opening database file')
- endif
- read(IIN,*) nnodes_ext_mesh
- allocate(nodes_coords_ext_mesh(NDIM,nnodes_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array nodes_coords_ext_mesh'
- do inode = 1, nnodes_ext_mesh
- read(IIN,*) dummy_node, nodes_coords_ext_mesh(1,inode), nodes_coords_ext_mesh(2,inode), &
- nodes_coords_ext_mesh(3,inode)
- enddo
-
- call sum_all_i(nnodes_ext_mesh,num)
- if(myrank == 0) then
- write(IMAIN,*) ' external mesh points: ',num
- endif
- call sync_all()
-
-! read materials' physical properties
- read(IIN,*) nmat_ext_mesh, nundefMat_ext_mesh
- allocate(materials_ext_mesh(6,nmat_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array materials_ext_mesh'
- do imat = 1, nmat_ext_mesh
- ! format: #(1) rho #(2) vp #(3) vs #(4) Q_flag #(5) anisotropy_flag #(6) material_domain_id
- read(IIN,*) materials_ext_mesh(1,imat), materials_ext_mesh(2,imat), materials_ext_mesh(3,imat), &
- materials_ext_mesh(4,imat), materials_ext_mesh(5,imat), materials_ext_mesh(6,imat)
-
- ! output
- !print*,'materials:',materials_ext_mesh(1,imat), materials_ext_mesh(2,imat), materials_ext_mesh(3,imat), &
- ! materials_ext_mesh(4,imat), materials_ext_mesh(5,imat), materials_ext_mesh(6,imat)
- end do
-
- if(myrank == 0) then
- write(IMAIN,*) ' defined materials: ',nmat_ext_mesh
- endif
- call sync_all()
-
- allocate(undef_mat_prop(6,nundefMat_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array undef_mat_prop'
- do imat = 1, nundefMat_ext_mesh
- ! format example tomography:
- ! -1 tomography elastic tomography_model.xyz 1 2
- ! format example interface:
- ! -1 interface 14 15 1 2
- read(IIN,*) undef_mat_prop(1,imat),undef_mat_prop(2,imat),undef_mat_prop(3,imat),undef_mat_prop(4,imat), &
- undef_mat_prop(5,imat), undef_mat_prop(6,imat)
-
- ! output debug
- !print*,'undefined materials:'
- !print*,undef_mat_prop(:,imat)
- end do
-
- if(myrank == 0) then
- write(IMAIN,*) ' undefined materials: ',nundefMat_ext_mesh
- endif
- call sync_all()
-
-! element indexing
- read(IIN,*) nelmnts_ext_mesh
- allocate(elmnts_ext_mesh(esize,nelmnts_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array elmnts_ext_mesh'
- allocate(mat_ext_mesh(2,nelmnts_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array mat_ext_mesh'
-
- ! reads in material association for each spectral element and corner node indices
- do ispec = 1, nelmnts_ext_mesh
- ! format:
- ! # ispec_local # material_index_1 # material_index_2 # corner_id1 # corner_id2 # ... # corner_id8
- read(IIN,*) dummy_elmnt, mat_ext_mesh(1,ispec),mat_ext_mesh(2,ispec), &
- elmnts_ext_mesh(1,ispec), elmnts_ext_mesh(2,ispec), elmnts_ext_mesh(3,ispec), elmnts_ext_mesh(4,ispec), &
- elmnts_ext_mesh(5,ispec), elmnts_ext_mesh(6,ispec), elmnts_ext_mesh(7,ispec), elmnts_ext_mesh(8,ispec)
-
- ! check debug
- if( dummy_elmnt /= ispec) stop "error ispec order in materials file"
-
- enddo
- NSPEC_AB = nelmnts_ext_mesh
-
- call sum_all_i(nspec_ab,num)
- if(myrank == 0) then
- write(IMAIN,*) ' spectral elements: ',num
- endif
- call sync_all()
-
-
-! read boundaries
- read(IIN,*) boundary_number ,nspec2D_xmin
- if(boundary_number /= 1) stop "Error : invalid database file"
- read(IIN,*) boundary_number ,nspec2D_xmax
- if(boundary_number /= 2) stop "Error : invalid database file"
- read(IIN,*) boundary_number ,nspec2D_ymin
- if(boundary_number /= 3) stop "Error : invalid database file"
- read(IIN,*) boundary_number ,nspec2D_ymax
- if(boundary_number /= 4) stop "Error : invalid database file"
- read(IIN,*) boundary_number ,nspec2D_bottom_ext
- if(boundary_number /= 5) stop "Error : invalid database file"
- read(IIN,*) boundary_number ,nspec2D_top_ext
- if(boundary_number /= 6) stop "Error : invalid database file"
-
- NSPEC2D_BOTTOM = nspec2D_bottom_ext
- NSPEC2D_TOP = nspec2D_top_ext
-
- allocate(ibelm_xmin(nspec2D_xmin),nodes_ibelm_xmin(4,nspec2D_xmin),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibelm_xmin etc.'
- do ispec2D = 1,nspec2D_xmin
- read(IIN,*) ibelm_xmin(ispec2D),(nodes_ibelm_xmin(j,ispec2D),j=1,4)
- end do
-
- allocate(ibelm_xmax(nspec2D_xmax),nodes_ibelm_xmax(4,nspec2D_xmax),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibelm_xmax etc.'
- do ispec2D = 1,nspec2D_xmax
- read(IIN,*) ibelm_xmax(ispec2D),(nodes_ibelm_xmax(j,ispec2D),j=1,4)
- end do
-
- allocate(ibelm_ymin(nspec2D_ymin),nodes_ibelm_ymin(4,nspec2D_ymin),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibelm_ymin'
- do ispec2D = 1,nspec2D_ymin
- read(IIN,*) ibelm_ymin(ispec2D),(nodes_ibelm_ymin(j,ispec2D),j=1,4)
- end do
-
- allocate(ibelm_ymax(nspec2D_ymax),nodes_ibelm_ymax(4,nspec2D_ymax),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibelm_ymax etc.'
- do ispec2D = 1,nspec2D_ymax
- read(IIN,*) ibelm_ymax(ispec2D),(nodes_ibelm_ymax(j,ispec2D),j=1,4)
- end do
-
- allocate(ibelm_bottom(nspec2D_bottom_ext),nodes_ibelm_bottom(4,nspec2D_bottom_ext),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibelm_bottom etc.'
- do ispec2D = 1,nspec2D_bottom_ext
- read(IIN,*) ibelm_bottom(ispec2D),(nodes_ibelm_bottom(j,ispec2D),j=1,4)
- end do
-
- allocate(ibelm_top(nspec2D_top_ext),nodes_ibelm_top(4,nspec2D_top_ext),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibelm_top etc.'
- do ispec2D = 1,nspec2D_top_ext
- read(IIN,*) ibelm_top(ispec2D),(nodes_ibelm_top(j,ispec2D),j=1,4)
- end do
-
- call sum_all_i(nspec2D_xmin,num_xmin)
- call sum_all_i(nspec2D_xmax,num_xmax)
- call sum_all_i(nspec2D_ymin,num_ymin)
- call sum_all_i(nspec2D_ymax,num_ymax)
- call sum_all_i(nspec2D_top_ext,num_top)
- call sum_all_i(nspec2D_bottom_ext,num_bottom)
-
- if(myrank == 0) then
- write(IMAIN,*) ' absorbing boundaries: '
- write(IMAIN,*) ' xmin,xmax: ',num_xmin,num_xmax
- write(IMAIN,*) ' ymin,ymax: ',num_ymin,num_ymax
- write(IMAIN,*) ' bottom,top: ',num_bottom,num_top
- endif
- call sync_all()
-
-! MPI interfaces between different partitions
- ! format: #number_of_MPI_interfaces #maximum_number_of_elements_on_each_interface
- read(IIN,*) num_interfaces_ext_mesh, max_interface_size_ext_mesh
-
- ! allocates interfaces
- allocate(my_neighbours_ext_mesh(num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array my_neighbours_ext_mesh'
- allocate(my_nelmnts_neighbours_ext_mesh(num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array my_nelmnts_neighbours_ext_mesh'
- allocate(my_interfaces_ext_mesh(6,max_interface_size_ext_mesh,num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array my_interfaces_ext_mesh'
- allocate(ibool_interfaces_ext_mesh(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibool_interfaces_ext_mesh'
- allocate(nibool_interfaces_ext_mesh(num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array nibool_interfaces_ext_mesh'
-
- ! loops over MPI interfaces with other partitions
- do num_interface = 1, num_interfaces_ext_mesh
- ! format: #process_interface_id #number_of_elements_on_interface
- ! where
- ! process_interface_id = rank of (neighbor) process to share MPI interface with
- ! number_of_elements_on_interface = number of interface elements
- read(IIN,*) my_neighbours_ext_mesh(num_interface), my_nelmnts_neighbours_ext_mesh(num_interface)
-
- ! loops over interface elements
- do ie = 1, my_nelmnts_neighbours_ext_mesh(num_interface)
- ! format: #(1)spectral_element_id #(2)interface_type #(3)node_id1 #(4)node_id2 #(5)...
- !
- ! interface types:
- ! 1 - corner point only
- ! 2 - element edge
- ! 4 - element face
- read(IIN,*) my_interfaces_ext_mesh(1,ie,num_interface), my_interfaces_ext_mesh(2,ie,num_interface), &
- my_interfaces_ext_mesh(3,ie,num_interface), my_interfaces_ext_mesh(4,ie,num_interface), &
- my_interfaces_ext_mesh(5,ie,num_interface), my_interfaces_ext_mesh(6,ie,num_interface)
- enddo
- enddo
-
- call sum_all_i(num_interfaces_ext_mesh,num)
- if(myrank == 0) then
- write(IMAIN,*) ' number of MPI partition interfaces: ',num
- endif
- call sync_all()
-
- ! optional moho
- if( SAVE_MOHO_MESH ) then
- ! checks if additional line exists
- read(IIN,'(a128)',iostat=ier) line
- if( ier /= 0 ) then
- ! no moho informations given
- nspec2D_moho_ext = 0
- boundary_number = 7
- else
- ! tries to read in number of moho elements
- read(line,*,iostat=ier) boundary_number ,nspec2D_moho_ext
- if( ier /= 0 ) call exit_mpi(myrank,'error reading moho mesh in database')
- endif
- if(boundary_number /= 7) stop "Error : invalid database file"
-
- ! checks total number of elements
- call sum_all_i(nspec2D_moho_ext,num_moho)
- if( num_moho == 0 ) call exit_mpi(myrank,'error no moho mesh in database')
-
- ! reads in element informations
- allocate(ibelm_moho(nspec2D_moho_ext),nodes_ibelm_moho(4,nspec2D_moho_ext),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibelm_moho etc.'
- do ispec2D = 1,nspec2D_moho_ext
- ! format: #element_id #node_id1 #node_id2 #node_id3 #node_id4
- read(IIN,*) ibelm_moho(ispec2D),(nodes_ibelm_moho(j,ispec2D),j=1,4)
- end do
-
- ! user output
- if(myrank == 0) then
- write(IMAIN,*) ' moho surfaces: ',num_moho
- endif
- call sync_all()
- endif
-
- close(IIN)
-
- end subroutine gd_read_partition_files
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine gd_setup_mesh
-
-! mesh creation for static solver
-
- use generate_databases_par
- implicit none
-
-! assign theoretical number of elements
- nspec = NSPEC_AB
-
-! compute maximum number of points
- npointot = nspec * NGLLCUBE
-
-! use dynamic allocation to allocate memory for arrays
-! allocate(idoubling(nspec))
- allocate(ibool(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibool'
- allocate(xstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
- if( ier /= 0 ) stop 'error allocating array xstore'
- allocate(ystore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ystore'
- allocate(zstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
- if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
-
- call memory_eval_mesher(myrank,nspec,npointot,nnodes_ext_mesh, &
- nelmnts_ext_mesh,nmat_ext_mesh,num_interfaces_ext_mesh, &
- max_interface_size_ext_mesh,nspec2D_xmin,nspec2D_xmax, &
- nspec2D_ymin,nspec2D_ymax,nspec2D_bottom,nspec2D_top, &
- max_static_memory_size_request)
-
- max_static_memory_size = max_static_memory_size_request
-
-! make sure everybody is synchronized
- call sync_all()
-
-! main working routine to create all the regions of the mesh
- if(myrank == 0) then
- write(IMAIN,*) 'create regions: '
- endif
- call create_regions_mesh_ext(ibool, &
- xstore, ystore, zstore, nspec, npointot, myrank, LOCAL_PATH, &
- nnodes_ext_mesh, nelmnts_ext_mesh, &
- nodes_coords_ext_mesh, elmnts_ext_mesh, &
- max_static_memory_size, mat_ext_mesh, materials_ext_mesh, &
- nmat_ext_mesh, undef_mat_prop, nundefMat_ext_mesh, &
- num_interfaces_ext_mesh, max_interface_size_ext_mesh, &
- my_neighbours_ext_mesh, my_nelmnts_neighbours_ext_mesh, &
- my_interfaces_ext_mesh, &
- ibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh, &
- nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, &
- NSPEC2D_BOTTOM, NSPEC2D_TOP,&
- ibelm_xmin, ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top, &
- nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax, &
- nodes_ibelm_bottom,nodes_ibelm_top, &
- SAVE_MESH_FILES,nglob, &
- ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
- ATTENUATION,USE_OLSEN_ATTENUATION, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
- ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
- itopo_bathy)
-
-! Moho boundary parameters, 2-D jacobians and normals
- if( SAVE_MOHO_MESH ) then
- call create_regions_mesh_save_moho(myrank,nglob,nspec, &
- nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
- nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
- endif
-
-! defines global number of nodes in model
- NGLOB_AB = nglob
-
-! print min and max of topography included
- min_elevation = HUGEVAL
- max_elevation = -HUGEVAL
- do iface = 1,nspec2D_top_ext
- do icorner = 1,NGNOD2D
- inode = nodes_ibelm_top(icorner,iface)
- if (nodes_coords_ext_mesh(3,inode) < min_elevation) then
- min_elevation = nodes_coords_ext_mesh(3,inode)
- end if
- if (nodes_coords_ext_mesh(3,inode) > max_elevation) then
- max_elevation = nodes_coords_ext_mesh(3,inode)
- end if
- end do
- end do
-
-! compute the maximum of the maxima for all the slices using an MPI reduction
- call min_all_dp(min_elevation,min_elevation_all)
- call max_all_dp(max_elevation,max_elevation_all)
-
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) 'min and max of topography included in mesh in m is ',min_elevation_all,' ',max_elevation_all
- write(IMAIN,*)
- endif
-
-! clean-up
- deallocate(xstore,ystore,zstore)
-
-! make sure everybody is synchronized
- call sync_all()
-
- end subroutine gd_setup_mesh
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine gd_finalize
-
-! checks user input parameters
-
- use generate_databases_par
- implicit none
-
- integer :: i
-
-! print number of points and elements in the mesh
- call sum_all_i(NGLOB_AB,nglob_total)
- call sum_all_i(NSPEC_AB,nspec_total)
- call sync_all()
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) 'Repartition of elements:'
- write(IMAIN,*) '-----------------------'
- write(IMAIN,*)
- write(IMAIN,*) 'total number of elements in each slice: ',NSPEC_AB
- write(IMAIN,*) 'total number of points in each slice: ',NGLOB_AB
- write(IMAIN,*)
- write(IMAIN,*) 'total number of elements in entire mesh: ',nspec_total ! NSPEC_AB*NPROC
- write(IMAIN,*) 'total number of points in entire mesh: ',nglob_total !NGLOB_AB*NPROC
- write(IMAIN,*) 'total number of DOFs in entire mesh: ',nglob_total*NDIM !NGLOB_AB*NPROC*NDIM
- write(IMAIN,*)
- write(IMAIN,*) 'total number of time steps in the solver will be: ',NSTEP
- write(IMAIN,*)
- ! write information about precision used for floating-point operations
- if(CUSTOM_REAL == SIZE_REAL) then
- write(IMAIN,*) 'using single precision for the calculations'
- else
- write(IMAIN,*) 'using double precision for the calculations'
- endif
- write(IMAIN,*)
- write(IMAIN,*) 'smallest and largest possible floating-point numbers are: ',tiny(1._CUSTOM_REAL),huge(1._CUSTOM_REAL)
- write(IMAIN,*)
- endif
-
-! gets number of surface elements (for movie outputs)
- allocate( ispec_is_surface_external_mesh(NSPEC_AB), &
- iglob_is_surface_external_mesh(NGLOB_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array'
- max_nibool_interfaces_ext_mesh = maxval(nibool_interfaces_ext_mesh)
- allocate(ibool_interfaces_ext_mesh_dummy(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array'
- do i = 1, num_interfaces_ext_mesh
- ibool_interfaces_ext_mesh_dummy(:,:) = ibool_interfaces_ext_mesh(1:max_nibool_interfaces_ext_mesh,:)
- enddo
- call sync_all()
- call detect_surface(NPROC,NGLOB_AB,NSPEC_AB,ibool, &
- ispec_is_surface_external_mesh, &
- iglob_is_surface_external_mesh, &
- nfaces_surface_ext_mesh, &
- num_interfaces_ext_mesh, &
- max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh, &
- my_neighbours_ext_mesh, &
- ibool_interfaces_ext_mesh_dummy )
-
- deallocate(ibool)
- deallocate(ispec_is_surface_external_mesh)
- deallocate(iglob_is_surface_external_mesh)
- deallocate(ibool_interfaces_ext_mesh_dummy)
-
- ! takes number of faces for top, free surface only
- if( MOVIE_SURFACE .or. CREATE_SHAKEMAP ) then
- nfaces_surface_ext_mesh = NSPEC2D_TOP
- endif
-
-! number of surface faces for all partitions together
- call sum_all_i(nfaces_surface_ext_mesh,nfaces_surface_glob_ext_mesh)
-
-
-! copy number of elements and points in an include file for the solver
- if( myrank == 0 ) then
- call save_header_file(NSPEC_AB,NGLOB_AB,NPROC, &
- ATTENUATION,ANISOTROPY,NSTEP,DT, &
- SIMULATION_TYPE,max_static_memory_size,nfaces_surface_glob_ext_mesh)
- endif
-
-! elapsed time since beginning of mesh generation
- if(myrank == 0) then
- tCPU = wtime() - time_start
- write(IMAIN,*)
- write(IMAIN,*) 'Elapsed time for mesh generation and buffer creation in seconds = ',tCPU
- write(IMAIN,*) 'End of mesh generation'
- write(IMAIN,*)
- endif
-
-! close main output file
- if(myrank == 0) then
- write(IMAIN,*) 'done'
- write(IMAIN,*)
- close(IMAIN)
- endif
-
-! synchronize all the processes to make sure everybody has finished
- call sync_all()
-
- end subroutine gd_finalize
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+!
+! United States and French Government Sponsorship Acknowledged.
+!
+!=============================================================================!
+! !
+! generate_databases produces a spectral element grid !
+! for a local or regional model. !
+! The mesher uses the UTM projection !
+! !
+!=============================================================================!
+!
+! If you use this code for your own research, please cite at least one article
+! written by the developers of the package, for instance:
+!
+! @ARTICLE{TrKoLi08,
+! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
+! title = {Spectral-Element and Adjoint Methods in Seismology},
+! journal = {Communications in Computational Physics},
+! year = {2008},
+! volume = {3},
+! pages = {1-32},
+! number = {1}}
+!
+! or
+!
+! @ARTICLE{LiPoKoTr04,
+! author = {Qinya Liu and Jascha Polet and Dimitri Komatitsch and Jeroen Tromp},
+! title = {Spectral-element moment tensor inversions for earthquakes in {S}outhern {C}alifornia},
+! journal={Bull. Seismol. Soc. Am.},
+! year = {2004},
+! volume = {94},
+! pages = {1748-1761},
+! number = {5},
+! doi = {10.1785/012004038}}
+!
+! @INCOLLECTION{ChKoViCaVaFe07,
+! author = {Emmanuel Chaljub and Dimitri Komatitsch and Jean-Pierre Vilotte and
+! Yann Capdeville and Bernard Valette and Gaetano Festa},
+! title = {Spectral Element Analysis in Seismology},
+! booktitle = {Advances in Wave Propagation in Heterogeneous Media},
+! publisher = {Elsevier - Academic Press},
+! year = {2007},
+! editor = {Ru-Shan Wu and Val\'erie Maupin},
+! volume = {48},
+! series = {Advances in Geophysics},
+! pages = {365-419}}
+!
+! @ARTICLE{KoVi98,
+! author={D. Komatitsch and J. P. Vilotte},
+! title={The spectral-element method: an efficient tool to simulate the seismic response of 2{D} and 3{D} geological structures},
+! journal={Bull. Seismol. Soc. Am.},
+! year=1998,
+! volume=88,
+! number=2,
+! pages={368-392}}
+!
+! @ARTICLE{KoTr99,
+! author={D. Komatitsch and J. Tromp},
+! year=1999,
+! title={Introduction to the spectral-element method for 3-{D} seismic wave propagation},
+! journal={Geophys. J. Int.},
+! volume=139,
+! number=3,
+! pages={806-822},
+! doi={10.1046/j.1365-246x.1999.00967.x}}
+!
+! @ARTICLE{KoLiTrSuStSh04,
+! author={Dimitri Komatitsch and Qinya Liu and Jeroen Tromp and Peter S\"{u}ss
+! and Christiane Stidham and John H. Shaw},
+! year=2004,
+! title={Simulations of Ground Motion in the {L}os {A}ngeles {B}asin
+! based upon the Spectral-Element Method},
+! journal={Bull. Seism. Soc. Am.},
+! volume=94,
+! number=1,
+! pages={187-206}}
+!
+! and/or another article from http://web.univ-pau.fr/~dkomati1/publications.html
+!
+!
+! If you use the kernel capabilities of the code, please cite at least one article
+! written by the developers of the package, for instance:
+!
+! @ARTICLE{TrKoLi08,
+! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
+! title = {Spectral-Element and Adjoint Methods in Seismology},
+! journal = {Communications in Computational Physics},
+! year = {2008},
+! volume = {3},
+! pages = {1-32},
+! number = {1}}
+!
+! or
+!
+! @ARTICLE{LiTr06,
+! author={Qinya Liu and Jeroen Tromp},
+! title={Finite-frequency kernels based on adjoint methods},
+! journal={Bull. Seismol. Soc. Am.},
+! year=2006,
+! volume=96,
+! number=6,
+! pages={2383-2397},
+! doi={10.1785/0120060041}}
+!
+!
+! Reference frame - convention:
+! ----------------------------
+!
+! The code uses the following convention for the reference frame:
+!
+! - X axis is East
+! - Y axis is North
+! - Z axis is up
+!
+! Note that this convention is different from both the Aki-Richards convention
+! and the Harvard CMT convention.
+!
+! Let us recall that the Aki-Richards convention is:
+!
+! - X axis is North
+! - Y axis is East
+! - Z axis is down
+!
+! and that the Harvard CMT convention is:
+!
+! - X axis is South
+! - Y axis is East
+! - Z axis is up
+!
+! To report bugs or suggest improvements to the code, please send an email
+! to Jeroen Tromp <jtromp AT princeton.edu> and/or use our online
+! bug tracking system at http://www.geodynamics.org/roundup .
+!
+! Evolution of the code:
+! ---------------------
+!
+! MPI v. 2.0 "Sesame" (Spectral ElementS on Any MEsh), November 2010:
+! Dimitri Komatitsch, Nicolas Le Goff, Roland Martin and Pieyre Le Loher, University of Pau, France,
+! Jeroen Tromp and the Princeton group of developers, Princeton University, USA,
+! and Emanuele Casarotti, INGV Roma, Italy:
+! support for CUBIT meshes decomposed by SCOTCH, METIS or ZOLTAN;
+! much faster solver using Michel Deville's inlined matrix products.
+!
+! MPI v. 1.4 Dimitri Komatitsch, University of Pau, Qinya Liu and others, Caltech, September 2006:
+! better adjoint and kernel calculations, faster and better I/Os
+! on very large systems, many small improvements and bug fixes
+!
+! MPI v. 1.3 Dimitri Komatitsch, University of Pau, and Qinya Liu, Caltech, July 2005:
+! serial version, regular mesh, adjoint and kernel calculations, ParaView support
+!
+! MPI v. 1.2 Min Chen and Dimitri Komatitsch, Caltech, July 2004:
+! full anisotropy, volume movie
+!
+! MPI v. 1.1 Dimitri Komatitsch, Caltech, October 2002: Zhu's Moho map, scaling
+! of Vs with depth, Hauksson's regional model, attenuation, oceans, movies
+!
+! MPI v. 1.0 Dimitri Komatitsch, Caltech, May 2002: first MPI version based on global code
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ module generate_databases_par
+
+ implicit none
+
+ include "constants.h"
+
+! number of spectral elements in each block
+ integer nspec,npointot
+
+! local to global indexing array
+ integer, dimension(:,:,:,:), allocatable :: ibool
+
+! arrays with the mesh in double precision
+ double precision, dimension(:,:,:,:), allocatable :: xstore,ystore,zstore
+
+! proc numbers for MPI
+ integer :: myrank,sizeprocs,ier
+
+! use integer array to store topography values
+ integer :: UTM_PROJECTION_ZONE
+ logical :: SUPPRESS_UTM_PROJECTION
+ integer :: NX_TOPO,NY_TOPO
+ double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
+ character(len=100) :: topo_file
+ integer, dimension(:,:), allocatable :: itopo_bathy
+
+! timer MPI
+ double precision, external :: wtime
+ double precision :: time_start,tCPU
+
+! parameters read from parameter file
+ integer :: NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,SIMULATION_TYPE
+ integer :: NSOURCES
+
+ double precision :: DT,HDUR_MOVIE
+
+ logical :: ATTENUATION,USE_OLSEN_ATTENUATION, &
+ OCEANS, TOPOGRAPHY, SAVE_FORWARD
+ logical :: ANISOTROPY,ABSORBING_CONDITIONS,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
+
+ logical :: MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+ USE_HIGHRES_FOR_MOVIES
+ integer :: NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO,NTSTEP_BETWEEN_READ_ADJSRC
+
+ character(len=256) OUTPUT_FILES,LOCAL_PATH
+
+! parameters deduced from parameters read from file
+ integer :: NPROC
+
+! static memory size that will be needed by the solver
+ double precision :: max_static_memory_size,max_static_memory_size_request
+
+! this for all the regions
+ integer NSPEC_AB,NGLOB_AB
+
+ integer NSPEC2D_BOTTOM,NSPEC2D_TOP
+
+ double precision min_elevation,max_elevation
+ double precision min_elevation_all,max_elevation_all
+
+! for Databases of external meshes
+ double precision, dimension(:,:), allocatable :: nodes_coords_ext_mesh
+
+ integer :: dummy_node
+ integer :: dummy_elmnt
+
+ integer :: ispec, inode, num_interface,ie,imat,iface,icorner
+ integer :: nnodes_ext_mesh, nelmnts_ext_mesh
+ integer :: num_interfaces_ext_mesh
+ integer :: max_interface_size_ext_mesh
+ integer :: nmat_ext_mesh, nundefMat_ext_mesh
+ integer, dimension(:), allocatable :: my_neighbours_ext_mesh
+ integer, dimension(:), allocatable :: my_nelmnts_neighbours_ext_mesh
+
+ integer, dimension(:,:,:), allocatable :: my_interfaces_ext_mesh
+ integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh
+ integer, dimension(:), allocatable :: nibool_interfaces_ext_mesh
+
+ integer, dimension(:,:), allocatable :: elmnts_ext_mesh
+ integer, dimension(:,:), allocatable :: mat_ext_mesh
+ integer :: max_nibool_interfaces_ext_mesh
+ integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh_dummy
+
+ character(len=256) prname
+
+! boundaries and materials
+ double precision, dimension(:,:), allocatable :: materials_ext_mesh
+
+ integer :: ispec2D, boundary_number
+ integer :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, nspec2D_bottom_ext, nspec2D_top_ext
+
+ integer, dimension(:), allocatable :: ibelm_xmin,ibelm_xmax, &
+ ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top
+ integer, dimension(:,:), allocatable :: nodes_ibelm_xmin,nodes_ibelm_xmax, &
+ nodes_ibelm_ymin, nodes_ibelm_ymax, nodes_ibelm_bottom, nodes_ibelm_top
+
+ character (len=30), dimension(:,:), allocatable :: undef_mat_prop
+
+! moho (optional)
+ integer :: nspec2D_moho_ext
+ integer, dimension(:), allocatable :: ibelm_moho
+ integer, dimension(:,:), allocatable :: nodes_ibelm_moho
+
+ integer :: nglob,nglob_total,nspec_total
+
+ logical,dimension(:),allocatable :: ispec_is_surface_external_mesh,iglob_is_surface_external_mesh
+ integer :: nfaces_surface_ext_mesh,nfaces_surface_glob_ext_mesh
+
+! flag for noise simulation
+ integer :: NOISE_TOMOGRAPHY
+
+ end module generate_databases_par
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine generate_databases
+
+ use generate_databases_par
+ implicit none
+
+! sizeprocs returns number of processes started (should be equal to NPROC).
+! myrank is the rank of each process, between 0 and NPROC-1.
+! as usual in MPI, process 0 is in charge of coordinating everything
+! and also takes care of the main output
+ call world_size(sizeprocs)
+ call world_rank(myrank)
+
+! get the base pathname for output files
+ call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
+
+! open main output file, only written to by process 0
+ if(myrank == 0 .and. IMAIN /= ISTANDARD_OUTPUT) &
+ open(unit=IMAIN,file=trim(OUTPUT_FILES)//'/output_mesher.txt',status='unknown')
+
+! get MPI starting time
+ time_start = wtime()
+
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) '******************************************'
+ write(IMAIN,*) '*** Specfem3D MPI Mesher - f90 version ***'
+ write(IMAIN,*) '******************************************'
+ write(IMAIN,*)
+ endif
+
+! read the parameter file
+ call gd_read_parameters()
+
+! makes sure processes are synchronized
+ call sync_all()
+
+! reads topography and bathymetry file
+ call gd_read_topography()
+
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) '**************************'
+ write(IMAIN,*) 'creating mesh in the model'
+ write(IMAIN,*) '**************************'
+ write(IMAIN,*)
+ endif
+
+! reads Databases files
+ call gd_read_partition_files()
+
+! external mesh creation
+ call gd_setup_mesh()
+
+! finalize mesher
+ call gd_finalize()
+
+ end subroutine generate_databases
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine gd_read_parameters
+
+! reads and checks user input parameters
+
+ use generate_databases_par
+ implicit none
+
+! reads Par_file
+ call read_parameter_file( NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
+ UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+ ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
+ OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
+ MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+ NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
+ SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION, &
+ NTSTEP_BETWEEN_OUTPUT_INFO,SIMULATION_TYPE,SAVE_FORWARD, &
+ NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
+
+! check that the code is running with the requested nb of processes
+ if(sizeprocs /= NPROC) then
+ if( myrank == 0 ) then
+ write(IMAIN,*) 'error: number of processors supposed to run on: ',NPROC
+ write(IMAIN,*) 'error: number of processors actually run on: ',sizeprocs
+ endif
+ call exit_MPI(myrank,'wrong number of MPI processes')
+ endif
+
+! there would be a problem with absorbing boundaries for different NGLLX,NGLLY,NGLLZ values
+! just to be sure for now..
+ if( ABSORBING_CONDITIONS ) then
+ if( NGLLX /= NGLLY .and. NGLLY /= NGLLZ ) &
+ call exit_MPI(myrank,'must have NGLLX = NGLLY = NGLLZ for external meshes')
+ endif
+
+! info about external mesh simulation
+ if(myrank == 0) then
+ write(IMAIN,*) 'This is process ',myrank
+ write(IMAIN,*) 'There are ',sizeprocs,' MPI processes'
+ write(IMAIN,*) 'Processes are numbered from 0 to ',sizeprocs-1
+ write(IMAIN,*)
+ write(IMAIN,*) 'There is a total of ',NPROC,' slices'
+ write(IMAIN,*)
+ write(IMAIN,*) 'NGLLX = ',NGLLX
+ write(IMAIN,*) 'NGLLY = ',NGLLY
+ write(IMAIN,*) 'NGLLZ = ',NGLLZ
+
+ write(IMAIN,*)
+ write(IMAIN,*) 'Shape functions defined by NGNOD = ',NGNOD,' control nodes'
+ write(IMAIN,*) 'Surface shape functions defined by NGNOD2D = ',NGNOD2D,' control nodes'
+ write(IMAIN,*)
+ endif
+
+! check that reals are either 4 or 8 bytes
+ if(CUSTOM_REAL /= SIZE_REAL .and. CUSTOM_REAL /= SIZE_DOUBLE) &
+ call exit_MPI(myrank,'wrong size of CUSTOM_REAL for reals')
+
+ if(NGNOD /= 8) call exit_MPI(myrank,'number of control nodes must be 8')
+ if(NGNOD2D /= 4) call exit_MPI(myrank,'elements with 8 points should have NGNOD2D = 4')
+
+! for the number of standard linear solids for attenuation
+ if(N_SLS /= 3) call exit_MPI(myrank,'number of SLS must be 3')
+
+ ! exclusive movie flags
+ if( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP ) then
+ MOVIE_SURFACE = .false.
+ CREATE_SHAKEMAP = .false.
+ endif
+
+ ! for noise simulations, we need to save movies at the surface (where the noise is generated)
+ ! and thus we force MOVIE_SURFACE to be .true., in order to use variables defined for surface movies later
+ if ( NOISE_TOMOGRAPHY /= 0 ) then
+ MOVIE_SURFACE = .true.
+ CREATE_SHAKEMAP = .false.
+ if( ( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP ) .and. myrank == 0 ) then
+ write(IMAIN,*) 'error: when running noise simulations ( NOISE_TOMOGRAPHY /= 0 ),'
+ write(IMAIN,*) ' we can NOT use EXTERNAL_MESH_MOVIE_SURFACE or EXTERNAL_MESH_CREATE_SHAKEMAP'
+ write(IMAIN,*) ' change EXTERNAL_MESH_MOVIE_SURFACE & EXTERNAL_MESH_CREATE_SHAKEMAP in constant.h'
+ call exit_MPI(myrank,'incompatible NOISE_TOMOGRAPHY, EXTERNAL_MESH_MOVIE_SURFACE, EXTERNAL_MESH_CREATE_SHAKEMAP')
+ endif
+ endif
+
+
+ if(myrank == 0) then
+! chris: I am not sure if we should suppress the following. topography should appear in the external mesh
+! leave it for now
+
+ write(IMAIN,*)
+ if(SUPPRESS_UTM_PROJECTION) then
+ write(IMAIN,*) 'suppressing UTM projection'
+ else
+ write(IMAIN,*) 'using UTM projection in region ',UTM_PROJECTION_ZONE
+ endif
+
+ write(IMAIN,*)
+ if(ATTENUATION) then
+ write(IMAIN,*) 'incorporating attenuation using ',N_SLS,' standard linear solids'
+ if(USE_OLSEN_ATTENUATION) then
+ write(IMAIN,*) 'using Olsen''s attenuation'
+ else
+ write(IMAIN,*) 'not using Olsen''s attenuation'
+ endif
+ else
+ write(IMAIN,*) 'no attenuation'
+ endif
+
+ write(IMAIN,*)
+ if(ANISOTROPY) then
+ write(IMAIN,*) 'incorporating anisotropy'
+ else
+ write(IMAIN,*) 'no anisotropy'
+ endif
+
+ write(IMAIN,*)
+ if(OCEANS) then
+ write(IMAIN,*) 'incorporating the oceans using equivalent load'
+ if( TOPOGRAPHY ) write(IMAIN,*) ' with elevation from topography file'
+ else
+ write(IMAIN,*) 'no oceans'
+ endif
+
+ write(IMAIN,*)
+
+ endif
+
+ end subroutine gd_read_parameters
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine gd_read_topography
+
+! reads in topography files
+
+ use generate_databases_par
+ implicit none
+
+ if( OCEANS .and. TOPOGRAPHY ) then
+
+ ! for Southern California
+ NX_TOPO = NX_TOPO_SOCAL
+ NY_TOPO = NY_TOPO_SOCAL
+ ORIG_LAT_TOPO = ORIG_LAT_TOPO_SOCAL
+ ORIG_LONG_TOPO = ORIG_LONG_TOPO_SOCAL
+ DEGREES_PER_CELL_TOPO = DEGREES_PER_CELL_TOPO_SOCAL
+ topo_file = TOPO_FILE_SOCAL
+
+ allocate(itopo_bathy(NX_TOPO,NY_TOPO),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array itopo_bathy'
+
+ call read_topo_bathy_file(itopo_bathy,NX_TOPO,NY_TOPO,topo_file)
+
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'regional topography file read ranges in m from ',minval(itopo_bathy),' to ',maxval(itopo_bathy)
+ write(IMAIN,*)
+ endif
+ else
+ NX_TOPO = 1
+ NY_TOPO = 1
+ allocate(itopo_bathy(NX_TOPO,NY_TOPO),stat=ier)
+ if( ier /= 0 ) stop 'error allocating dummy array itopo_bathy'
+
+ endif
+
+!! read basement map
+! if(BASEMENT_MAP) then
+! call get_value_string(BASEMENT_MAP_FILE,'model.BASEMENT_MAP_FILE', &
+! '../in_data_files/la_basement/reggridbase2_filtered_ascii.dat')
+! open(unit=55,file=BASEMENT_MAP_FILE,status='old',action='read')
+! do ix=1,NX_BASEMENT
+! do iy=1,NY_BASEMENT
+! read(55,*) iz_basement
+! z_basement(ix,iy) = dble(iz_basement)
+! enddo
+! enddo
+! close(55)
+! endif
+
+ end subroutine gd_read_topography
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine gd_read_partition_files
+
+! reads in proc***_Databases files
+
+ use generate_databases_par
+ implicit none
+
+ integer :: num_xmin,num_xmax,num_ymin,num_ymax,num_top,num_bottom,num
+ integer :: num_moho
+ integer :: j
+ character(len=128) :: line
+
+! read databases about external mesh simulation
+! global node coordinates
+ call create_name_database(prname,myrank,LOCAL_PATH)
+ !open(unit=IIN,file=prname(1:len_trim(prname))//'Database', &
+ ! status='old',action='read',form='formatted',iostat=ier)
+ open(unit=IIN,file=prname(1:len_trim(prname))//'Database', &
+ status='old',action='read',form='unformatted',iostat=ier)
+ if( ier /= 0 ) then
+ write(IMAIN,*) 'error opening file: ',prname(1:len_trim(prname))//'Database'
+ write(IMAIN,*) 'make sure file exists'
+ call exit_mpi(myrank,'error opening database file')
+ endif
+ !read(IIN,*) nnodes_ext_mesh
+ read(IIN) nnodes_ext_mesh
+
+ allocate(nodes_coords_ext_mesh(NDIM,nnodes_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array nodes_coords_ext_mesh'
+
+ do inode = 1, nnodes_ext_mesh
+ !read(IIN,*) dummy_node, nodes_coords_ext_mesh(1,inode), nodes_coords_ext_mesh(2,inode), &
+ ! nodes_coords_ext_mesh(3,inode)
+
+ read(IIN) dummy_node, nodes_coords_ext_mesh(1,inode), nodes_coords_ext_mesh(2,inode), &
+ nodes_coords_ext_mesh(3,inode)
+
+ enddo
+
+ call sum_all_i(nnodes_ext_mesh,num)
+ if(myrank == 0) then
+ write(IMAIN,*) ' external mesh points: ',num
+ endif
+ call sync_all()
+
+! read materials' physical properties
+ !read(IIN,*) nmat_ext_mesh, nundefMat_ext_mesh
+ read(IIN) nmat_ext_mesh, nundefMat_ext_mesh
+
+ allocate(materials_ext_mesh(6,nmat_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array materials_ext_mesh'
+ do imat = 1, nmat_ext_mesh
+ ! format: #(1) rho #(2) vp #(3) vs #(4) Q_flag #(5) anisotropy_flag #(6) material_domain_id
+ !read(IIN,*) materials_ext_mesh(1,imat), materials_ext_mesh(2,imat), materials_ext_mesh(3,imat), &
+ ! materials_ext_mesh(4,imat), materials_ext_mesh(5,imat), materials_ext_mesh(6,imat)
+
+ read(IIN) materials_ext_mesh(1,imat), materials_ext_mesh(2,imat), materials_ext_mesh(3,imat), &
+ materials_ext_mesh(4,imat), materials_ext_mesh(5,imat), materials_ext_mesh(6,imat)
+
+ ! output
+ !print*,'materials:',materials_ext_mesh(1,imat), materials_ext_mesh(2,imat), materials_ext_mesh(3,imat), &
+ ! materials_ext_mesh(4,imat), materials_ext_mesh(5,imat), materials_ext_mesh(6,imat)
+ end do
+
+ if(myrank == 0) then
+ write(IMAIN,*) ' defined materials: ',nmat_ext_mesh
+ endif
+ call sync_all()
+
+ allocate(undef_mat_prop(6,nundefMat_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array undef_mat_prop'
+ do imat = 1, nundefMat_ext_mesh
+ ! format example tomography:
+ ! -1 tomography elastic tomography_model.xyz 1 2
+ ! format example interface:
+ ! -1 interface 14 15 1 2
+ !read(IIN,*) undef_mat_prop(1,imat),undef_mat_prop(2,imat),undef_mat_prop(3,imat),undef_mat_prop(4,imat), &
+ ! undef_mat_prop(5,imat), undef_mat_prop(6,imat)
+
+ read(IIN) undef_mat_prop(1,imat),undef_mat_prop(2,imat),undef_mat_prop(3,imat),undef_mat_prop(4,imat), &
+ undef_mat_prop(5,imat), undef_mat_prop(6,imat)
+
+ ! output debug
+ !print*,'undefined materials:'
+ !print*,undef_mat_prop(:,imat)
+ end do
+
+ if(myrank == 0) then
+ write(IMAIN,*) ' undefined materials: ',nundefMat_ext_mesh
+ endif
+ call sync_all()
+
+! element indexing
+ !read(IIN,*) nelmnts_ext_mesh
+ read(IIN) nelmnts_ext_mesh
+ allocate(elmnts_ext_mesh(esize,nelmnts_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array elmnts_ext_mesh'
+ allocate(mat_ext_mesh(2,nelmnts_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array mat_ext_mesh'
+
+ ! reads in material association for each spectral element and corner node indices
+ do ispec = 1, nelmnts_ext_mesh
+ ! format:
+ ! # ispec_local # material_index_1 # material_index_2 # corner_id1 # corner_id2 # ... # corner_id8
+ !read(IIN,*) dummy_elmnt, mat_ext_mesh(1,ispec),mat_ext_mesh(2,ispec), &
+ ! elmnts_ext_mesh(1,ispec), elmnts_ext_mesh(2,ispec), elmnts_ext_mesh(3,ispec), elmnts_ext_mesh(4,ispec), &
+ ! elmnts_ext_mesh(5,ispec), elmnts_ext_mesh(6,ispec), elmnts_ext_mesh(7,ispec), elmnts_ext_mesh(8,ispec)
+
+ read(IIN) dummy_elmnt, mat_ext_mesh(1,ispec),mat_ext_mesh(2,ispec), &
+ elmnts_ext_mesh(1,ispec), elmnts_ext_mesh(2,ispec), elmnts_ext_mesh(3,ispec), elmnts_ext_mesh(4,ispec), &
+ elmnts_ext_mesh(5,ispec), elmnts_ext_mesh(6,ispec), elmnts_ext_mesh(7,ispec), elmnts_ext_mesh(8,ispec)
+
+ ! check debug
+ if( dummy_elmnt /= ispec) stop "error ispec order in materials file"
+
+ enddo
+ NSPEC_AB = nelmnts_ext_mesh
+
+ call sum_all_i(nspec_ab,num)
+ if(myrank == 0) then
+ write(IMAIN,*) ' spectral elements: ',num
+ endif
+ call sync_all()
+
+
+! read boundaries
+ !read(IIN,*) boundary_number ,nspec2D_xmin
+ read(IIN) boundary_number ,nspec2D_xmin
+ if(boundary_number /= 1) stop "Error : invalid database file"
+
+ !read(IIN,*) boundary_number ,nspec2D_xmax
+ read(IIN) boundary_number ,nspec2D_xmax
+ if(boundary_number /= 2) stop "Error : invalid database file"
+
+ !read(IIN,*) boundary_number ,nspec2D_ymin
+ read(IIN) boundary_number ,nspec2D_ymin
+ if(boundary_number /= 3) stop "Error : invalid database file"
+
+ !read(IIN,*) boundary_number ,nspec2D_ymax
+ read(IIN) boundary_number ,nspec2D_ymax
+ if(boundary_number /= 4) stop "Error : invalid database file"
+
+ !read(IIN,*) boundary_number ,nspec2D_bottom_ext
+ read(IIN) boundary_number ,nspec2D_bottom_ext
+ if(boundary_number /= 5) stop "Error : invalid database file"
+
+ !read(IIN,*) boundary_number ,nspec2D_top_ext
+ read(IIN) boundary_number ,nspec2D_top_ext
+ if(boundary_number /= 6) stop "Error : invalid database file"
+
+ NSPEC2D_BOTTOM = nspec2D_bottom_ext
+ NSPEC2D_TOP = nspec2D_top_ext
+
+ allocate(ibelm_xmin(nspec2D_xmin),nodes_ibelm_xmin(4,nspec2D_xmin),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibelm_xmin etc.'
+ do ispec2D = 1,nspec2D_xmin
+ !read(IIN,*) ibelm_xmin(ispec2D),(nodes_ibelm_xmin(j,ispec2D),j=1,4)
+ read(IIN) ibelm_xmin(ispec2D),(nodes_ibelm_xmin(j,ispec2D),j=1,4)
+ end do
+
+ allocate(ibelm_xmax(nspec2D_xmax),nodes_ibelm_xmax(4,nspec2D_xmax),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibelm_xmax etc.'
+ do ispec2D = 1,nspec2D_xmax
+ !read(IIN,*) ibelm_xmax(ispec2D),(nodes_ibelm_xmax(j,ispec2D),j=1,4)
+ read(IIN) ibelm_xmax(ispec2D),(nodes_ibelm_xmax(j,ispec2D),j=1,4)
+ end do
+
+ allocate(ibelm_ymin(nspec2D_ymin),nodes_ibelm_ymin(4,nspec2D_ymin),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibelm_ymin'
+ do ispec2D = 1,nspec2D_ymin
+ !read(IIN,*) ibelm_ymin(ispec2D),(nodes_ibelm_ymin(j,ispec2D),j=1,4)
+ read(IIN) ibelm_ymin(ispec2D),(nodes_ibelm_ymin(j,ispec2D),j=1,4)
+ end do
+
+ allocate(ibelm_ymax(nspec2D_ymax),nodes_ibelm_ymax(4,nspec2D_ymax),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibelm_ymax etc.'
+ do ispec2D = 1,nspec2D_ymax
+ !read(IIN,*) ibelm_ymax(ispec2D),(nodes_ibelm_ymax(j,ispec2D),j=1,4)
+ read(IIN) ibelm_ymax(ispec2D),(nodes_ibelm_ymax(j,ispec2D),j=1,4)
+ end do
+
+ allocate(ibelm_bottom(nspec2D_bottom_ext),nodes_ibelm_bottom(4,nspec2D_bottom_ext),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibelm_bottom etc.'
+ do ispec2D = 1,nspec2D_bottom_ext
+ !read(IIN,*) ibelm_bottom(ispec2D),(nodes_ibelm_bottom(j,ispec2D),j=1,4)
+ read(IIN) ibelm_bottom(ispec2D),(nodes_ibelm_bottom(j,ispec2D),j=1,4)
+ end do
+
+ allocate(ibelm_top(nspec2D_top_ext),nodes_ibelm_top(4,nspec2D_top_ext),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibelm_top etc.'
+ do ispec2D = 1,nspec2D_top_ext
+ !read(IIN,*) ibelm_top(ispec2D),(nodes_ibelm_top(j,ispec2D),j=1,4)
+ read(IIN) ibelm_top(ispec2D),(nodes_ibelm_top(j,ispec2D),j=1,4)
+ end do
+
+ call sum_all_i(nspec2D_xmin,num_xmin)
+ call sum_all_i(nspec2D_xmax,num_xmax)
+ call sum_all_i(nspec2D_ymin,num_ymin)
+ call sum_all_i(nspec2D_ymax,num_ymax)
+ call sum_all_i(nspec2D_top_ext,num_top)
+ call sum_all_i(nspec2D_bottom_ext,num_bottom)
+
+ if(myrank == 0) then
+ write(IMAIN,*) ' absorbing boundaries: '
+ write(IMAIN,*) ' xmin,xmax: ',num_xmin,num_xmax
+ write(IMAIN,*) ' ymin,ymax: ',num_ymin,num_ymax
+ write(IMAIN,*) ' bottom,top: ',num_bottom,num_top
+ endif
+ call sync_all()
+
+! MPI interfaces between different partitions
+ if( NPROC > 1 ) then
+ ! format: #number_of_MPI_interfaces #maximum_number_of_elements_on_each_interface
+ !read(IIN,*) num_interfaces_ext_mesh, max_interface_size_ext_mesh
+ read(IIN) num_interfaces_ext_mesh, max_interface_size_ext_mesh
+ else
+ num_interfaces_ext_mesh = 0
+ max_interface_size_ext_mesh = 0
+ endif
+
+ ! allocates interfaces
+ allocate(my_neighbours_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array my_neighbours_ext_mesh'
+ allocate(my_nelmnts_neighbours_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array my_nelmnts_neighbours_ext_mesh'
+ allocate(my_interfaces_ext_mesh(6,max_interface_size_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array my_interfaces_ext_mesh'
+ allocate(ibool_interfaces_ext_mesh(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibool_interfaces_ext_mesh'
+ allocate(nibool_interfaces_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array nibool_interfaces_ext_mesh'
+
+ ! loops over MPI interfaces with other partitions
+ do num_interface = 1, num_interfaces_ext_mesh
+ ! format: #process_interface_id #number_of_elements_on_interface
+ ! where
+ ! process_interface_id = rank of (neighbor) process to share MPI interface with
+ ! number_of_elements_on_interface = number of interface elements
+ !read(IIN,*) my_neighbours_ext_mesh(num_interface), my_nelmnts_neighbours_ext_mesh(num_interface)
+ read(IIN) my_neighbours_ext_mesh(num_interface), my_nelmnts_neighbours_ext_mesh(num_interface)
+
+ ! loops over interface elements
+ do ie = 1, my_nelmnts_neighbours_ext_mesh(num_interface)
+ ! format: #(1)spectral_element_id #(2)interface_type #(3)node_id1 #(4)node_id2 #(5)...
+ !
+ ! interface types:
+ ! 1 - corner point only
+ ! 2 - element edge
+ ! 4 - element face
+ !read(IIN,*) my_interfaces_ext_mesh(1,ie,num_interface), my_interfaces_ext_mesh(2,ie,num_interface), &
+ ! my_interfaces_ext_mesh(3,ie,num_interface), my_interfaces_ext_mesh(4,ie,num_interface), &
+ ! my_interfaces_ext_mesh(5,ie,num_interface), my_interfaces_ext_mesh(6,ie,num_interface)
+
+ read(IIN) my_interfaces_ext_mesh(1,ie,num_interface), my_interfaces_ext_mesh(2,ie,num_interface), &
+ my_interfaces_ext_mesh(3,ie,num_interface), my_interfaces_ext_mesh(4,ie,num_interface), &
+ my_interfaces_ext_mesh(5,ie,num_interface), my_interfaces_ext_mesh(6,ie,num_interface)
+ enddo
+ enddo
+
+ call sum_all_i(num_interfaces_ext_mesh,num)
+ if(myrank == 0) then
+ write(IMAIN,*) ' number of MPI partition interfaces: ',num
+ endif
+ call sync_all()
+
+ ! optional moho
+ if( SAVE_MOHO_MESH ) then
+ ! checks if additional line exists
+ !read(IIN,'(a128)',iostat=ier) line
+ read(IIN,iostat=ier) boundary_number,nspec2D_moho_ext
+ if( ier /= 0 ) then
+ ! no moho informations given
+ nspec2D_moho_ext = 0
+ boundary_number = 7
+ else
+ ! tries to read in number of moho elements
+ read(line,*,iostat=ier) boundary_number ,nspec2D_moho_ext
+ if( ier /= 0 ) call exit_mpi(myrank,'error reading moho mesh in database')
+ endif
+ if(boundary_number /= 7) stop "Error : invalid database file"
+
+ ! checks total number of elements
+ call sum_all_i(nspec2D_moho_ext,num_moho)
+ if( num_moho == 0 ) call exit_mpi(myrank,'error no moho mesh in database')
+
+ ! reads in element informations
+ allocate(ibelm_moho(nspec2D_moho_ext),nodes_ibelm_moho(4,nspec2D_moho_ext),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibelm_moho etc.'
+ do ispec2D = 1,nspec2D_moho_ext
+ ! format: #element_id #node_id1 #node_id2 #node_id3 #node_id4
+ !read(IIN,*) ibelm_moho(ispec2D),(nodes_ibelm_moho(j,ispec2D),j=1,4)
+ read(IIN) ibelm_moho(ispec2D),(nodes_ibelm_moho(j,ispec2D),j=1,4)
+ end do
+
+ ! user output
+ if(myrank == 0) then
+ write(IMAIN,*) ' moho surfaces: ',num_moho
+ endif
+ call sync_all()
+ endif
+
+ close(IIN)
+
+ end subroutine gd_read_partition_files
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine gd_setup_mesh
+
+! mesh creation for static solver
+
+ use generate_databases_par
+ implicit none
+
+! assign theoretical number of elements
+ nspec = NSPEC_AB
+
+! compute maximum number of points
+ npointot = nspec * NGLLCUBE
+
+! use dynamic allocation to allocate memory for arrays
+! allocate(idoubling(nspec))
+ allocate(ibool(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibool'
+ allocate(xstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array xstore'
+ allocate(ystore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ystore'
+ allocate(zstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if(ier /= 0) call exit_MPI(myrank,'not enough memory to allocate arrays')
+
+ call memory_eval_mesher(myrank,nspec,npointot,nnodes_ext_mesh, &
+ nelmnts_ext_mesh,nmat_ext_mesh,num_interfaces_ext_mesh, &
+ max_interface_size_ext_mesh,nspec2D_xmin,nspec2D_xmax, &
+ nspec2D_ymin,nspec2D_ymax,nspec2D_bottom,nspec2D_top, &
+ max_static_memory_size_request)
+
+ max_static_memory_size = max_static_memory_size_request
+
+! make sure everybody is synchronized
+ call sync_all()
+
+! main working routine to create all the regions of the mesh
+ if(myrank == 0) then
+ write(IMAIN,*) 'create regions: '
+ endif
+ call create_regions_mesh_ext(ibool, &
+ xstore, ystore, zstore, nspec, npointot, myrank, LOCAL_PATH, &
+ nnodes_ext_mesh, nelmnts_ext_mesh, &
+ nodes_coords_ext_mesh, elmnts_ext_mesh, &
+ max_static_memory_size, mat_ext_mesh, materials_ext_mesh, &
+ nmat_ext_mesh, undef_mat_prop, nundefMat_ext_mesh, &
+ num_interfaces_ext_mesh, max_interface_size_ext_mesh, &
+ my_neighbours_ext_mesh, my_nelmnts_neighbours_ext_mesh, &
+ my_interfaces_ext_mesh, &
+ ibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh, &
+ nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, &
+ NSPEC2D_BOTTOM, NSPEC2D_TOP,&
+ ibelm_xmin, ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top, &
+ nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax, &
+ nodes_ibelm_bottom,nodes_ibelm_top, &
+ SAVE_MESH_FILES,nglob, &
+ ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
+ ATTENUATION,USE_OLSEN_ATTENUATION, &
+ UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
+ ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
+ itopo_bathy)
+
+! Moho boundary parameters, 2-D jacobians and normals
+ if( SAVE_MOHO_MESH ) then
+ call create_regions_mesh_save_moho(myrank,nglob,nspec, &
+ nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
+ nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
+ endif
+
+! defines global number of nodes in model
+ NGLOB_AB = nglob
+
+! print min and max of topography included
+ min_elevation = HUGEVAL
+ max_elevation = -HUGEVAL
+ do iface = 1,nspec2D_top_ext
+ do icorner = 1,NGNOD2D
+ inode = nodes_ibelm_top(icorner,iface)
+ if (nodes_coords_ext_mesh(3,inode) < min_elevation) then
+ min_elevation = nodes_coords_ext_mesh(3,inode)
+ end if
+ if (nodes_coords_ext_mesh(3,inode) > max_elevation) then
+ max_elevation = nodes_coords_ext_mesh(3,inode)
+ end if
+ end do
+ end do
+
+! compute the maximum of the maxima for all the slices using an MPI reduction
+ call min_all_dp(min_elevation,min_elevation_all)
+ call max_all_dp(max_elevation,max_elevation_all)
+
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'min and max of topography included in mesh in m is ',min_elevation_all,' ',max_elevation_all
+ write(IMAIN,*)
+ endif
+
+! clean-up
+ deallocate(xstore,ystore,zstore)
+
+! make sure everybody is synchronized
+ call sync_all()
+
+ end subroutine gd_setup_mesh
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine gd_finalize
+
+! checks user input parameters
+
+ use generate_databases_par
+ implicit none
+
+ integer :: i
+
+! print number of points and elements in the mesh
+ call sum_all_i(NGLOB_AB,nglob_total)
+ call sum_all_i(NSPEC_AB,nspec_total)
+ call sync_all()
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'Repartition of elements:'
+ write(IMAIN,*) '-----------------------'
+ write(IMAIN,*)
+ write(IMAIN,*) 'total number of elements in each slice: ',NSPEC_AB
+ write(IMAIN,*) 'total number of points in each slice: ',NGLOB_AB
+ write(IMAIN,*)
+ write(IMAIN,*) 'total number of elements in entire mesh: ',nspec_total ! NSPEC_AB*NPROC
+ write(IMAIN,*) 'total number of points in entire mesh: ',nglob_total !NGLOB_AB*NPROC
+ write(IMAIN,*) 'total number of DOFs in entire mesh: ',nglob_total*NDIM !NGLOB_AB*NPROC*NDIM
+ write(IMAIN,*)
+ write(IMAIN,*) 'total number of time steps in the solver will be: ',NSTEP
+ write(IMAIN,*)
+ ! write information about precision used for floating-point operations
+ if(CUSTOM_REAL == SIZE_REAL) then
+ write(IMAIN,*) 'using single precision for the calculations'
+ else
+ write(IMAIN,*) 'using double precision for the calculations'
+ endif
+ write(IMAIN,*)
+ write(IMAIN,*) 'smallest and largest possible floating-point numbers are: ',tiny(1._CUSTOM_REAL),huge(1._CUSTOM_REAL)
+ write(IMAIN,*)
+ endif
+
+! gets number of surface elements (for movie outputs)
+ allocate( ispec_is_surface_external_mesh(NSPEC_AB), &
+ iglob_is_surface_external_mesh(NGLOB_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array'
+ max_nibool_interfaces_ext_mesh = maxval(nibool_interfaces_ext_mesh)
+ allocate(ibool_interfaces_ext_mesh_dummy(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array'
+ do i = 1, num_interfaces_ext_mesh
+ ibool_interfaces_ext_mesh_dummy(:,:) = ibool_interfaces_ext_mesh(1:max_nibool_interfaces_ext_mesh,:)
+ enddo
+ call sync_all()
+ call detect_surface(NPROC,NGLOB_AB,NSPEC_AB,ibool, &
+ ispec_is_surface_external_mesh, &
+ iglob_is_surface_external_mesh, &
+ nfaces_surface_ext_mesh, &
+ num_interfaces_ext_mesh, &
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh, &
+ ibool_interfaces_ext_mesh_dummy )
+
+ deallocate(ibool)
+ deallocate(ispec_is_surface_external_mesh)
+ deallocate(iglob_is_surface_external_mesh)
+ deallocate(ibool_interfaces_ext_mesh_dummy)
+
+ ! takes number of faces for top, free surface only
+ if( MOVIE_SURFACE .or. CREATE_SHAKEMAP ) then
+ nfaces_surface_ext_mesh = NSPEC2D_TOP
+ endif
+
+! number of surface faces for all partitions together
+ call sum_all_i(nfaces_surface_ext_mesh,nfaces_surface_glob_ext_mesh)
+
+
+! copy number of elements and points in an include file for the solver
+ if( myrank == 0 ) then
+ call save_header_file(NSPEC_AB,NGLOB_AB,NPROC, &
+ ATTENUATION,ANISOTROPY,NSTEP,DT, &
+ SIMULATION_TYPE,max_static_memory_size,nfaces_surface_glob_ext_mesh)
+ endif
+
+! elapsed time since beginning of mesh generation
+ if(myrank == 0) then
+ tCPU = wtime() - time_start
+ write(IMAIN,*)
+ write(IMAIN,*) 'Elapsed time for mesh generation and buffer creation in seconds = ',tCPU
+ write(IMAIN,*) 'End of mesh generation'
+ write(IMAIN,*)
+ endif
+
+! close main output file
+ if(myrank == 0) then
+ write(IMAIN,*) 'done'
+ write(IMAIN,*)
+ close(IMAIN)
+ endif
+
+! synchronize all the processes to make sure everybody has finished
+ call sync_all()
+
+ end subroutine gd_finalize
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_global.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_global.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_global.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,295 +1,295 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine get_global(nspec,xp,yp,zp,iglob,loc,ifseg,nglob,npointot,UTM_X_MIN,UTM_X_MAX)
-
-! this routine MUST be in double precision to avoid sensitivity
-! to roundoff errors in the coordinates of the points
-
-! non-structured global numbering software provided by Paul F. Fischer
-
-! leave the sorting subroutines in the same source file to allow for inlining
-
- implicit none
-
- include "constants.h"
-
-
- integer npointot
- integer nspec,nglob
- integer iglob(npointot),loc(npointot)
- logical ifseg(npointot)
- double precision xp(npointot),yp(npointot),zp(npointot)
- double precision UTM_X_MIN,UTM_X_MAX
-
- integer ispec,i,j,ier
- integer ieoff,ilocnum,nseg,ioff,iseg,ig
-
- integer, dimension(:), allocatable :: ind,ninseg,iwork
- double precision, dimension(:), allocatable :: work
-
-! geometry tolerance parameter to calculate number of independent grid points
-! small value for double precision and to avoid sensitivity to roundoff
- double precision SMALLVALTOL
-
-! define geometrical tolerance based upon typical size of the model
- SMALLVALTOL = 1.d-10 * dabs(UTM_X_MAX - UTM_X_MIN)
-
-! dynamically allocate arrays
- allocate(ind(npointot), &
- ninseg(npointot), &
- iwork(npointot), &
- work(npointot),stat=ier)
- if( ier /= 0 ) stop 'error allocating arrays'
-
-! establish initial pointers
- do ispec=1,nspec
- ieoff=NGLLCUBE*(ispec-1)
- do ilocnum=1,NGLLCUBE
- loc(ilocnum+ieoff)=ilocnum+ieoff
- enddo
- enddo
-
- ifseg(:)=.false.
-
- nseg=1
- ifseg(1)=.true.
- ninseg(1)=npointot
-
- do j=1,NDIM
-
-! sort within each segment
- ioff=1
- do iseg=1,nseg
- if(j == 1) then
- call rank(xp(ioff),ind,ninseg(iseg))
- else if(j == 2) then
- call rank(yp(ioff),ind,ninseg(iseg))
- else
- call rank(zp(ioff),ind,ninseg(iseg))
- endif
- call swap_all(loc(ioff),xp(ioff),yp(ioff),zp(ioff),iwork,work,ind,ninseg(iseg))
- ioff=ioff+ninseg(iseg)
- enddo
-
-! check for jumps in current coordinate
-! compare the coordinates of the points within a small tolerance
- if(j == 1) then
- do i=2,npointot
- if(dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
- enddo
- else if(j == 2) then
- do i=2,npointot
- if(dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
- enddo
- else
- do i=2,npointot
- if(dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
- enddo
- endif
-
-! count up number of different segments
- nseg=0
- do i=1,npointot
- if(ifseg(i)) then
- nseg=nseg+1
- ninseg(nseg)=1
- else
- ninseg(nseg)=ninseg(nseg)+1
- endif
- enddo
- enddo
-
-! assign global node numbers (now sorted lexicographically)
- ig=0
- do i=1,npointot
- if(ifseg(i)) ig=ig+1
- iglob(loc(i))=ig
- enddo
-
- nglob=ig
-
-! deallocate arrays
- deallocate(ind)
- deallocate(ninseg)
- deallocate(iwork)
- deallocate(work)
-
- end subroutine get_global
-
-! -----------------------------------
-
-! sorting routines put in same file to allow for inlining
-
- subroutine rank(A,IND,N)
-!
-! Use Heap Sort (Numerical Recipes)
-!
- implicit none
-
- integer n
- double precision A(n)
- integer IND(n)
-
- integer i,j,l,ir,indx
- double precision q
-
- do j=1,n
- IND(j)=j
- enddo
-
- if (n == 1) return
-
- L=n/2+1
- ir=n
- 100 CONTINUE
- IF (l>1) THEN
- l=l-1
- indx=ind(l)
- q=a(indx)
- ELSE
- indx=ind(ir)
- q=a(indx)
- ind(ir)=ind(1)
- ir=ir-1
- if (ir == 1) then
- ind(1)=indx
- return
- endif
- ENDIF
- i=l
- j=l+l
- 200 CONTINUE
- IF (J <= IR) THEN
- IF (J<IR) THEN
- IF ( A(IND(j))<A(IND(j+1)) ) j=j+1
- ENDIF
- IF (q<A(IND(j))) THEN
- IND(I)=IND(J)
- I=J
- J=J+J
- ELSE
- J=IR+1
- ENDIF
- goto 200
- ENDIF
- IND(I)=INDX
- goto 100
-
- end subroutine rank
-
-! ------------------------------------------------------------------
-
- subroutine swap_all(IA,A,B,C,IW,W,ind,n)
-!
-! swap arrays IA, A, B and C according to addressing in array IND
-!
- implicit none
-
- integer n
-
- integer IND(n)
- integer IA(n),IW(n)
- double precision A(n),B(n),C(n),W(n)
-
- integer i
-
- IW(:) = IA(:)
- W(:) = A(:)
-
- do i=1,n
- IA(i)=IW(ind(i))
- A(i)=W(ind(i))
- enddo
-
- W(:) = B(:)
-
- do i=1,n
- B(i)=W(ind(i))
- enddo
-
- W(:) = C(:)
-
- do i=1,n
- C(i)=W(ind(i))
- enddo
-
-end subroutine swap_all
-
-! ------------------------------------------------------------------
-
-
- subroutine get_global_indirect_addressing(nspec,nglob,ibool)
-
-!
-!- we can create a new indirect addressing to reduce cache misses
-! (put into this subroutine but compiler keeps on complaining that it can't vectorize loops...)
-
- implicit none
-
- include "constants.h"
-
- integer :: nspec,nglob
- integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-
-! mask to sort ibool
- integer, dimension(:), allocatable :: mask_ibool
- integer, dimension(:,:,:,:), allocatable :: copy_ibool_ori
- integer :: inumber
- integer:: i,j,k,ispec,ier
-
-! copies original array
- allocate(copy_ibool_ori(NGLLX,NGLLY,NGLLZ,nspec),stat=ier); if(ier /= 0) stop 'error in allocate'
- allocate(mask_ibool(nglob),stat=ier); if(ier /= 0) stop 'error in allocate'
-
- mask_ibool(:) = -1
- copy_ibool_ori(:,:,:,:) = ibool(:,:,:,:)
-
-! reduces misses
- inumber = 0
- do ispec=1,nspec
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- if(mask_ibool(copy_ibool_ori(i,j,k,ispec)) == -1) then
-! create a new point
- inumber = inumber + 1
- ibool(i,j,k,ispec) = inumber
- mask_ibool(copy_ibool_ori(i,j,k,ispec)) = inumber
- else
-! use an existing point created previously
- ibool(i,j,k,ispec) = mask_ibool(copy_ibool_ori(i,j,k,ispec))
- endif
- enddo
- enddo
- enddo
- enddo
-
-! cleanup
- deallocate(copy_ibool_ori,stat=ier); if(ier /= 0) stop 'error in deallocate'
- deallocate(mask_ibool,stat=ier); if(ier /= 0) stop 'error in deallocate'
-
-end subroutine get_global_indirect_addressing
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine get_global(nspec,xp,yp,zp,iglob,loc,ifseg,nglob,npointot,UTM_X_MIN,UTM_X_MAX)
+
+! this routine MUST be in double precision to avoid sensitivity
+! to roundoff errors in the coordinates of the points
+
+! non-structured global numbering software provided by Paul F. Fischer
+
+! leave the sorting subroutines in the same source file to allow for inlining
+
+ implicit none
+
+ include "constants.h"
+
+
+ integer npointot
+ integer nspec,nglob
+ integer iglob(npointot),loc(npointot)
+ logical ifseg(npointot)
+ double precision xp(npointot),yp(npointot),zp(npointot)
+ double precision UTM_X_MIN,UTM_X_MAX
+
+ integer ispec,i,j,ier
+ integer ieoff,ilocnum,nseg,ioff,iseg,ig
+
+ integer, dimension(:), allocatable :: ind,ninseg,iwork
+ double precision, dimension(:), allocatable :: work
+
+! geometry tolerance parameter to calculate number of independent grid points
+! small value for double precision and to avoid sensitivity to roundoff
+ double precision SMALLVALTOL
+
+! define geometrical tolerance based upon typical size of the model
+ SMALLVALTOL = 1.d-10 * dabs(UTM_X_MAX - UTM_X_MIN)
+
+! dynamically allocate arrays
+ allocate(ind(npointot), &
+ ninseg(npointot), &
+ iwork(npointot), &
+ work(npointot),stat=ier)
+ if( ier /= 0 ) stop 'error allocating arrays'
+
+! establish initial pointers
+ do ispec=1,nspec
+ ieoff=NGLLCUBE*(ispec-1)
+ do ilocnum=1,NGLLCUBE
+ loc(ilocnum+ieoff)=ilocnum+ieoff
+ enddo
+ enddo
+
+ ifseg(:)=.false.
+
+ nseg=1
+ ifseg(1)=.true.
+ ninseg(1)=npointot
+
+ do j=1,NDIM
+
+! sort within each segment
+ ioff=1
+ do iseg=1,nseg
+ if(j == 1) then
+ call rank(xp(ioff),ind,ninseg(iseg))
+ else if(j == 2) then
+ call rank(yp(ioff),ind,ninseg(iseg))
+ else
+ call rank(zp(ioff),ind,ninseg(iseg))
+ endif
+ call swap_all(loc(ioff),xp(ioff),yp(ioff),zp(ioff),iwork,work,ind,ninseg(iseg))
+ ioff=ioff+ninseg(iseg)
+ enddo
+
+! check for jumps in current coordinate
+! compare the coordinates of the points within a small tolerance
+ if(j == 1) then
+ do i=2,npointot
+ if(dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+ enddo
+ else if(j == 2) then
+ do i=2,npointot
+ if(dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+ enddo
+ else
+ do i=2,npointot
+ if(dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+ enddo
+ endif
+
+! count up number of different segments
+ nseg=0
+ do i=1,npointot
+ if(ifseg(i)) then
+ nseg=nseg+1
+ ninseg(nseg)=1
+ else
+ ninseg(nseg)=ninseg(nseg)+1
+ endif
+ enddo
+ enddo
+
+! assign global node numbers (now sorted lexicographically)
+ ig=0
+ do i=1,npointot
+ if(ifseg(i)) ig=ig+1
+ iglob(loc(i))=ig
+ enddo
+
+ nglob=ig
+
+! deallocate arrays
+ deallocate(ind)
+ deallocate(ninseg)
+ deallocate(iwork)
+ deallocate(work)
+
+ end subroutine get_global
+
+! -----------------------------------
+
+! sorting routines put in same file to allow for inlining
+
+ subroutine rank(A,IND,N)
+!
+! Use Heap Sort (Numerical Recipes)
+!
+ implicit none
+
+ integer n
+ double precision A(n)
+ integer IND(n)
+
+ integer i,j,l,ir,indx
+ double precision q
+
+ do j=1,n
+ IND(j)=j
+ enddo
+
+ if (n == 1) return
+
+ L=n/2+1
+ ir=n
+ 100 CONTINUE
+ IF (l>1) THEN
+ l=l-1
+ indx=ind(l)
+ q=a(indx)
+ ELSE
+ indx=ind(ir)
+ q=a(indx)
+ ind(ir)=ind(1)
+ ir=ir-1
+ if (ir == 1) then
+ ind(1)=indx
+ return
+ endif
+ ENDIF
+ i=l
+ j=l+l
+ 200 CONTINUE
+ IF (J <= IR) THEN
+ IF (J<IR) THEN
+ IF ( A(IND(j))<A(IND(j+1)) ) j=j+1
+ ENDIF
+ IF (q<A(IND(j))) THEN
+ IND(I)=IND(J)
+ I=J
+ J=J+J
+ ELSE
+ J=IR+1
+ ENDIF
+ goto 200
+ ENDIF
+ IND(I)=INDX
+ goto 100
+
+ end subroutine rank
+
+! ------------------------------------------------------------------
+
+ subroutine swap_all(IA,A,B,C,IW,W,ind,n)
+!
+! swap arrays IA, A, B and C according to addressing in array IND
+!
+ implicit none
+
+ integer n
+
+ integer IND(n)
+ integer IA(n),IW(n)
+ double precision A(n),B(n),C(n),W(n)
+
+ integer i
+
+ IW(:) = IA(:)
+ W(:) = A(:)
+
+ do i=1,n
+ IA(i)=IW(ind(i))
+ A(i)=W(ind(i))
+ enddo
+
+ W(:) = B(:)
+
+ do i=1,n
+ B(i)=W(ind(i))
+ enddo
+
+ W(:) = C(:)
+
+ do i=1,n
+ C(i)=W(ind(i))
+ enddo
+
+ end subroutine swap_all
+
+! ------------------------------------------------------------------
+
+
+ subroutine get_global_indirect_addressing(nspec,nglob,ibool)
+
+!
+!- we can create a new indirect addressing to reduce cache misses
+! (put into this subroutine but compiler keeps on complaining that it can't vectorize loops...)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: nspec,nglob
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+! mask to sort ibool
+ integer, dimension(:), allocatable :: mask_ibool
+ integer, dimension(:,:,:,:), allocatable :: copy_ibool_ori
+ integer :: inumber
+ integer:: i,j,k,ispec,ier
+
+! copies original array
+ allocate(copy_ibool_ori(NGLLX,NGLLY,NGLLZ,nspec),stat=ier); if(ier /= 0) stop 'error in allocate'
+ allocate(mask_ibool(nglob),stat=ier); if(ier /= 0) stop 'error in allocate'
+
+ mask_ibool(:) = -1
+ copy_ibool_ori(:,:,:,:) = ibool(:,:,:,:)
+
+! reduces misses
+ inumber = 0
+ do ispec=1,nspec
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ if(mask_ibool(copy_ibool_ori(i,j,k,ispec)) == -1) then
+! create a new point
+ inumber = inumber + 1
+ ibool(i,j,k,ispec) = inumber
+ mask_ibool(copy_ibool_ori(i,j,k,ispec)) = inumber
+ else
+! use an existing point created previously
+ ibool(i,j,k,ispec) = mask_ibool(copy_ibool_ori(i,j,k,ispec))
+ endif
+ enddo
+ enddo
+ enddo
+ enddo
+
+! cleanup
+ deallocate(copy_ibool_ori,stat=ier); if(ier /= 0) stop 'error in deallocate'
+ deallocate(mask_ibool,stat=ier); if(ier /= 0) stop 'error in deallocate'
+
+ end subroutine get_global_indirect_addressing
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -69,8 +69,6 @@
! use acoustic domains for simulation
logical,parameter :: USE_PURE_ACOUSTIC_MOD = .false.
-
-
! initializes element domain flags
ispec_is_acoustic(:) = .false.
ispec_is_elastic(:) = .false.
@@ -387,7 +385,7 @@
! anisotropy
logical :: ANISOTROPY
-
+
! local parameters
real(kind=CUSTOM_REAL) :: vp,vs,rho,qmu_atten
real(kind=CUSTOM_REAL) :: c11,c12,c13,c14,c15,c16,c22,c23,c24,c25, &
@@ -492,7 +490,7 @@
idomain_id = materials_ext_mesh(6,iflag)
else if ( mat_ext_mesh(2,ispec) == 2 ) then
-
+
imaterial_PB = abs(imaterial_id)
! material definition undefined, uses definition from tomography model
! GLL point location
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/memory_eval.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/memory_eval.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/memory_eval.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -152,10 +152,10 @@
! compute the approximate amount of static memory needed to run the mesher
subroutine memory_eval_mesher(myrank,nspec,npointot,nnodes_ext_mesh, &
- nelmnts_ext_mesh,nmat_ext_mesh,num_interfaces_ext_mesh, &
- max_interface_size_ext_mesh,nspec2D_xmin,nspec2D_xmax, &
- nspec2D_ymin,nspec2D_ymax,nspec2D_bottom,nspec2D_top, &
- static_memory_size_request)
+ nelmnts_ext_mesh,nmat_ext_mesh,num_interfaces_ext_mesh, &
+ max_interface_size_ext_mesh,nspec2D_xmin,nspec2D_xmax, &
+ nspec2D_ymin,nspec2D_ymax,nspec2D_bottom,nspec2D_top, &
+ static_memory_size_request)
implicit none
@@ -166,7 +166,7 @@
max_interface_size_ext_mesh,nspec2D_xmin,nspec2D_xmax, &
nspec2D_ymin,nspec2D_ymax,nspec2D_bottom,nspec2D_top
- integer :: static_memory_size_request
+ double precision :: static_memory_size_request
integer :: static_memory_size
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_external_values.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_external_values.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_external_values.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -63,7 +63,7 @@
include "constants.h"
! standard include of the MPI library
- include 'mpif.h'
+ !include 'mpif.h'
integer :: myrank
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_tomography.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_tomography.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/model_tomography.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -167,7 +167,7 @@
subroutine PREM_routine(xloc,yloc,zloc,ro_prem,vp_prem,vs_prem,idom)
-double precision, intent(in) :: xloc,yloc,zloc
+double precision, intent(in) :: xloc,yloc,zloc
integer, intent(in) :: idom
double precision :: r0,r,x_prem
!double precision :: ro_prem,vp_prem,vs_prem
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,441 +1,455 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-
-! for external mesh
-
- subroutine save_arrays_solver_ext_mesh(nspec,nglob, &
- xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, &
- gammaxstore,gammaystore,gammazstore, &
- jacobianstore, rho_vp,rho_vs,qmu_attenuation_store, &
- rhostore,kappastore,mustore, &
- rmass,rmass_acoustic,rmass_solid_poroelastic,rmass_fluid_poroelastic, &
- OCEANS,rmass_ocean_load,NGLOB_OCEAN,&
- ibool, &
- xstore_dummy,ystore_dummy,zstore_dummy, &
- abs_boundary_normal,abs_boundary_jacobian2Dw, &
- abs_boundary_ijk,abs_boundary_ispec, &
- num_abs_boundary_faces, &
- free_surface_normal,free_surface_jacobian2Dw, &
- free_surface_ijk,free_surface_ispec, &
- num_free_surface_faces, &
- coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
- coupling_ac_el_ijk,coupling_ac_el_ispec, &
- num_coupling_ac_el_faces, &
- num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
- max_interface_size_ext_mesh,ibool_interfaces_ext_mesh, &
- prname,SAVE_MESH_FILES, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store, &
- c22store,c23store,c24store,c25store,c26store,c33store, &
- c34store,c35store,c36store,c44store,c45store,c46store, &
- c55store,c56store,c66store, &
- ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
-
- implicit none
-
- include "constants.h"
-
- integer :: nspec,nglob
-
-! jacobian
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xixstore,xiystore,xizstore, &
- etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore,jacobianstore
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rho_vp,rho_vs
-
-! attenuation
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: qmu_attenuation_store
-
-! material
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rhostore,kappastore,mustore
- real(kind=CUSTOM_REAL), dimension(nglob) :: rmass,rmass_acoustic, &
- rmass_solid_poroelastic,rmass_fluid_poroelastic
-! ocean load
- logical :: OCEANS
- integer :: NGLOB_OCEAN
- real(kind=CUSTOM_REAL),dimension(NGLOB_OCEAN) :: rmass_ocean_load
-
-! mesh coordinates
- integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
- real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
-
-! absorbing boundary surface
- integer :: num_abs_boundary_faces
- real(kind=CUSTOM_REAL) :: abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces)
- real(kind=CUSTOM_REAL) :: abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces)
- integer :: abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces)
- integer :: abs_boundary_ispec(num_abs_boundary_faces)
-
-! free surface
- integer :: num_free_surface_faces
- real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
- real(kind=CUSTOM_REAL) :: free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces)
- integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
- integer :: free_surface_ispec(num_free_surface_faces)
-
-! acoustic-elastic coupling surface
- integer :: num_coupling_ac_el_faces
- real(kind=CUSTOM_REAL) :: coupling_ac_el_normal(NDIM,NGLLSQUARE,num_coupling_ac_el_faces)
- real(kind=CUSTOM_REAL) :: coupling_ac_el_jacobian2Dw(NGLLSQUARE,num_coupling_ac_el_faces)
- integer :: coupling_ac_el_ijk(3,NGLLSQUARE,num_coupling_ac_el_faces)
- integer :: coupling_ac_el_ispec(num_coupling_ac_el_faces)
-
-! MPI interfaces
- integer :: num_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
- integer :: max_interface_size_ext_mesh
- integer, dimension(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
-
-! file name
- character(len=256) prname
- logical :: SAVE_MESH_FILES
-
-! anisotropy
- logical :: ANISOTROPY
- integer :: NSPEC_ANISO
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
- c11store,c12store,c13store,c14store,c15store,c16store, &
- c22store,c23store,c24store,c25store,c26store,c33store, &
- c34store,c35store,c36store,c44store,c45store,c46store, &
- c55store,c56store,c66store
-
-! material domain flags
- logical, dimension(nspec) :: ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic
-
-! local parameters
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: v_tmp
- integer,dimension(:),allocatable :: v_tmp_i
-
- !real(kind=CUSTOM_REAL) :: minimum(1)
- integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh_dummy
- integer :: ier,i
- logical :: ACOUSTIC_SIMULATION,ELASTIC_SIMULATION,POROELASTIC_SIMULATION
- character(len=256) :: filename
-
- integer, dimension(:), allocatable :: iglob_tmp
- integer :: j,inum
-
-! saves mesh file proc***_external_mesh.bin
- filename = prname(1:len_trim(prname))//'external_mesh.bin'
- open(unit=IOUT,file=trim(filename),status='unknown',action='write',form='unformatted',iostat=ier)
- if( ier /= 0 ) stop 'error opening database proc######_external_mesh.bin'
-
- write(IOUT) nspec
- write(IOUT) nglob
-
- write(IOUT) ibool
-
- write(IOUT) xstore_dummy
- write(IOUT) ystore_dummy
- write(IOUT) zstore_dummy
-
- write(IOUT) xixstore
- write(IOUT) xiystore
- write(IOUT) xizstore
- write(IOUT) etaxstore
- write(IOUT) etaystore
- write(IOUT) etazstore
- write(IOUT) gammaxstore
- write(IOUT) gammaystore
- write(IOUT) gammazstore
- write(IOUT) jacobianstore
-
- write(IOUT) kappastore
- write(IOUT) mustore
-
- write(IOUT) ispec_is_acoustic
- write(IOUT) ispec_is_elastic
- write(IOUT) ispec_is_poroelastic
-
-! acoustic
-! all processes will have acoustic_simulation set if any flag is .true. somewhere
- call any_all_l( ANY(ispec_is_acoustic), ACOUSTIC_SIMULATION )
- if( ACOUSTIC_SIMULATION ) then
- write(IOUT) rmass_acoustic
- write(IOUT) rhostore
- endif
-
-! elastic
- call any_all_l( ANY(ispec_is_elastic), ELASTIC_SIMULATION )
- if( ELASTIC_SIMULATION ) then
- write(IOUT) rmass
- if( OCEANS) then
- write(IOUT) rmass_ocean_load
- endif
- !pll Stacey
- write(IOUT) rho_vp
- write(IOUT) rho_vs
-
- endif
-
-! poroelastic
- call any_all_l( ANY(ispec_is_poroelastic), POROELASTIC_SIMULATION )
- if( POROELASTIC_SIMULATION ) then
- write(IOUT) rmass_solid_poroelastic
- write(IOUT) rmass_fluid_poroelastic
- endif
-
-! absorbing boundary surface
- write(IOUT) num_abs_boundary_faces
- write(IOUT) abs_boundary_ispec
- write(IOUT) abs_boundary_ijk
- write(IOUT) abs_boundary_jacobian2Dw
- write(IOUT) abs_boundary_normal
-
-! free surface
- write(IOUT) num_free_surface_faces
- write(IOUT) free_surface_ispec
- write(IOUT) free_surface_ijk
- write(IOUT) free_surface_jacobian2Dw
- write(IOUT) free_surface_normal
-
-! acoustic-elastic coupling surface
- write(IOUT) num_coupling_ac_el_faces
- write(IOUT) coupling_ac_el_ispec
- write(IOUT) coupling_ac_el_ijk
- write(IOUT) coupling_ac_el_jacobian2Dw
- write(IOUT) coupling_ac_el_normal
-
-!MPI interfaces
- write(IOUT) num_interfaces_ext_mesh
- write(IOUT) maxval(nibool_interfaces_ext_mesh(:))
- write(IOUT) my_neighbours_ext_mesh
- write(IOUT) nibool_interfaces_ext_mesh
-
- allocate(ibool_interfaces_ext_mesh_dummy(maxval(nibool_interfaces_ext_mesh(:)),num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array'
-
- do i = 1, num_interfaces_ext_mesh
- ibool_interfaces_ext_mesh_dummy(:,i) = ibool_interfaces_ext_mesh(1:maxval(nibool_interfaces_ext_mesh(:)),i)
- enddo
- write(IOUT) ibool_interfaces_ext_mesh_dummy
-
-! anisotropy
- if( ANISOTROPY ) then
- write(IOUT) c11store
- write(IOUT) c12store
- write(IOUT) c13store
- write(IOUT) c14store
- write(IOUT) c15store
- write(IOUT) c16store
- write(IOUT) c22store
- write(IOUT) c23store
- write(IOUT) c24store
- write(IOUT) c25store
- write(IOUT) c26store
- write(IOUT) c33store
- write(IOUT) c34store
- write(IOUT) c35store
- write(IOUT) c36store
- write(IOUT) c44store
- write(IOUT) c45store
- write(IOUT) c46store
- write(IOUT) c55store
- write(IOUT) c56store
- write(IOUT) c66store
- endif
-
- close(IOUT)
-
-
-! stores arrays in binary files
- if( SAVE_MESH_FILES ) then
-
- ! mesh arrays used for example in combine_vol_data.f90
- !--- x coordinate
- open(unit=27,file=prname(1:len_trim(prname))//'x.bin',status='unknown',form='unformatted',iostat=ier)
- if( ier /= 0 ) stop 'error opening file x.bin'
- write(27) xstore_dummy
- close(27)
-
- !--- y coordinate
- open(unit=27,file=prname(1:len_trim(prname))//'y.bin',status='unknown',form='unformatted',iostat=ier)
- if( ier /= 0 ) stop 'error opening file y.bin'
- write(27) ystore_dummy
- close(27)
-
- !--- z coordinate
- open(unit=27,file=prname(1:len_trim(prname))//'z.bin',status='unknown',form='unformatted',iostat=ier)
- if( ier /= 0 ) stop 'error opening file z.bin'
- write(27) zstore_dummy
- close(27)
-
- ! ibool
- open(unit=27,file=prname(1:len_trim(prname))//'ibool.bin',status='unknown',form='unformatted',iostat=ier)
- if( ier /= 0 ) stop 'error opening file ibool.bin'
- write(27) ibool
- close(27)
-
- allocate( v_tmp(NGLLX,NGLLY,NGLLZ,nspec), stat=ier); if( ier /= 0 ) stop 'error allocating array '
-
- ! vp (for checking the mesh and model)
- !minimum = minval( abs(rho_vp) )
- !if( minimum(1) /= 0.0 ) then
- ! v_tmp = (FOUR_THIRDS * mustore + kappastore) / rho_vp
- !else
- ! v_tmp = 0.0
- !endif
- v_tmp = 0.0
- where( rho_vp /= 0._CUSTOM_REAL ) v_tmp = (FOUR_THIRDS * mustore + kappastore) / rho_vp
- open(unit=27,file=prname(1:len_trim(prname))//'vp.bin',status='unknown',form='unformatted',iostat=ier)
- if( ier /= 0 ) stop 'error opening file vp.bin'
- write(27) v_tmp
- close(27)
-
- ! VTK file output
- ! vp values
- filename = prname(1:len_trim(prname))//'vp'
- call write_VTK_data_gll_cr(nspec,nglob, &
- xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
- v_tmp,filename)
-
-
- ! vs (for checking the mesh and model)
- !minimum = minval( abs(rho_vs) )
- !if( minimum(1) /= 0.0 ) then
- ! v_tmp = mustore / rho_vs
- !else
- ! v_tmp = 0.0
- !endif
- v_tmp = 0.0
- where( rho_vs /= 0._CUSTOM_REAL ) v_tmp = mustore / rho_vs
- open(unit=27,file=prname(1:len_trim(prname))//'vs.bin',status='unknown',form='unformatted',iostat=ier)
- if( ier /= 0 ) stop 'error opening file vs.bin'
- write(27) v_tmp
- close(27)
-
- ! VTK file output
- ! vs values
- filename = prname(1:len_trim(prname))//'vs'
- call write_VTK_data_gll_cr(nspec,nglob, &
- xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
- v_tmp,filename)
-
- ! outputs density model for check
- v_tmp = 0.0
- where( rho_vp /= 0._CUSTOM_REAL ) v_tmp = rho_vp**2 / (FOUR_THIRDS * mustore + kappastore)
- open(unit=27,file=prname(1:len_trim(prname))//'rho.bin',status='unknown',form='unformatted',iostat=ier)
- if( ier /= 0 ) stop 'error opening file rho.bin'
- write(27) v_tmp
- close(27)
-
- ! VTK file output
- ! saves attenuation flag assigned on each gll point into a vtk file
- filename = prname(1:len_trim(prname))//'attenuation'
- call write_VTK_data_gll_cr(nspec,nglob, &
- xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
- qmu_attenuation_store,filename)
-
- ! VTK file output
- ! acoustic-elastic domains
- if( ACOUSTIC_SIMULATION .and. ELASTIC_SIMULATION ) then
- ! saves points on acoustic-elastic coupling interface
- allocate( iglob_tmp(NGLLSQUARE*num_coupling_ac_el_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array iglob_tmp'
- inum = 0
- iglob_tmp(:) = 0
- do i=1,num_coupling_ac_el_faces
- do j=1,NGLLSQUARE
- inum = inum+1
- iglob_tmp(inum) = ibool(coupling_ac_el_ijk(1,j,i), &
- coupling_ac_el_ijk(2,j,i), &
- coupling_ac_el_ijk(3,j,i), &
- coupling_ac_el_ispec(i) )
- enddo
- enddo
- filename = prname(1:len_trim(prname))//'coupling_acoustic_elastic'
- call write_VTK_data_points(nglob, &
- xstore_dummy,ystore_dummy,zstore_dummy, &
- iglob_tmp,NGLLSQUARE*num_coupling_ac_el_faces, &
- filename)
-
- ! saves acoustic/elastic flag
- allocate(v_tmp_i(nspec),stat=ier)
- if( ier /= 0 ) stop 'error allocating array v_tmp_i'
- do i=1,nspec
- if( ispec_is_acoustic(i) ) then
- v_tmp_i(i) = 1
- else if( ispec_is_elastic(i) ) then
- v_tmp_i(i) = 2
- else
- v_tmp_i(i) = 0
- endif
- enddo
- filename = prname(1:len_trim(prname))//'acoustic_elastic_flag'
- call write_VTK_data_elem_i(nspec,nglob, &
- xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
- v_tmp_i,filename)
-
- deallocate(iglob_tmp,v_tmp_i)
- endif
-
- ! saves free surface points
- if( num_free_surface_faces > 0 ) then
- ! saves free surface interface points
- allocate( iglob_tmp(NGLLSQUARE*num_free_surface_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array iglob_tmp'
- inum = 0
- iglob_tmp(:) = 0
- do i=1,num_free_surface_faces
- do j=1,NGLLSQUARE
- inum = inum+1
- iglob_tmp(inum) = ibool(free_surface_ijk(1,j,i), &
- free_surface_ijk(2,j,i), &
- free_surface_ijk(3,j,i), &
- free_surface_ispec(i) )
- enddo
- enddo
- filename = prname(1:len_trim(prname))//'free_surface'
- call write_VTK_data_points(nglob, &
- xstore_dummy,ystore_dummy,zstore_dummy, &
- iglob_tmp,NGLLSQUARE*num_free_surface_faces, &
- filename)
-
- deallocate(iglob_tmp)
- endif
-
-
- !! saves 1. MPI interface
- ! if( num_interfaces_ext_mesh >= 1 ) then
- ! filename = prname(1:len_trim(prname))//'MPI_1_points'
- ! call write_VTK_data_points(nglob, &
- ! xstore_dummy,ystore_dummy,zstore_dummy, &
- ! ibool_interfaces_ext_mesh_dummy(1:nibool_interfaces_ext_mesh(1),1), &
- ! nibool_interfaces_ext_mesh(1), &
- ! filename)
- ! endif
- !
-
- deallocate(v_tmp)
-
- endif ! SAVE_MESH_FILES
-
-! cleanup
- deallocate(ibool_interfaces_ext_mesh_dummy,stat=ier); if( ier /= 0 ) stop 'error deallocating array'
-
-
- end subroutine save_arrays_solver_ext_mesh
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+! for external mesh
+
+ subroutine save_arrays_solver_ext_mesh(nspec,nglob, &
+ xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, &
+ gammaxstore,gammaystore,gammazstore, &
+ jacobianstore, rho_vp,rho_vs,qmu_attenuation_store, &
+ rhostore,kappastore,mustore, &
+ rmass,rmass_acoustic,rmass_solid_poroelastic,rmass_fluid_poroelastic, &
+ OCEANS,rmass_ocean_load,NGLOB_OCEAN,&
+ ibool, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ abs_boundary_normal,abs_boundary_jacobian2Dw, &
+ abs_boundary_ijk,abs_boundary_ispec, &
+ num_abs_boundary_faces, &
+ free_surface_normal,free_surface_jacobian2Dw, &
+ free_surface_ijk,free_surface_ispec, &
+ num_free_surface_faces, &
+ coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
+ coupling_ac_el_ijk,coupling_ac_el_ispec, &
+ num_coupling_ac_el_faces, &
+ num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
+ max_interface_size_ext_mesh,ibool_interfaces_ext_mesh, &
+ prname,SAVE_MESH_FILES, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store, &
+ c22store,c23store,c24store,c25store,c26store,c33store, &
+ c34store,c35store,c36store,c44store,c45store,c46store, &
+ c55store,c56store,c66store, &
+ ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: nspec,nglob
+
+! jacobian
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xixstore,xiystore,xizstore, &
+ etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore,jacobianstore
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rho_vp,rho_vs
+
+! attenuation
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: qmu_attenuation_store
+
+! material
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rhostore,kappastore,mustore
+ real(kind=CUSTOM_REAL), dimension(nglob) :: rmass,rmass_acoustic, &
+ rmass_solid_poroelastic,rmass_fluid_poroelastic
+! ocean load
+ logical :: OCEANS
+ integer :: NGLOB_OCEAN
+ real(kind=CUSTOM_REAL),dimension(NGLOB_OCEAN) :: rmass_ocean_load
+
+! mesh coordinates
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+ real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+! absorbing boundary surface
+ integer :: num_abs_boundary_faces
+ real(kind=CUSTOM_REAL) :: abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces)
+ real(kind=CUSTOM_REAL) :: abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces)
+ integer :: abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces)
+ integer :: abs_boundary_ispec(num_abs_boundary_faces)
+
+! free surface
+ integer :: num_free_surface_faces
+ real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
+ real(kind=CUSTOM_REAL) :: free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces)
+ integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
+ integer :: free_surface_ispec(num_free_surface_faces)
+
+! acoustic-elastic coupling surface
+ integer :: num_coupling_ac_el_faces
+ real(kind=CUSTOM_REAL) :: coupling_ac_el_normal(NDIM,NGLLSQUARE,num_coupling_ac_el_faces)
+ real(kind=CUSTOM_REAL) :: coupling_ac_el_jacobian2Dw(NGLLSQUARE,num_coupling_ac_el_faces)
+ integer :: coupling_ac_el_ijk(3,NGLLSQUARE,num_coupling_ac_el_faces)
+ integer :: coupling_ac_el_ispec(num_coupling_ac_el_faces)
+
+! MPI interfaces
+ integer :: num_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+ integer :: max_interface_size_ext_mesh
+ integer, dimension(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer :: max_nibool_interfaces_ext_mesh
+
+! file name
+ character(len=256) prname
+ logical :: SAVE_MESH_FILES
+
+! anisotropy
+ logical :: ANISOTROPY
+ integer :: NSPEC_ANISO
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
+ c11store,c12store,c13store,c14store,c15store,c16store, &
+ c22store,c23store,c24store,c25store,c26store,c33store, &
+ c34store,c35store,c36store,c44store,c45store,c46store, &
+ c55store,c56store,c66store
+
+! material domain flags
+ logical, dimension(nspec) :: ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic
+
+! local parameters
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: v_tmp
+ integer,dimension(:),allocatable :: v_tmp_i
+
+ !real(kind=CUSTOM_REAL) :: minimum(1)
+ integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh_dummy
+ integer :: ier,i
+ logical :: ACOUSTIC_SIMULATION,ELASTIC_SIMULATION,POROELASTIC_SIMULATION
+ character(len=256) :: filename
+
+ integer, dimension(:), allocatable :: iglob_tmp
+ integer :: j,inum
+
+! saves mesh file proc***_external_mesh.bin
+ filename = prname(1:len_trim(prname))//'external_mesh.bin'
+ open(unit=IOUT,file=trim(filename),status='unknown',action='write',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening database proc######_external_mesh.bin'
+
+ write(IOUT) nspec
+ write(IOUT) nglob
+
+ write(IOUT) ibool
+
+ write(IOUT) xstore_dummy
+ write(IOUT) ystore_dummy
+ write(IOUT) zstore_dummy
+
+ write(IOUT) xixstore
+ write(IOUT) xiystore
+ write(IOUT) xizstore
+ write(IOUT) etaxstore
+ write(IOUT) etaystore
+ write(IOUT) etazstore
+ write(IOUT) gammaxstore
+ write(IOUT) gammaystore
+ write(IOUT) gammazstore
+ write(IOUT) jacobianstore
+
+ write(IOUT) kappastore
+ write(IOUT) mustore
+
+ write(IOUT) ispec_is_acoustic
+ write(IOUT) ispec_is_elastic
+ write(IOUT) ispec_is_poroelastic
+
+! acoustic
+! all processes will have acoustic_simulation set if any flag is .true. somewhere
+ call any_all_l( ANY(ispec_is_acoustic), ACOUSTIC_SIMULATION )
+ if( ACOUSTIC_SIMULATION ) then
+ write(IOUT) rmass_acoustic
+ write(IOUT) rhostore
+ endif
+
+! elastic
+ call any_all_l( ANY(ispec_is_elastic), ELASTIC_SIMULATION )
+ if( ELASTIC_SIMULATION ) then
+ write(IOUT) rmass
+
+ if( OCEANS) then
+ write(IOUT) rmass_ocean_load
+ endif
+
+ !pll Stacey
+ write(IOUT) rho_vp
+ write(IOUT) rho_vs
+
+ endif
+
+! poroelastic
+ call any_all_l( ANY(ispec_is_poroelastic), POROELASTIC_SIMULATION )
+ if( POROELASTIC_SIMULATION ) then
+ stop 'not implemented yet: write rmass_solid_poroelastic .. '
+
+ write(IOUT) rmass_solid_poroelastic
+ write(IOUT) rmass_fluid_poroelastic
+ endif
+
+! absorbing boundary surface
+ write(IOUT) num_abs_boundary_faces
+ if( num_abs_boundary_faces > 0 ) then
+ write(IOUT) abs_boundary_ispec
+ write(IOUT) abs_boundary_ijk
+ write(IOUT) abs_boundary_jacobian2Dw
+ write(IOUT) abs_boundary_normal
+ endif
+
+! free surface
+ write(IOUT) num_free_surface_faces
+ if( num_free_surface_faces > 0 ) then
+ write(IOUT) free_surface_ispec
+ write(IOUT) free_surface_ijk
+ write(IOUT) free_surface_jacobian2Dw
+ write(IOUT) free_surface_normal
+ endif
+
+! acoustic-elastic coupling surface
+ write(IOUT) num_coupling_ac_el_faces
+ if( num_coupling_ac_el_faces > 0 ) then
+ write(IOUT) coupling_ac_el_ispec
+ write(IOUT) coupling_ac_el_ijk
+ write(IOUT) coupling_ac_el_jacobian2Dw
+ write(IOUT) coupling_ac_el_normal
+ endif
+
+!MPI interfaces
+ max_nibool_interfaces_ext_mesh = maxval(nibool_interfaces_ext_mesh(:))
+ allocate(ibool_interfaces_ext_mesh_dummy(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array'
+ do i = 1, num_interfaces_ext_mesh
+ ibool_interfaces_ext_mesh_dummy(:,i) = ibool_interfaces_ext_mesh(1:max_nibool_interfaces_ext_mesh,i)
+ enddo
+
+ write(IOUT) num_interfaces_ext_mesh
+ if( num_interfaces_ext_mesh > 0 ) then
+ write(IOUT) max_nibool_interfaces_ext_mesh
+ write(IOUT) my_neighbours_ext_mesh
+ write(IOUT) nibool_interfaces_ext_mesh
+ write(IOUT) ibool_interfaces_ext_mesh_dummy
+ endif
+
+! anisotropy
+ if( ANISOTROPY ) then
+ write(IOUT) c11store
+ write(IOUT) c12store
+ write(IOUT) c13store
+ write(IOUT) c14store
+ write(IOUT) c15store
+ write(IOUT) c16store
+ write(IOUT) c22store
+ write(IOUT) c23store
+ write(IOUT) c24store
+ write(IOUT) c25store
+ write(IOUT) c26store
+ write(IOUT) c33store
+ write(IOUT) c34store
+ write(IOUT) c35store
+ write(IOUT) c36store
+ write(IOUT) c44store
+ write(IOUT) c45store
+ write(IOUT) c46store
+ write(IOUT) c55store
+ write(IOUT) c56store
+ write(IOUT) c66store
+ endif
+
+ close(IOUT)
+
+
+! stores arrays in binary files
+ if( SAVE_MESH_FILES ) then
+
+ ! mesh arrays used for example in combine_vol_data.f90
+ !--- x coordinate
+ open(unit=27,file=prname(1:len_trim(prname))//'x.bin',status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening file x.bin'
+ write(27) xstore_dummy
+ close(27)
+
+ !--- y coordinate
+ open(unit=27,file=prname(1:len_trim(prname))//'y.bin',status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening file y.bin'
+ write(27) ystore_dummy
+ close(27)
+
+ !--- z coordinate
+ open(unit=27,file=prname(1:len_trim(prname))//'z.bin',status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening file z.bin'
+ write(27) zstore_dummy
+ close(27)
+
+ ! ibool
+ open(unit=27,file=prname(1:len_trim(prname))//'ibool.bin',status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening file ibool.bin'
+ write(27) ibool
+ close(27)
+
+ allocate( v_tmp(NGLLX,NGLLY,NGLLZ,nspec), stat=ier); if( ier /= 0 ) stop 'error allocating array '
+
+ ! vp (for checking the mesh and model)
+ !minimum = minval( abs(rho_vp) )
+ !if( minimum(1) /= 0.0 ) then
+ ! v_tmp = (FOUR_THIRDS * mustore + kappastore) / rho_vp
+ !else
+ ! v_tmp = 0.0
+ !endif
+ v_tmp = 0.0
+ where( rho_vp /= 0._CUSTOM_REAL ) v_tmp = (FOUR_THIRDS * mustore + kappastore) / rho_vp
+ open(unit=27,file=prname(1:len_trim(prname))//'vp.bin',status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening file vp.bin'
+ write(27) v_tmp
+ close(27)
+
+ ! VTK file output
+ ! vp values
+ filename = prname(1:len_trim(prname))//'vp'
+ call write_VTK_data_gll_cr(nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+ v_tmp,filename)
+
+
+ ! vs (for checking the mesh and model)
+ !minimum = minval( abs(rho_vs) )
+ !if( minimum(1) /= 0.0 ) then
+ ! v_tmp = mustore / rho_vs
+ !else
+ ! v_tmp = 0.0
+ !endif
+ v_tmp = 0.0
+ where( rho_vs /= 0._CUSTOM_REAL ) v_tmp = mustore / rho_vs
+ open(unit=27,file=prname(1:len_trim(prname))//'vs.bin',status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening file vs.bin'
+ write(27) v_tmp
+ close(27)
+
+ ! VTK file output
+ ! vs values
+ filename = prname(1:len_trim(prname))//'vs'
+ call write_VTK_data_gll_cr(nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+ v_tmp,filename)
+
+ ! outputs density model for check
+ v_tmp = 0.0
+ where( rho_vp /= 0._CUSTOM_REAL ) v_tmp = rho_vp**2 / (FOUR_THIRDS * mustore + kappastore)
+ open(unit=27,file=prname(1:len_trim(prname))//'rho.bin',status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening file rho.bin'
+ write(27) v_tmp
+ close(27)
+
+ ! VTK file output
+ ! saves attenuation flag assigned on each gll point into a vtk file
+ filename = prname(1:len_trim(prname))//'attenuation'
+ call write_VTK_data_gll_cr(nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+ qmu_attenuation_store,filename)
+
+ ! VTK file output
+ ! acoustic-elastic domains
+ if( ACOUSTIC_SIMULATION .and. ELASTIC_SIMULATION ) then
+ ! saves points on acoustic-elastic coupling interface
+ allocate( iglob_tmp(NGLLSQUARE*num_coupling_ac_el_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array iglob_tmp'
+ inum = 0
+ iglob_tmp(:) = 0
+ do i=1,num_coupling_ac_el_faces
+ do j=1,NGLLSQUARE
+ inum = inum+1
+ iglob_tmp(inum) = ibool(coupling_ac_el_ijk(1,j,i), &
+ coupling_ac_el_ijk(2,j,i), &
+ coupling_ac_el_ijk(3,j,i), &
+ coupling_ac_el_ispec(i) )
+ enddo
+ enddo
+ filename = prname(1:len_trim(prname))//'coupling_acoustic_elastic'
+ call write_VTK_data_points(nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ iglob_tmp,NGLLSQUARE*num_coupling_ac_el_faces, &
+ filename)
+
+ ! saves acoustic/elastic flag
+ allocate(v_tmp_i(nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array v_tmp_i'
+ do i=1,nspec
+ if( ispec_is_acoustic(i) ) then
+ v_tmp_i(i) = 1
+ else if( ispec_is_elastic(i) ) then
+ v_tmp_i(i) = 2
+ else
+ v_tmp_i(i) = 0
+ endif
+ enddo
+ filename = prname(1:len_trim(prname))//'acoustic_elastic_flag'
+ call write_VTK_data_elem_i(nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+ v_tmp_i,filename)
+
+ deallocate(iglob_tmp,v_tmp_i)
+ endif
+
+ ! saves free surface points
+ if( num_free_surface_faces > 0 ) then
+ ! saves free surface interface points
+ allocate( iglob_tmp(NGLLSQUARE*num_free_surface_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array iglob_tmp'
+ inum = 0
+ iglob_tmp(:) = 0
+ do i=1,num_free_surface_faces
+ do j=1,NGLLSQUARE
+ inum = inum+1
+ iglob_tmp(inum) = ibool(free_surface_ijk(1,j,i), &
+ free_surface_ijk(2,j,i), &
+ free_surface_ijk(3,j,i), &
+ free_surface_ispec(i) )
+ enddo
+ enddo
+ filename = prname(1:len_trim(prname))//'free_surface'
+ call write_VTK_data_points(nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ iglob_tmp,NGLLSQUARE*num_free_surface_faces, &
+ filename)
+
+ deallocate(iglob_tmp)
+ endif
+
+
+ !! saves 1. MPI interface
+ if( num_interfaces_ext_mesh >= 1 ) then
+ filename = prname(1:len_trim(prname))//'MPI_1_points'
+ call write_VTK_data_points(nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ ibool_interfaces_ext_mesh_dummy(1:nibool_interfaces_ext_mesh(1),1), &
+ nibool_interfaces_ext_mesh(1), &
+ filename)
+ endif
+
+
+ deallocate(v_tmp)
+
+ endif ! SAVE_MESH_FILES
+
+! cleanup
+ deallocate(ibool_interfaces_ext_mesh_dummy,stat=ier); if( ier /= 0 ) stop 'error deallocating array'
+
+
+ end subroutine save_arrays_solver_ext_mesh
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -26,11 +26,11 @@
module vtk
-
+
!-------------------------------------------------------------
! USER PARAMETER
- ! outputs as VTK ASCII file
+ ! outputs as VTK ASCII file
logical,parameter :: USE_VTK_OUTPUT = .true.
!-------------------------------------------------------------
@@ -38,7 +38,7 @@
! global point data
real,dimension(:),allocatable :: total_dat
-
+
end module vtk
!
@@ -61,7 +61,7 @@
implicit none
include 'constants.h'
-
+
! data must be of dimension: (NGLLX,NGLLY,NGLLZ,NSPEC_AB)
double precision,dimension(:,:,:,:),allocatable :: data
! real array for data
@@ -182,7 +182,7 @@
mesh_file = trim(outdir) // '/' // trim(filename)//'.vtk'
open(IOVTK,file=mesh_file(1:len_trim(mesh_file)),status='unknown',iostat=ios)
if( ios /= 0 ) stop 'error opening vtk output file'
-
+
write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
write(IOVTK,'(a)') 'material model VTK file'
write(IOVTK,'(a)') 'ASCII'
@@ -192,7 +192,7 @@
mesh_file = trim(outdir) // '/' // trim(filename)//'.mesh'
call open_file(trim(mesh_file)//char(0))
endif
-
+
! counts total number of points (all slices)
npp = 0
nee = 0
@@ -354,7 +354,7 @@
! close mesh file
call close_file()
endif
-
+
print *, 'Done writing '//trim(mesh_file)
end program combine_paraview_data_ext_mesh
@@ -490,8 +490,8 @@
! writes out total number of points
if (it == 1) then
- if( USE_VTK_OUTPUT ) then
- write(IOVTK, '(a,i12,a)') 'POINTS ', npp, ' float'
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK, '(a,i12,a)') 'POINTS ', npp, ' float'
! creates array to hold point data
allocate(total_dat(npp),stat=ier)
if( ier /= 0 ) stop 'error allocating total dat array'
@@ -543,7 +543,7 @@
else
call write_real(x)
call write_real(y)
- call write_real(z)
+ call write_real(z)
call write_real(dat(NGLLX,1,1,ispec))
endif
mask_ibool(iglob2) = .true.
@@ -556,7 +556,7 @@
if( USE_VTK_OUTPUT ) then
write(IOVTK,'(3e18.6)') x,y,z
total_dat(np+numpoin) = dat(NGLLX,NGLLY,1,ispec)
- else
+ else
call write_real(x)
call write_real(y)
call write_real(z)
@@ -571,8 +571,8 @@
z = zstore(iglob4)
if( USE_VTK_OUTPUT ) then
write(IOVTK,'(3e18.6)') x,y,z
- total_dat(np+numpoin) = dat(1,NGLLY,1,ispec)
- else
+ total_dat(np+numpoin) = dat(1,NGLLY,1,ispec)
+ else
call write_real(x)
call write_real(y)
call write_real(z)
@@ -587,8 +587,8 @@
z = zstore(iglob5)
if( USE_VTK_OUTPUT ) then
write(IOVTK,'(3e18.6)') x,y,z
- total_dat(np+numpoin) = dat(1,1,NGLLZ,ispec)
- else
+ total_dat(np+numpoin) = dat(1,1,NGLLZ,ispec)
+ else
call write_real(x)
call write_real(y)
call write_real(z)
@@ -603,8 +603,8 @@
z = zstore(iglob6)
if( USE_VTK_OUTPUT ) then
write(IOVTK,'(3e18.6)') x,y,z
- total_dat(np+numpoin) = dat(NGLLX,1,NGLLZ,ispec)
- else
+ total_dat(np+numpoin) = dat(NGLLX,1,NGLLZ,ispec)
+ else
call write_real(x)
call write_real(y)
call write_real(z)
@@ -619,8 +619,8 @@
z = zstore(iglob7)
if( USE_VTK_OUTPUT ) then
write(IOVTK,'(3e18.6)') x,y,z
- total_dat(np+numpoin) = dat(NGLLX,NGLLY,NGLLZ,ispec)
- else
+ total_dat(np+numpoin) = dat(NGLLX,NGLLY,NGLLZ,ispec)
+ else
call write_real(x)
call write_real(y)
call write_real(z)
@@ -635,8 +635,8 @@
z = zstore(iglob8)
if( USE_VTK_OUTPUT ) then
write(IOVTK,'(3e18.6)') x,y,z
- total_dat(np+numpoin) = dat(1,NGLLY,NGLLZ,ispec)
- else
+ total_dat(np+numpoin) = dat(1,NGLLY,NGLLZ,ispec)
+ else
call write_real(x)
call write_real(y)
call write_real(z)
@@ -673,13 +673,13 @@
! writes out total number of points
if (it == 1) then
- if( USE_VTK_OUTPUT ) then
- write(IOVTK, '(a,i12,a)') 'POINTS ', npp, ' float'
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK, '(a,i12,a)') 'POINTS ', npp, ' float'
! creates array to hold point data
allocate(total_dat(npp),stat=ier)
if( ier /= 0 ) stop 'error allocating total dat array'
total_dat(:) = 0.0
- else
+ else
call write_integer(npp)
endif
endif
@@ -703,7 +703,7 @@
if( USE_VTK_OUTPUT ) then
write(IOVTK,'(3e18.6)') x,y,z
total_dat(np+numpoin) = dat(i,j,k,ispec)
- else
+ else
call write_real(x)
call write_real(y)
call write_real(z)
@@ -742,7 +742,7 @@
! outputs total number of elements for all slices
if (it == 1) then
- if( USE_VTK_OUTPUT ) then
+ if( USE_VTK_OUTPUT ) then
! note: indices for vtk start at 0
write(IOVTK,'(a,i12,i12)') "CELLS ",nee,nee*9
else
@@ -821,7 +821,7 @@
n7 = num_ibool(iglob7) -1 + np
n8 = num_ibool(iglob8) -1 + np
- if( USE_VTK_OUTPUT ) then
+ if( USE_VTK_OUTPUT ) then
write(IOVTK,'(9i12)') 8,n1,n2,n3,n4,n5,n6,n7,n8
else
call write_integer(n1)
@@ -833,9 +833,9 @@
call write_integer(n7)
call write_integer(n8)
endif
-
+
enddo
-
+
! elements written
nelement = NSPEC_AB
@@ -869,10 +869,10 @@
! outputs total number of elements for all slices
if (it == 1) then
- if( USE_VTK_OUTPUT ) then
+ if( USE_VTK_OUTPUT ) then
! note: indices for vtk start at 0
write(IOVTK,'(a,i12,i12)') "CELLS ",nee,nee*9
- else
+ else
!nee = nelement * num_node
call write_integer(nee)
endif
@@ -922,10 +922,10 @@
n6 = num_ibool(iglob6)+np-1
n7 = num_ibool(iglob7)+np-1
n8 = num_ibool(iglob8)+np-1
-
- if( USE_VTK_OUTPUT ) then
+
+ if( USE_VTK_OUTPUT ) then
write(IOVTK,'(9i12)') 8,n1,n2,n3,n4,n5,n6,n7,n8
- else
+ else
call write_integer(n1)
call write_integer(n2)
call write_integer(n3)
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_arrays_source.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_arrays_source.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_arrays_source.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,350 +1,352 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine compute_arrays_source(ispec_selected_source, &
- xi_source,eta_source,gamma_source,sourcearray, &
- Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- xigll,yigll,zigll,nspec)
-
- implicit none
-
- include "constants.h"
-
- integer ispec_selected_source
- integer nspec
-
- double precision xi_source,eta_source,gamma_source
- double precision Mxx,Myy,Mzz,Mxy,Mxz,Myz
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xix,xiy,xiz,etax,etay,etaz, &
- gammax,gammay,gammaz
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
-
- double precision xixd,xiyd,xizd,etaxd,etayd,etazd,gammaxd,gammayd,gammazd
-
-! Gauss-Lobatto-Legendre points of integration and weights
- double precision, dimension(NGLLX) :: xigll
- double precision, dimension(NGLLY) :: yigll
- double precision, dimension(NGLLZ) :: zigll
-
-! source arrays
- double precision, dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearrayd
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: G11,G12,G13,G21,G22,G23,G31,G32,G33
- double precision, dimension(NGLLX) :: hxis,hpxis
- double precision, dimension(NGLLY) :: hetas,hpetas
- double precision, dimension(NGLLZ) :: hgammas,hpgammas
-
- integer k,l,m
-
-! calculate G_ij for general source location
-! the source does not necessarily correspond to a Gauss-Lobatto point
- do m=1,NGLLZ
- do l=1,NGLLY
- do k=1,NGLLX
-
- xixd = dble(xix(k,l,m,ispec_selected_source))
- xiyd = dble(xiy(k,l,m,ispec_selected_source))
- xizd = dble(xiz(k,l,m,ispec_selected_source))
- etaxd = dble(etax(k,l,m,ispec_selected_source))
- etayd = dble(etay(k,l,m,ispec_selected_source))
- etazd = dble(etaz(k,l,m,ispec_selected_source))
- gammaxd = dble(gammax(k,l,m,ispec_selected_source))
- gammayd = dble(gammay(k,l,m,ispec_selected_source))
- gammazd = dble(gammaz(k,l,m,ispec_selected_source))
-
- G11(k,l,m) = Mxx*xixd+Mxy*xiyd+Mxz*xizd
- G12(k,l,m) = Mxx*etaxd+Mxy*etayd+Mxz*etazd
- G13(k,l,m) = Mxx*gammaxd+Mxy*gammayd+Mxz*gammazd
- G21(k,l,m) = Mxy*xixd+Myy*xiyd+Myz*xizd
- G22(k,l,m) = Mxy*etaxd+Myy*etayd+Myz*etazd
- G23(k,l,m) = Mxy*gammaxd+Myy*gammayd+Myz*gammazd
- G31(k,l,m) = Mxz*xixd+Myz*xiyd+Mzz*xizd
- G32(k,l,m) = Mxz*etaxd+Myz*etayd+Mzz*etazd
- G33(k,l,m) = Mxz*gammaxd+Myz*gammayd+Mzz*gammazd
-
- enddo
- enddo
- enddo
-
-! compute Lagrange polynomials at the source location
- call lagrange_any(xi_source,NGLLX,xigll,hxis,hpxis)
- call lagrange_any(eta_source,NGLLY,yigll,hetas,hpetas)
- call lagrange_any(gamma_source,NGLLZ,zigll,hgammas,hpgammas)
-
-! calculate source array
- do m=1,NGLLZ
- do l=1,NGLLY
- do k=1,NGLLX
- call multiply_arrays_source(sourcearrayd,G11,G12,G13,G21,G22,G23, &
- G31,G32,G33,hxis,hpxis,hetas,hpetas,hgammas,hpgammas,k,l,m)
- enddo
- enddo
- enddo
-
-! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- sourcearray(:,:,:,:) = sngl(sourcearrayd(:,:,:,:))
- else
- sourcearray(:,:,:,:) = sourcearrayd(:,:,:,:)
- endif
-
- end subroutine compute_arrays_source
-
-!=============================================================================
-
- subroutine compute_arrays_adjoint_source(myrank, adj_source_file, &
- xi_receiver,eta_receiver,gamma_receiver, adj_sourcearray, &
- xigll,yigll,zigll, &
- it_sub_adj,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC)
-
- implicit none
-
- include 'constants.h'
-
-! input
- integer myrank, NSTEP, it_sub_adj, NTSTEP_BETWEEN_READ_ADJSRC
-
- double precision xi_receiver, eta_receiver, gamma_receiver
-
- character(len=*) adj_source_file
-
-! output
- real(kind=CUSTOM_REAL),dimension(NTSTEP_BETWEEN_READ_ADJSRC,NDIM,NGLLX,NGLLY,NGLLZ) :: adj_sourcearray
-
-! Gauss-Lobatto-Legendre points of integration and weights
- double precision, dimension(NGLLX) :: xigll
- double precision, dimension(NGLLY) :: yigll
- double precision, dimension(NGLLZ) :: zigll
-
- double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY), &
- hgammar(NGLLZ), hpgammar(NGLLZ)
-
- real(kind=CUSTOM_REAL) :: adj_src(NTSTEP_BETWEEN_READ_ADJSRC,NDIM)
-
- integer icomp, itime, i, j, k, ios, it_start, it_end
- double precision :: junk
- ! note: should have same order as orientation in write_seismograms_to_file()
- character(len=3),dimension(NDIM) :: comp != (/ "BHE", "BHN", "BHZ" /)
- character(len=256) :: filename
-
- ! gets channel names
- do icomp=1,NDIM
- call write_channel_name(icomp,comp(icomp))
- enddo
-
- ! range of the block we need to read
- it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1
- it_end = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1
-
- !adj_sourcearray(:,:,:,:,:) = 0.
- adj_src = 0._CUSTOM_REAL
-
- ! loops over components
- do icomp = 1, NDIM
-
- filename = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/../SEM/'//trim(adj_source_file) // '.'// comp(icomp) // '.adj'
- open(unit=IIN,file=trim(filename),status='old',action='read',iostat = ios)
- if (ios /= 0) cycle ! cycles to next file
- !if (ios /= 0) call exit_MPI(myrank, ' file '//trim(filename)//'does not exist')
-
- ! reads in adjoint source trace
- !! skip unused blocks
- do itime = 1, it_start-1
- read(IIN,*,iostat=ios) junk, junk
- if( ios /= 0 ) &
- call exit_MPI(myrank, &
- 'file '//trim(filename)//' has wrong length, please check with your simulation duration (1111)')
- enddo
- !! read the block we need
- do itime = it_start, it_end
- read(IIN,*,iostat=ios) junk, adj_src(itime-it_start+1,icomp)
- !!! used to check whether we read the correct block
- ! if (icomp==1) print *, junk, adj_src(itime-it_start+1,icomp)
- if( ios /= 0 ) &
- call exit_MPI(myrank, &
- 'file '//trim(filename)//' has wrong length, please check with your simulation duration (2222)')
- enddo
- close(IIN)
-
- enddo
-
- ! lagrange interpolators for receiver location
- call lagrange_any(xi_receiver,NGLLX,xigll,hxir,hpxir)
- call lagrange_any(eta_receiver,NGLLY,yigll,hetar,hpetar)
- call lagrange_any(gamma_receiver,NGLLZ,zigll,hgammar,hpgammar)
-
- ! interpolates adjoint source onto GLL points within this element
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
- enddo
- enddo
- enddo
-
-end subroutine compute_arrays_adjoint_source
-
-
-! =======================================================================
-
-! compute array for acoustic source
- subroutine compute_arrays_source_acoustic(xi_source,eta_source,gamma_source,&
- sourcearray,xigll,yigll,zigll,factor_source)
-
- implicit none
-
- include "constants.h"
-
- double precision :: xi_source,eta_source,gamma_source
- real(kind=CUSTOM_REAL) :: factor_source
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
-
-! Gauss-Lobatto-Legendre points of integration and weights
- double precision, dimension(NGLLX) :: xigll
- double precision, dimension(NGLLY) :: yigll
- double precision, dimension(NGLLZ) :: zigll
-
-! local parameters
-! source arrays
- double precision, dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearrayd
- double precision, dimension(NGLLX) :: hxis,hpxis
- double precision, dimension(NGLLY) :: hetas,hpetas
- double precision, dimension(NGLLZ) :: hgammas,hpgammas
- integer :: i,j,k
-
-! initializes
- sourcearray(:,:,:,:) = 0._CUSTOM_REAL
- sourcearrayd(:,:,:,:) = 0.d0
-
-! computes Lagrange polynomials at the source location
- call lagrange_any(xi_source,NGLLX,xigll,hxis,hpxis)
- call lagrange_any(eta_source,NGLLY,yigll,hetas,hpetas)
- call lagrange_any(gamma_source,NGLLZ,zigll,hgammas,hpgammas)
-
-! calculates source array for interpolated location
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- ! identical source array components in x,y,z-direction
- sourcearrayd(:,i,j,k) = hxis(i)*hetas(j)*hgammas(k)*dble(factor_source)
- enddo
- enddo
- enddo
-
-! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- sourcearray(:,:,:,:) = sngl(sourcearrayd(:,:,:,:))
- else
- sourcearray(:,:,:,:) = sourcearrayd(:,:,:,:)
- endif
-
- end subroutine compute_arrays_source_acoustic
-
-
-! testing read in adjoint sources block by block
-
-!!!the original version
-!!!
-!!!subroutine compute_arrays_adjoint_source(myrank, adj_source_file, &
-!!! xi_receiver,eta_receiver,gamma_receiver, adj_sourcearray, &
-!!! xigll,yigll,zigll,NSTEP)
-!!!
-!!!
-!!! implicit none
-!!!
-!!! include 'constants.h'
-!!!
-!!!! input
-!!! integer myrank, NSTEP
-!!!
-!!! double precision xi_receiver, eta_receiver, gamma_receiver
-!!!
-!!! character(len=*) adj_source_file
-!!!
-!!!! output
-!!! real(kind=CUSTOM_REAL),dimension(NSTEP,NDIM,NGLLX,NGLLY,NGLLZ) :: adj_sourcearray
-!!!
-!!!! Gauss-Lobatto-Legendre points of integration and weights
-!!! double precision, dimension(NGLLX) :: xigll
-!!! double precision, dimension(NGLLY) :: yigll
-!!! double precision, dimension(NGLLZ) :: zigll
-!!!
-!!! double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY), &
-!!! hgammar(NGLLZ), hpgammar(NGLLZ)
-!!!
-!!! real(kind=CUSTOM_REAL) :: adj_src(NSTEP,NDIM)
-!!!
-!!! integer icomp, itime, i, j, k, ios
-!!! double precision :: junk
-!!! ! note: should have same order as orientation in write_seismograms_to_file()
-!!! character(len=3),dimension(NDIM) :: comp = (/ "BHE", "BHN", "BHZ" /)
-!!! character(len=256) :: filename
-!!!
-!!! !adj_sourcearray(:,:,:,:,:) = 0.
-!!! adj_src = 0._CUSTOM_REAL
-!!!
-!!! ! loops over components
-!!! do icomp = 1, NDIM
-!!!
-!!! filename = 'SEM/'//trim(adj_source_file) // '.'// comp(icomp) // '.adj'
-!!! open(unit=IIN,file=trim(filename),status='old',action='read',iostat = ios)
-!!! if (ios /= 0) cycle ! cycles to next file
-!!! !if (ios /= 0) call exit_MPI(myrank, ' file '//trim(filename)//'does not exist')
-!!!
-!!! ! reads in adjoint source trace
-!!! do itime = 1, NSTEP
-!!!
-!!! ! things become a bit tricky because of the Newark time scheme at
-!!! ! the very beginning of the time loop. however, when we read in the backward/reconstructed
-!!! ! wavefields at the end of the first time loop, we can use the adjoint source index from 1 to NSTEP
-!!! ! (and then access it in reverse NSTEP-it+1 down to 1, for it=1,..NSTEP; see compute_add_sources*.f90).
-!!! read(IIN,*,iostat=ios) junk, adj_src(itime,icomp)
-!!! if( ios /= 0 ) &
-!!! call exit_MPI(myrank, &
-!!! 'file '//trim(filename)//' has wrong length, please check with your simulation duration')
-!!! enddo
-!!! close(IIN)
-!!!
-!!! enddo
-!!!
-!!! ! lagrange interpolators for receiver location
-!!! call lagrange_any(xi_receiver,NGLLX,xigll,hxir,hpxir)
-!!! call lagrange_any(eta_receiver,NGLLY,yigll,hetar,hpetar)
-!!! call lagrange_any(gamma_receiver,NGLLZ,zigll,hgammar,hpgammar)
-!!!
-!!! ! interpolates adjoint source onto GLL points within this element
-!!! do k = 1, NGLLZ
-!!! do j = 1, NGLLY
-!!! do i = 1, NGLLX
-!!! adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
-!!! enddo
-!!! enddo
-!!! enddo
-!!!
-!!!end subroutine compute_arrays_adjoint_source
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine compute_arrays_source(ispec_selected_source, &
+ xi_source,eta_source,gamma_source,sourcearray, &
+ Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ xigll,yigll,zigll,nspec)
+
+ implicit none
+
+ include "constants.h"
+
+ integer ispec_selected_source
+ integer nspec
+
+ double precision xi_source,eta_source,gamma_source
+ double precision Mxx,Myy,Mzz,Mxy,Mxz,Myz
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xix,xiy,xiz,etax,etay,etaz, &
+ gammax,gammay,gammaz
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
+
+ double precision xixd,xiyd,xizd,etaxd,etayd,etazd,gammaxd,gammayd,gammazd
+
+! Gauss-Lobatto-Legendre points of integration and weights
+ double precision, dimension(NGLLX) :: xigll
+ double precision, dimension(NGLLY) :: yigll
+ double precision, dimension(NGLLZ) :: zigll
+
+! source arrays
+ double precision, dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearrayd
+ double precision, dimension(NGLLX,NGLLY,NGLLZ) :: G11,G12,G13,G21,G22,G23,G31,G32,G33
+ double precision, dimension(NGLLX) :: hxis,hpxis
+ double precision, dimension(NGLLY) :: hetas,hpetas
+ double precision, dimension(NGLLZ) :: hgammas,hpgammas
+
+ integer k,l,m
+
+! calculate G_ij for general source location
+! the source does not necessarily correspond to a Gauss-Lobatto point
+ do m=1,NGLLZ
+ do l=1,NGLLY
+ do k=1,NGLLX
+
+ xixd = dble(xix(k,l,m,ispec_selected_source))
+ xiyd = dble(xiy(k,l,m,ispec_selected_source))
+ xizd = dble(xiz(k,l,m,ispec_selected_source))
+ etaxd = dble(etax(k,l,m,ispec_selected_source))
+ etayd = dble(etay(k,l,m,ispec_selected_source))
+ etazd = dble(etaz(k,l,m,ispec_selected_source))
+ gammaxd = dble(gammax(k,l,m,ispec_selected_source))
+ gammayd = dble(gammay(k,l,m,ispec_selected_source))
+ gammazd = dble(gammaz(k,l,m,ispec_selected_source))
+
+ G11(k,l,m) = Mxx*xixd+Mxy*xiyd+Mxz*xizd
+ G12(k,l,m) = Mxx*etaxd+Mxy*etayd+Mxz*etazd
+ G13(k,l,m) = Mxx*gammaxd+Mxy*gammayd+Mxz*gammazd
+ G21(k,l,m) = Mxy*xixd+Myy*xiyd+Myz*xizd
+ G22(k,l,m) = Mxy*etaxd+Myy*etayd+Myz*etazd
+ G23(k,l,m) = Mxy*gammaxd+Myy*gammayd+Myz*gammazd
+ G31(k,l,m) = Mxz*xixd+Myz*xiyd+Mzz*xizd
+ G32(k,l,m) = Mxz*etaxd+Myz*etayd+Mzz*etazd
+ G33(k,l,m) = Mxz*gammaxd+Myz*gammayd+Mzz*gammazd
+
+ enddo
+ enddo
+ enddo
+
+! compute Lagrange polynomials at the source location
+ call lagrange_any(xi_source,NGLLX,xigll,hxis,hpxis)
+ call lagrange_any(eta_source,NGLLY,yigll,hetas,hpetas)
+ call lagrange_any(gamma_source,NGLLZ,zigll,hgammas,hpgammas)
+
+! calculate source array
+ do m=1,NGLLZ
+ do l=1,NGLLY
+ do k=1,NGLLX
+ call multiply_arrays_source(sourcearrayd,G11,G12,G13,G21,G22,G23, &
+ G31,G32,G33,hxis,hpxis,hetas,hpetas,hgammas,hpgammas,k,l,m)
+ enddo
+ enddo
+ enddo
+
+! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ sourcearray(:,:,:,:) = sngl(sourcearrayd(:,:,:,:))
+ else
+ sourcearray(:,:,:,:) = sourcearrayd(:,:,:,:)
+ endif
+
+ end subroutine compute_arrays_source
+
+!=============================================================================
+
+ subroutine compute_arrays_adjoint_source(myrank, adj_source_file, &
+ xi_receiver,eta_receiver,gamma_receiver, adj_sourcearray, &
+ xigll,yigll,zigll, &
+ it_sub_adj,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC)
+
+ implicit none
+
+ include 'constants.h'
+
+! input
+ integer myrank, NSTEP, it_sub_adj, NTSTEP_BETWEEN_READ_ADJSRC
+
+ double precision xi_receiver, eta_receiver, gamma_receiver
+
+ character(len=*) adj_source_file
+
+! output
+ real(kind=CUSTOM_REAL),dimension(NTSTEP_BETWEEN_READ_ADJSRC,NDIM,NGLLX,NGLLY,NGLLZ) :: adj_sourcearray
+
+! Gauss-Lobatto-Legendre points of integration and weights
+ double precision, dimension(NGLLX) :: xigll
+ double precision, dimension(NGLLY) :: yigll
+ double precision, dimension(NGLLZ) :: zigll
+
+ double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY), &
+ hgammar(NGLLZ), hpgammar(NGLLZ)
+
+ real(kind=CUSTOM_REAL) :: adj_src(NTSTEP_BETWEEN_READ_ADJSRC,NDIM)
+
+ integer icomp, itime, i, j, k, ios, it_start, it_end
+ double precision :: junk
+ ! note: should have same order as orientation in write_seismograms_to_file()
+ character(len=3),dimension(NDIM) :: comp != (/ "BHE", "BHN", "BHZ" /)
+ character(len=256) :: filename
+
+ ! gets channel names
+ do icomp=1,NDIM
+ call write_channel_name(icomp,comp(icomp))
+ enddo
+
+ ! range of the block we need to read
+ it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1
+ it_end = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1
+
+ !adj_sourcearray(:,:,:,:,:) = 0.
+ adj_src = 0._CUSTOM_REAL
+
+ ! loops over components
+ do icomp = 1, NDIM
+
+ filename = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/../SEM/'//trim(adj_source_file)//'.'//comp(icomp)//'.adj'
+ open(unit=IIN,file=trim(filename),status='old',action='read',iostat = ios)
+ ! cycles to next file (this might be more error prone)
+ !if (ios /= 0) cycle
+ ! requires adjoint files to exist (users will have to be more careful in setting up adjoint runs)
+ if (ios /= 0) call exit_MPI(myrank, ' file '//trim(filename)//'does not exist')
+
+ ! reads in adjoint source trace
+ !! skip unused blocks
+ do itime = 1, it_start-1
+ read(IIN,*,iostat=ios) junk, junk
+ if( ios /= 0 ) &
+ call exit_MPI(myrank, &
+ 'file '//trim(filename)//' has wrong length, please check with your simulation duration (1111)')
+ enddo
+ !! read the block we need
+ do itime = it_start, it_end
+ read(IIN,*,iostat=ios) junk, adj_src(itime-it_start+1,icomp)
+ !!! used to check whether we read the correct block
+ ! if (icomp==1) print *, junk, adj_src(itime-it_start+1,icomp)
+ if( ios /= 0 ) &
+ call exit_MPI(myrank, &
+ 'file '//trim(filename)//' has wrong length, please check with your simulation duration (2222)')
+ enddo
+ close(IIN)
+
+ enddo
+
+ ! lagrange interpolators for receiver location
+ call lagrange_any(xi_receiver,NGLLX,xigll,hxir,hpxir)
+ call lagrange_any(eta_receiver,NGLLY,yigll,hetar,hpetar)
+ call lagrange_any(gamma_receiver,NGLLZ,zigll,hgammar,hpgammar)
+
+ ! interpolates adjoint source onto GLL points within this element
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
+ enddo
+ enddo
+ enddo
+
+end subroutine compute_arrays_adjoint_source
+
+
+! =======================================================================
+
+! compute array for acoustic source
+ subroutine compute_arrays_source_acoustic(xi_source,eta_source,gamma_source,&
+ sourcearray,xigll,yigll,zigll,factor_source)
+
+ implicit none
+
+ include "constants.h"
+
+ double precision :: xi_source,eta_source,gamma_source
+ real(kind=CUSTOM_REAL) :: factor_source
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
+
+! Gauss-Lobatto-Legendre points of integration and weights
+ double precision, dimension(NGLLX) :: xigll
+ double precision, dimension(NGLLY) :: yigll
+ double precision, dimension(NGLLZ) :: zigll
+
+! local parameters
+! source arrays
+ double precision, dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearrayd
+ double precision, dimension(NGLLX) :: hxis,hpxis
+ double precision, dimension(NGLLY) :: hetas,hpetas
+ double precision, dimension(NGLLZ) :: hgammas,hpgammas
+ integer :: i,j,k
+
+! initializes
+ sourcearray(:,:,:,:) = 0._CUSTOM_REAL
+ sourcearrayd(:,:,:,:) = 0.d0
+
+! computes Lagrange polynomials at the source location
+ call lagrange_any(xi_source,NGLLX,xigll,hxis,hpxis)
+ call lagrange_any(eta_source,NGLLY,yigll,hetas,hpetas)
+ call lagrange_any(gamma_source,NGLLZ,zigll,hgammas,hpgammas)
+
+! calculates source array for interpolated location
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ ! identical source array components in x,y,z-direction
+ sourcearrayd(:,i,j,k) = hxis(i)*hetas(j)*hgammas(k)*dble(factor_source)
+ enddo
+ enddo
+ enddo
+
+! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ sourcearray(:,:,:,:) = sngl(sourcearrayd(:,:,:,:))
+ else
+ sourcearray(:,:,:,:) = sourcearrayd(:,:,:,:)
+ endif
+
+ end subroutine compute_arrays_source_acoustic
+
+
+! testing read in adjoint sources block by block
+
+!!!the original version
+!!!
+!!!subroutine compute_arrays_adjoint_source(myrank, adj_source_file, &
+!!! xi_receiver,eta_receiver,gamma_receiver, adj_sourcearray, &
+!!! xigll,yigll,zigll,NSTEP)
+!!!
+!!!
+!!! implicit none
+!!!
+!!! include 'constants.h'
+!!!
+!!!! input
+!!! integer myrank, NSTEP
+!!!
+!!! double precision xi_receiver, eta_receiver, gamma_receiver
+!!!
+!!! character(len=*) adj_source_file
+!!!
+!!!! output
+!!! real(kind=CUSTOM_REAL),dimension(NSTEP,NDIM,NGLLX,NGLLY,NGLLZ) :: adj_sourcearray
+!!!
+!!!! Gauss-Lobatto-Legendre points of integration and weights
+!!! double precision, dimension(NGLLX) :: xigll
+!!! double precision, dimension(NGLLY) :: yigll
+!!! double precision, dimension(NGLLZ) :: zigll
+!!!
+!!! double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY), &
+!!! hgammar(NGLLZ), hpgammar(NGLLZ)
+!!!
+!!! real(kind=CUSTOM_REAL) :: adj_src(NSTEP,NDIM)
+!!!
+!!! integer icomp, itime, i, j, k, ios
+!!! double precision :: junk
+!!! ! note: should have same order as orientation in write_seismograms_to_file()
+!!! character(len=3),dimension(NDIM) :: comp = (/ "BHE", "BHN", "BHZ" /)
+!!! character(len=256) :: filename
+!!!
+!!! !adj_sourcearray(:,:,:,:,:) = 0.
+!!! adj_src = 0._CUSTOM_REAL
+!!!
+!!! ! loops over components
+!!! do icomp = 1, NDIM
+!!!
+!!! filename = 'SEM/'//trim(adj_source_file) // '.'// comp(icomp) // '.adj'
+!!! open(unit=IIN,file=trim(filename),status='old',action='read',iostat = ios)
+!!! if (ios /= 0) cycle ! cycles to next file
+!!! !if (ios /= 0) call exit_MPI(myrank, ' file '//trim(filename)//'does not exist')
+!!!
+!!! ! reads in adjoint source trace
+!!! do itime = 1, NSTEP
+!!!
+!!! ! things become a bit tricky because of the Newark time scheme at
+!!! ! the very beginning of the time loop. however, when we read in the backward/reconstructed
+!!! ! wavefields at the end of the first time loop, we can use the adjoint source index from 1 to NSTEP
+!!! ! (and then access it in reverse NSTEP-it+1 down to 1, for it=1,..NSTEP; see compute_add_sources*.f90).
+!!! read(IIN,*,iostat=ios) junk, adj_src(itime,icomp)
+!!! if( ios /= 0 ) &
+!!! call exit_MPI(myrank, &
+!!! 'file '//trim(filename)//' has wrong length, please check with your simulation duration')
+!!! enddo
+!!! close(IIN)
+!!!
+!!! enddo
+!!!
+!!! ! lagrange interpolators for receiver location
+!!! call lagrange_any(xi_receiver,NGLLX,xigll,hxir,hpxir)
+!!! call lagrange_any(eta_receiver,NGLLY,yigll,hetar,hpetar)
+!!! call lagrange_any(gamma_receiver,NGLLZ,zigll,hgammar,hpgammar)
+!!!
+!!! ! interpolates adjoint source onto GLL points within this element
+!!! do k = 1, NGLLZ
+!!! do j = 1, NGLLY
+!!! do i = 1, NGLLX
+!!! adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
+!!! enddo
+!!! enddo
+!!! enddo
+!!!
+!!!end subroutine compute_arrays_adjoint_source
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in 2011-10-30 02:25:28 UTC (rev 19129)
@@ -73,6 +73,11 @@
! for optimized routines by Deville et al. (2002)
integer, parameter :: m1 = NGLLX, m2 = NGLLX * NGLLY
+!!-----------------------------------------------------------
+!!
+!! seismogram and i/o output
+!!
+!!-----------------------------------------------------------
! ouput format of seismograms, ASCII or binary
logical, parameter :: SEISMOGRAMS_BINARY = .false.
! output format of seismograms, Seismic Unix (binary with 240-byte-headers)
@@ -104,6 +109,12 @@
! ignore variable name field (junk) at the beginning of each input line
logical, parameter :: IGNORE_JUNK = .true.,DONT_IGNORE_JUNK = .false.
+!!-----------------------------------------------------------
+!!
+!! source/receiver setup
+!!
+!!-----------------------------------------------------------
+
! flag to print the details of source location
logical, parameter :: SHOW_DETAILS_LOCATE_SOURCE = .false.
@@ -126,6 +137,12 @@
! use directory OUTPUT_FILES/ for seismogram output
logical,parameter :: USE_OUTPUT_FILES_PATH = .true.
+!!-----------------------------------------------------------
+!!
+!! absorption and PML
+!!
+!!-----------------------------------------------------------
+
! absorb top surface
! (defined in mesh as 'free_surface_file')
logical,parameter :: ABSORB_FREE_SURFACE = .false.
@@ -136,6 +153,12 @@
! (user parameters can be specified in PML_init.f90)
logical,parameter :: ABSORB_USE_PML = .false.
+!!-----------------------------------------------------------
+!!
+!! directory structure
+!!
+!!-----------------------------------------------------------
+
! paths for inputs and outputs files
character(len=256), parameter :: IN_DATA_FILES_PATH = '../in_data_files/'
character(len=256), parameter :: MF_IN_DATA_FILES_PATH = '../in_data_files/meshfem3D_files/'
@@ -197,6 +220,12 @@
logical, parameter :: EXTERNAL_MESH_MOVIE_SURFACE = .false.
logical, parameter :: EXTERNAL_MESH_CREATE_SHAKEMAP = .false.
+!!-----------------------------------------------------------
+!!
+!! image outputs
+!!
+!!-----------------------------------------------------------
+
! plots VTK cross-section planes instead of model surface
! (EXPERIMENTAL feature)
! (requires EXTERNAL_MESH_MOVIE_SURFACE set to true)
@@ -218,6 +247,17 @@
! this is an absolute value for normalized coordinates in the Earth
double precision, parameter :: SMALLVAL_TOL = 1.d-10
+!!-----------------------------------------------------------
+!!
+!! mesh optimization
+!!
+!!-----------------------------------------------------------
+
+! add mesh coloring for the GPU + MPI implementation
+ logical, parameter :: USE_MESH_COLORING_GPU = .false.
+ integer, parameter :: MAX_NUMBER_OF_COLORS = 10000
+ integer, parameter :: NGNOD_HEXAHEDRA = 8
+
!------------------------------------------------------
!----------- do not modify anything below -------------
!------------------------------------------------------
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/convolve_source_timefunction.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/convolve_source_timefunction.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/convolve_source_timefunction.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,134 +1,134 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- program convolve_source_time_function
-
-!
-! convolve seismograms computed for a Heaviside with given source time function
-!
-
-! we mimic a triangle of half duration equal to half_duration_triangle
-! using a Gaussian having a very close shape, as explained in Figure 4.2
-! of the manual
-
- implicit none
-
- include "constants.h"
-
- integer :: i,j,N_j,number_remove,nlines
-
- double precision :: alpha,dt,tau_j,source,exponent,t1,t2,displ1,displ2,gamma,height,half_duration_triangle
-
- logical :: triangle
-
- double precision, dimension(:), allocatable :: time,sem,sem_fil
-
-! read file with number of lines in input
- open(unit=33,file='input_convolve_code.txt',status='old',action='read')
- read(33,*) nlines
- read(33,*) half_duration_triangle
- read(33,*) triangle
- close(33)
-
-! allocate arrays
- allocate(time(nlines),sem(nlines),sem_fil(nlines))
-
-! read the input seismogram
- do i = 1,nlines
- read(5,*) time(i),sem(i)
- enddo
-
-! define a Gaussian with the right exponent to mimic a triangle of equivalent half duration
- alpha = SOURCE_DECAY_MIMIC_TRIANGLE/half_duration_triangle
-
-! compute the time step
- dt = time(2) - time(1)
-
-! number of integers for which the source wavelet is different from zero
- if(triangle) then
- N_j = ceiling(half_duration_triangle/dt)
- else
- N_j = ceiling(1.5d0*half_duration_triangle/dt)
- endif
-
- do i = 1,nlines
-
- sem_fil(i) = 0.d0
-
- do j = -N_j,N_j
-
- if(i > j .and. i-j <= nlines) then
-
- tau_j = dble(j)*dt
-
-! convolve with a triangle
- if(triangle) then
- height = 1.d0 / half_duration_triangle
- if(abs(tau_j) > half_duration_triangle) then
- source = 0.d0
- else if (tau_j < 0.d0) then
- t1 = - N_j * dt
- displ1 = 0.d0
- t2 = 0.d0
- displ2 = height
- gamma = (tau_j - t1) / (t2 - t1)
- source= (1.d0 - gamma) * displ1 + gamma * displ2
- else
- t1 = 0.d0
- displ1 = height
- t2 = + N_j * dt
- displ2 = 0.d0
- gamma = (tau_j - t1) / (t2 - t1)
- source= (1.d0 - gamma) * displ1 + gamma * displ2
- endif
-
- else
-
-! convolve with a Gaussian
- exponent = alpha**2 * tau_j**2
- if(exponent < 50.d0) then
- source = alpha*exp(-exponent)/sqrt(PI)
- else
- source = 0.d0
- endif
-
- endif
-
- sem_fil(i) = sem_fil(i) + sem(i-j)*source*dt
-
- endif
-
- enddo
- enddo
-
-! compute number of samples to remove from end of seismograms
- number_remove = N_j + 1
- do i=1,nlines - number_remove
- write(*,*) sngl(time(i)),' ',sngl(sem_fil(i))
- enddo
-
- end program convolve_source_time_function
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ program convolve_source_time_function
+
+!
+! convolve seismograms computed for a Heaviside with given source time function
+!
+
+! we mimic a triangle of half duration equal to half_duration_triangle
+! using a Gaussian having a very close shape, as explained in Figure 4.2
+! of the manual
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: i,j,N_j,number_remove,nlines
+
+ double precision :: alpha,dt,tau_j,source,exponent,t1,t2,displ1,displ2,gamma,height,half_duration_triangle
+
+ logical :: triangle
+
+ double precision, dimension(:), allocatable :: time,sem,sem_fil
+
+! read file with number of lines in input
+ open(unit=33,file='input_convolve_code.txt',status='old',action='read')
+ read(33,*) nlines
+ read(33,*) half_duration_triangle
+ read(33,*) triangle
+ close(33)
+
+! allocate arrays
+ allocate(time(nlines),sem(nlines),sem_fil(nlines))
+
+! read the input seismogram
+ do i = 1,nlines
+ read(5,*) time(i),sem(i)
+ enddo
+
+! define a Gaussian with the right exponent to mimic a triangle of equivalent half duration
+ alpha = SOURCE_DECAY_MIMIC_TRIANGLE/half_duration_triangle
+
+! compute the time step
+ dt = time(2) - time(1)
+
+! number of integers for which the source wavelet is different from zero
+ if(triangle) then
+ N_j = ceiling(half_duration_triangle/dt)
+ else
+ N_j = ceiling(1.5d0*half_duration_triangle/dt)
+ endif
+
+ do i = 1,nlines
+
+ sem_fil(i) = 0.d0
+
+ do j = -N_j,N_j
+
+ if(i > j .and. i-j <= nlines) then
+
+ tau_j = dble(j)*dt
+
+! convolve with a triangle
+ if(triangle) then
+ height = 1.d0 / half_duration_triangle
+ if(abs(tau_j) > half_duration_triangle) then
+ source = 0.d0
+ else if (tau_j < 0.d0) then
+ t1 = - N_j * dt
+ displ1 = 0.d0
+ t2 = 0.d0
+ displ2 = height
+ gamma = (tau_j - t1) / (t2 - t1)
+ source= (1.d0 - gamma) * displ1 + gamma * displ2
+ else
+ t1 = 0.d0
+ displ1 = height
+ t2 = + N_j * dt
+ displ2 = 0.d0
+ gamma = (tau_j - t1) / (t2 - t1)
+ source= (1.d0 - gamma) * displ1 + gamma * displ2
+ endif
+
+ else
+
+! convolve with a Gaussian
+ exponent = alpha**2 * tau_j**2
+ if(exponent < 50.d0) then
+ source = alpha*exp(-exponent)/sqrt(PI)
+ else
+ source = 0.d0
+ endif
+
+ endif
+
+ sem_fil(i) = sem_fil(i) + sem(i-j)*source*dt
+
+ endif
+
+ enddo
+ enddo
+
+! compute number of samples to remove from end of seismograms
+ number_remove = N_j + 1
+ do i=1,nlines - number_remove
+ write(*,*) sngl(time(i)),' ',sngl(sem_fil(i))
+ enddo
+
+ end program convolve_source_time_function
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,1039 +1,1039 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!
-!--- create a movie of the vertical component of surface displacement or velocity
-!--- or a ShakeMap(R) (i.e. map of the maximum absolute value of the two horizontal components
-!--- of the velocity vector) in AVS, OpenDX or GMT format
-!
-
- program create_movie_shakemap
-
- implicit none
-
- include "constants.h"
- include "surface_from_mesher.h"
-
-!-------------------------------------------------------------------------------------------------
-! user parameters
-! threshold in percent of the maximum below which we cut the amplitude
- logical, parameter :: APPLY_THRESHOLD = .false.
- real(kind=CUSTOM_REAL), parameter :: THRESHOLD = 1._CUSTOM_REAL / 100._CUSTOM_REAL
-
-! coefficient of power law used for non linear scaling
- logical, parameter :: NONLINEAR_SCALING = .false.
- real(kind=CUSTOM_REAL), parameter :: POWER_SCALING = 0.13_CUSTOM_REAL
-
-!-------------------------------------------------------------------------------------------------
-
- integer it,it1,it2,ivalue,nspectot_AVS_max,ispec
- integer iformat,nframes,iframe,inumber,inorm,iscaling_shake
- integer ibool_number,ibool_number1,ibool_number2,ibool_number3,ibool_number4
-
- logical USE_OPENDX,USE_AVS,USE_GMT,plot_shaking_map
-
- real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: x,y,z,display
- real(kind=CUSTOM_REAL) xcoord,ycoord,zcoord
- real(kind=CUSTOM_REAL) vectorx,vectory,vectorz
-
- double precision min_field_current,max_field_current,max_absol
-
- character(len=256) outputname
-
- integer ipoin
-
- ! GMT
- double precision lat,long
-
- ! for sorting routine
- integer npointot,ilocnum,nglob,i,j,ielm,ieoff,ispecloc
- integer, dimension(:), allocatable :: iglob,loc,ireorder
- logical, dimension(:), allocatable :: ifseg,mask_point
- double precision, dimension(:), allocatable :: xp,yp,zp,xp_save,yp_save,zp_save,field_display
-
- ! movie files stored by solver
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
- store_val_x,store_val_y,store_val_z, &
- store_val_ux,store_val_uy,store_val_uz
-
- ! parameters read from parameter file
- integer NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,UTM_PROJECTION_ZONE,SIMULATION_TYPE
- integer NSOURCES,NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY
- logical MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
- USE_HIGHRES_FOR_MOVIES,SUPPRESS_UTM_PROJECTION
- integer NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO
- double precision DT
- double precision HDUR_MOVIE
- logical ATTENUATION,USE_OLSEN_ATTENUATION, &
- OCEANS,TOPOGRAPHY
- logical ABSORBING_CONDITIONS,SAVE_FORWARD
- logical ANISOTROPY,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
- character(len=256) OUTPUT_FILES,LOCAL_PATH
- integer NPROC
- integer ier
-
-
-!--------------------------------------------
-!!!! NL NL for external meshes
-!--------------------------------------------
- ! muting source region
- real(kind=CUSTOM_REAL), parameter :: RADIUS_TO_MUTE = 1000._CUSTOM_REAL
- logical, parameter :: MUTE_SOURCE = .true.
- real(kind=CUSTOM_REAL), parameter :: X_SOURCE_EXT_MESH = -9023.021484375
- real(kind=CUSTOM_REAL), parameter :: Y_SOURCE_EXT_MESH = 6123.611328125
- real(kind=CUSTOM_REAL), parameter :: Z_SOURCE_EXT_MESH = 17.96331405639648
-!--------------------------------------------
-!!!! NL NL
-
- ! order of points representing the 2D square element
- integer,dimension(NGNOD2D_AVS_DX),parameter :: iorder = (/1,3,2,4/)
-
-
-! ************** PROGRAM STARTS HERE **************
-
- print *
- print *,'Recombining all movie frames to create a movie'
- print *
-
- print *
- print *,'reading parameter file'
- print *
-
- ! read the parameter file
- call read_parameter_file(NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
- ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
- OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
- MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
- NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
- SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION, &
- NTSTEP_BETWEEN_OUTPUT_INFO,SIMULATION_TYPE,SAVE_FORWARD, &
- NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
-
- ! get the base pathname for output files
- call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
-
- ! only one global array for movie data, but stored for all surfaces defined
- ! in file 'surface_from_mesher.h'
- if(USE_HIGHRES_FOR_MOVIES) then
- ilocnum = NSPEC_SURFACE_EXT_MESH*NGLLSQUARE
- else
- ilocnum = NSPEC_SURFACE_EXT_MESH*NGNOD2D_AVS_DX
- endif
- print*,' moviedata element surfaces: ',NSPEC_SURFACE_EXT_MESH
- print*,' moviedata total elements all: ',ilocnum
- print *
-
- if(SAVE_DISPLACEMENT) then
- print *,'Vertical displacement will be shown in movie'
- else
- print *,'Vertical velocity will be shown in movie'
- endif
- print *
-
-
- ! user input
- print *,'1 = create files in OpenDX format'
- print *,'2 = create files in AVS UCD format'
- print *,'3 = create files in GMT xyz Ascii long/lat/Uz format'
- print *,'any other value = exit'
- print *
- print *,'enter value:'
- read(5,*) iformat
- if(iformat < 1 .or. iformat > 3) stop 'exiting...'
-
- plot_shaking_map = .false.
- print *,'movie frames have been saved every ',NTSTEP_BETWEEN_FRAMES,' time steps'
- print *
- print *,'enter first time step of movie (e.g. 1, enter -1 for shaking map)'
- read(5,*) it1
- if(it1 == 0 ) it1 = 1
- if(it1 == -1) plot_shaking_map = .true.
- if(.not. plot_shaking_map) then
- print *,'enter last time step of movie (e.g. ',NSTEP,')'
- read(5,*) it2
- print *
- print *,'1 = define file names using frame number'
- print *,'2 = define file names using time step number'
- print *,'any other value = exit'
- print *
- print *,'enter value:'
- read(5,*) inumber
- if(inumber<1 .or. inumber>2) stop 'exiting...'
- print *
- print *,'looping from ',it1,' to ',it2,' every ',NTSTEP_BETWEEN_FRAMES,' time steps'
- ! count number of movie frames
- nframes = 0
- do it = it1,it2
- if(mod(it,NTSTEP_BETWEEN_FRAMES) == 0) nframes = nframes + 1
- enddo
- else
- ! only one frame if shaking map
- nframes = 1
- it1 = 1
- it2 = 1
- endif
- print *
- print *,'total number of frames will be ',nframes
- if(nframes == 0) stop 'null number of frames'
-
- iscaling_shake = 0
- if(plot_shaking_map) then
- print *
- print *,'norm to display in shaking map:'
- print *,'1=displacement 2=velocity 3=acceleration'
- print *
- read(5,*) inorm
- if(inorm < 1 .or. inorm > 3) stop 'incorrect value of inorm'
- print *
- print *,'apply non-linear scaling to shaking map:'
- print *,'1=non-linear 2=no scaling'
- print *
- read(5,*) iscaling_shake
- if(iscaling_shake < 1 .or. iscaling_shake > 2) stop 'incorrect value of iscaling_shake'
- else
- print *
- print *,'movie data:'
- print *,'1= norm of velocity 2=velocity x-comp 3=velocity y-comp 4=velocity z-comp'
- print *
- read(5,*) inorm
- if(inorm < 1 .or. inorm > 4) stop 'incorrect value of inorm'
- endif
-
-! file format flags
- if(iformat == 1) then
- USE_OPENDX = .true.
- USE_AVS = .false.
- USE_GMT = .false.
- else if(iformat == 2) then
- USE_OPENDX = .false.
- USE_AVS = .true.
- USE_GMT = .false.
- else
- USE_OPENDX = .false.
- USE_AVS = .false.
- USE_GMT = .true.
- endif
-
- ! define the total number of elements at the surface
- if(USE_HIGHRES_FOR_MOVIES) then
- nspectot_AVS_max = NSPEC_SURFACE_EXT_MESH * (NGLLX-1) * (NGLLY-1)
- else
- nspectot_AVS_max = NSPEC_SURFACE_EXT_MESH
- endif
-
- ! maximum theoretical number of points at the surface
- npointot = NGNOD2D_AVS_DX * nspectot_AVS_max
-
- ! allocate arrays for sorting routine
- allocate(iglob(npointot),loc(npointot), &
- ifseg(npointot), &
- xp(npointot),yp(npointot),zp(npointot), &
- xp_save(npointot),yp_save(npointot),zp_save(npointot), &
- field_display(npointot), &
- mask_point(npointot), &
- ireorder(npointot),stat=ier)
- if( ier /= 0 ) stop 'error allocating arrays for sorting routine'
-
- ! allocates data arrays
- allocate(store_val_x(ilocnum), &
- store_val_y(ilocnum), &
- store_val_z(ilocnum), &
- store_val_ux(ilocnum), &
- store_val_uy(ilocnum), &
- store_val_uz(ilocnum),stat=ier)
- if( ier /= 0 ) stop 'error allocating arrays for data arrays'
-
- if(USE_HIGHRES_FOR_MOVIES) then
- allocate(x(NGLLX,NGLLY), &
- y(NGLLX,NGLLY), &
- z(NGLLX,NGLLY), &
- display(NGLLX,NGLLY),stat=ier)
- if( ier /= 0 ) stop 'error allocating arrays for highres'
- endif
-
- ! user output
- print *
- print *,'there are a total of ',nspectot_AVS_max,' elements at the surface'
- print *
- print *
- if(APPLY_THRESHOLD .and. .not. plot_shaking_map) &
- print *,'Will apply a threshold to amplitude below ',100.*THRESHOLD,' %'
- if(NONLINEAR_SCALING .and. (.not. plot_shaking_map .or. iscaling_shake == 1)) &
- print *,'Will apply a non linear scaling with coef ',POWER_SCALING
-
-
- iframe = 0
-
-! loop on all the time steps in the range entered
- do it = it1,it2
-
- ! check if time step corresponds to a movie frame
- if(mod(it,NTSTEP_BETWEEN_FRAMES) == 0 .or. plot_shaking_map) then
-
- iframe = iframe + 1
-
- print *
- if(plot_shaking_map) then
- print *,'reading shaking map snapshot'
- else
- print *,'reading snapshot time step ',it,' out of ',NSTEP
- endif
- print *
-
- ! read all the elements from the same file
- if(plot_shaking_map) then
- write(outputname,"('/shakingdata')")
- else
- write(outputname,"('/moviedata',i6.6)") it
- endif
- open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(outputname),status='old', &
- action='read',form='unformatted',iostat=ier)
- if( ier /= 0 ) then
- print*,'error: ',trim(OUTPUT_FILES)//trim(outputname)
- stop 'error opening moviedata file'
- endif
-
- read(IOUT) store_val_x
- read(IOUT) store_val_y
- read(IOUT) store_val_z
- read(IOUT) store_val_ux
- read(IOUT) store_val_uy
- read(IOUT) store_val_uz
- close(IOUT)
-
- ! clear number of elements kept
- ispec = 0
-
- ! reset point number
- ipoin = 0
-
- do ispecloc = 1, NSPEC_SURFACE_EXT_MESH
-
- if(USE_HIGHRES_FOR_MOVIES) then
- ! assign the OpenDX "elements"
- do j = 1,NGLLY
- do i = 1,NGLLX
- ipoin = ipoin + 1
-
- ! x,y,z coordinates
- xcoord = store_val_x(ipoin)
- ycoord = store_val_y(ipoin)
- zcoord = store_val_z(ipoin)
-
- ! note:
- ! for shakemaps: ux = norm displacement, uy = norm velocity, uz = norm acceleration
- ! for movies: ux = velocity x-component, uy = velocity y-component, uz = velocity z-component
- vectorx = store_val_ux(ipoin)
- vectory = store_val_uy(ipoin)
- vectorz = store_val_uz(ipoin)
-
- x(i,j) = xcoord
- y(i,j) = ycoord
- z(i,j) = zcoord
-
- ! shakemap
- if(plot_shaking_map) then
- !!!! NL NL mute value near source
- if ( (sqrt(((x(i,j) - (X_SOURCE_EXT_MESH))**2 + &
- (y(i,j) - (Y_SOURCE_EXT_MESH))**2 + &
- (z(i,j) - (Z_SOURCE_EXT_MESH))**2)) < RADIUS_TO_MUTE) &
- .and. MUTE_SOURCE) then
-
- display(i,j) = 0.
- else
- ! chooses norm
- if(inorm == 1) then
- ! norm displacement
- display(i,j) = vectorx
- else if(inorm == 2) then
- ! norm velocity
- display(i,j) = vectory
- else
- ! norm acceleration
- display(i,j) = vectorz
- endif
- endif
- else
- ! movie
- if(inorm == 1) then
- ! norm of velocity
- display(i,j) = sqrt(vectorz**2+vectory**2+vectorx**2)
- else if( inorm == 2 ) then
- ! velocity x-component
- display(i,j) = vectorx
- else if( inorm == 3 ) then
- ! velocity y-component
- display(i,j) = vectory
- else if( inorm == 4 ) then
- ! velocity z-component
- display(i,j) = vectorz
- endif
- endif
-
- enddo
- enddo
-
- ! assign the values of the corners of the OpenDX "elements"
- ispec = ispec + 1
- ielm = (NGLLX-1)*(NGLLY-1)*(ispec-1)
-
- do j = 1,NGLLY-1
- do i = 1,NGLLX-1
- ieoff = NGNOD2D_AVS_DX*(ielm+(i-1)+(j-1)*(NGLLX-1))
- do ilocnum = 1,NGNOD2D_AVS_DX
- ! do k = 1,NGNOD2D_AVS_DX
-
-
- if(ilocnum == 1) then
- xp(ieoff+ilocnum) = dble(x(i,j))
- yp(ieoff+ilocnum) = dble(y(i,j))
- zp(ieoff+ilocnum) = dble(z(i,j))
- field_display(ieoff+ilocnum) = dble(display(i,j))
- elseif(ilocnum == 2) then
-
- ! accounts for different ordering of square points
- xp(ieoff+ilocnum) = dble(x(i+1,j+1))
- yp(ieoff+ilocnum) = dble(y(i+1,j+1))
- zp(ieoff+ilocnum) = dble(z(i+1,j+1))
- field_display(ieoff+ilocnum) = dble(display(i+1,j+1))
-
- ! xp(ieoff+ilocnum) = dble(x(i+1,j))
- ! yp(ieoff+ilocnum) = dble(y(i+1,j))
- ! zp(ieoff+ilocnum) = dble(z(i+1,j))
- ! field_display(ieoff+ilocnum) = dble(display(i+1,j))
-
- elseif(ilocnum == 3) then
-
- ! accounts for different ordering of square points
- xp(ieoff+ilocnum) = dble(x(i+1,j))
- yp(ieoff+ilocnum) = dble(y(i+1,j))
- zp(ieoff+ilocnum) = dble(z(i+1,j))
- field_display(ieoff+ilocnum) = dble(display(i+1,j))
-
- ! xp(ieoff+ilocnum) = dble(x(i+1,j+1))
- ! yp(ieoff+ilocnum) = dble(y(i+1,j+1))
- ! zp(ieoff+ilocnum) = dble(z(i+1,j+1))
- ! field_display(ieoff+ilocnum) = dble(display(i+1,j+1))
- else
- xp(ieoff+ilocnum) = dble(x(i,j+1))
- yp(ieoff+ilocnum) = dble(y(i,j+1))
- zp(ieoff+ilocnum) = dble(z(i,j+1))
- field_display(ieoff+ilocnum) = dble(display(i,j+1))
- endif
-
- enddo
-
- !if( j==1 .and. ispec==1) then
- !print*,'p1',xp(ieoff+1),yp(ieoff+1),zp(ieoff+1)
- !print*,'p2',xp(ieoff+2),yp(ieoff+2),zp(ieoff+2)
- !print*,'p3',xp(ieoff+3),yp(ieoff+3),zp(ieoff+3)
- !print*,'p4',xp(ieoff+4),yp(ieoff+4),zp(ieoff+4)
- !endif
-
- enddo
- enddo
-
- else
- ! low-resolution (only spectral element corners)
- ispec = ispec + 1
- ieoff = NGNOD2D_AVS_DX*(ispec-1)
-
- ! four points for each element
- do i = 1,NGNOD2D_AVS_DX
-
- ! accounts for different ordering of square points
- ilocnum = iorder(i)
-
- ipoin = ipoin + 1
-
- xcoord = store_val_x(ipoin)
- ycoord = store_val_y(ipoin)
- zcoord = store_val_z(ipoin)
-
- vectorx = store_val_ux(ipoin)
- vectory = store_val_uy(ipoin)
- vectorz = store_val_uz(ipoin)
-
-
- xp(ilocnum+ieoff) = dble(xcoord)
- yp(ilocnum+ieoff) = dble(ycoord)
- zp(ilocnum+ieoff) = dble(zcoord)
-
- ! shakemap
- if(plot_shaking_map) then
- !!!! NL NL mute value near source
- if ( (sqrt(((dble(xcoord) - (X_SOURCE_EXT_MESH))**2 + &
- (dble(ycoord) - (Y_SOURCE_EXT_MESH))**2 + &
- (dble(zcoord) - (Z_SOURCE_EXT_MESH))**2)) < RADIUS_TO_MUTE) &
- .and. MUTE_SOURCE) then
- field_display(ilocnum+ieoff) = 0.
- else
- if(inorm == 1) then
- ! norm of displacement
- field_display(ilocnum+ieoff) = dble(vectorx)
- else if(inorm == 2) then
- ! norm of velocity
- field_display(ilocnum+ieoff) = dble(vectory)
- else
- ! norm of acceleration
- field_display(ilocnum+ieoff) = dble(vectorz)
- endif
- endif
- else
- ! movie
- if(inorm == 1) then
- ! norm of velocity
- field_display(ilocnum+ieoff) = sqrt(vectorz**2+vectory**2+vectorx**2)
- else if( inorm == 2 ) then
- ! velocity x-component
- field_display(ilocnum+ieoff) = vectorx
- else if( inorm == 3 ) then
- ! velocity y-component
- field_display(ilocnum+ieoff) = vectory
- else
- ! velocity z-component
- field_display(ilocnum+ieoff) = vectorz
- endif
- ! takes norm of velocity vector
- !field_display(ilocnum+ieoff) =sqrt(vectorz**2+vectory**2+vectorx**2)
- endif
-
- enddo
- endif ! USE_HIGHRES_FOR_MOVIES
- enddo ! NSPEC_SURFACE_EXT_MESH
-
- ! copy coordinate arrays since the sorting routine does not preserve them
- xp_save(:) = xp(:)
- yp_save(:) = yp(:)
- zp_save(:) = zp(:)
-
- ! sort the list based upon coordinates to get rid of multiples
- print *,'sorting list of points'
- call get_global_AVS(nspectot_AVS_max,xp,yp,zp,iglob,loc,ifseg,nglob,npointot, &
- dble(minval(store_val_x(:))),dble(maxval(store_val_x(:))))
-
- ! print total number of points found
- print *
- print *,'found a total of ',nglob,' points'
- print *,'initial number of points (with multiples) was ',npointot
-
-
- ! normalize and scale vector field
-
- ! compute min and max of data value to normalize
- min_field_current = minval(field_display(:))
- max_field_current = maxval(field_display(:))
-
- ! print minimum and maximum amplitude in current snapshot
- print *
- print *,'minimum amplitude in current snapshot = ',min_field_current
- print *,'maximum amplitude in current snapshot = ',max_field_current
- print *
-
- if(plot_shaking_map) then
- ! compute min and max of data value to normalize
- min_field_current = minval(field_display(:))
- max_field_current = maxval(field_display(:))
- ! print minimum and maximum amplitude in current snapshot
- print *
- print *,'minimum amplitude in current snapshot after removal = ',min_field_current
- print *,'maximum amplitude in current snapshot after removal = ',max_field_current
- print *
- endif
-
- ! apply scaling in all cases for movies
- if(.not. plot_shaking_map) then
-
- ! make sure range is always symmetric and center is in zero
- ! this assumption works only for fields that can be negative
- ! would not work for norm of vector for instance
- ! (we would lose half of the color palette if no negative values)
- max_absol = max(abs(min_field_current),abs(max_field_current))
- min_field_current = - max_absol
- max_field_current = + max_absol
-
- ! normalize field to [0:1]
- if( abs(max_field_current - min_field_current) > TINYVAL ) &
- field_display(:) = (field_display(:) - min_field_current) / (max_field_current - min_field_current)
-
- ! rescale to [-1,1]
- field_display(:) = 2.*field_display(:) - 1.
-
- ! apply threshold to normalized field
- if(APPLY_THRESHOLD) &
- where(abs(field_display(:)) <= THRESHOLD) field_display = 0.
-
- ! apply non linear scaling to normalized field if needed
- if(NONLINEAR_SCALING) then
- where(field_display(:) >= 0.)
- field_display = field_display ** POWER_SCALING
- elsewhere
- field_display = - abs(field_display) ** POWER_SCALING
- endwhere
- endif
-
- ! map back to [0,1]
- field_display(:) = (field_display(:) + 1.) / 2.
-
- ! map field to [0:255] for AVS color scale
- field_display(:) = 255. * field_display(:)
-
-
- ! apply scaling only if selected for shaking map
- else if(NONLINEAR_SCALING .and. iscaling_shake == 1) then
-
- ! normalize field to [0:1]
- if( abs(max_field_current) > TINYVAL ) &
- field_display(:) = field_display(:) / max_field_current
-
- ! apply non linear scaling to normalized field
- field_display = field_display ** POWER_SCALING
-
- ! map field to [0:255] for AVS color scale
- field_display(:) = 255. * field_display(:)
-
- endif
-
- !--- ****** create AVS file using sorted list ******
-
- if(.not. plot_shaking_map) then
- if(inumber == 1) then
- ivalue = iframe
- else
- ivalue = it
- endif
- endif
-
- ! create file name and open file
- if(plot_shaking_map) then
-
- if(USE_OPENDX) then
- write(outputname,"('/DX_shaking_map.dx')")
- open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
- write(11,*) 'object 1 class array type float rank 1 shape 3 items ',nglob,' data follows'
- else if(USE_AVS) then
- write(outputname,"('/AVS_shaking_map.inp')")
- open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
- write(11,*) nglob,' ',nspectot_AVS_max,' 1 0 0'
- else if(USE_GMT) then
- write(outputname,"('/gmt_shaking_map.xyz')")
- open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
- else
- stop 'wrong output format selected'
- endif
-
- else
-
- if(USE_OPENDX) then
- write(outputname,"('/DX_movie_',i6.6,'.dx')") ivalue
- open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
- write(11,*) 'object 1 class array type float rank 1 shape 3 items ',nglob,' data follows'
- else if(USE_AVS) then
- write(outputname,"('/AVS_movie_',i6.6,'.inp')") ivalue
- open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
- write(11,*) nglob,' ',nspectot_AVS_max,' 1 0 0'
- else if(USE_GMT) then
- write(outputname,"('/gmt_movie_',i6.6,'.xyz')") ivalue
- open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
- else
- stop 'wrong output format selected'
- endif
-
- endif
-
-
- if(USE_GMT) then
-
-! output list of points
- mask_point = .false.
- do ispec=1,nspectot_AVS_max
- ieoff = NGNOD2D_AVS_DX*(ispec-1)
-! four points for each element
- do ilocnum = 1,NGNOD2D_AVS_DX
- ibool_number = iglob(ilocnum+ieoff)
- if(.not. mask_point(ibool_number)) then
- call utm_geo(long,lat,xp_save(ilocnum+ieoff),yp_save(ilocnum+ieoff), &
- UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
- write(11,*) long,lat,field_display(ilocnum+ieoff)
- endif
- mask_point(ibool_number) = .true.
- enddo
- enddo
-
- else
-
- ! output list of points
- mask_point = .false.
- ipoin = 0
- do ispec=1,nspectot_AVS_max
- ieoff = NGNOD2D_AVS_DX*(ispec-1)
- ! four points for each element
- do ilocnum = 1,NGNOD2D_AVS_DX
- ibool_number = iglob(ilocnum+ieoff)
- if(.not. mask_point(ibool_number)) then
- ipoin = ipoin + 1
- ireorder(ibool_number) = ipoin
- if(USE_OPENDX) then
- write(11,*) xp_save(ilocnum+ieoff),yp_save(ilocnum+ieoff),zp_save(ilocnum+ieoff)
- else if(USE_AVS) then
- write(11,'(i9,3f16.6)') ireorder(ibool_number),xp_save(ilocnum+ieoff), &
- yp_save(ilocnum+ieoff),zp_save(ilocnum+ieoff)
- endif
- endif
- mask_point(ibool_number) = .true.
- enddo
- enddo
-
- if(USE_OPENDX) &
- write(11,*) 'object 2 class array type int rank 1 shape 4 items ',nspectot_AVS_max,' data follows'
-
- ! output list of elements
- do ispec=1,nspectot_AVS_max
- ieoff = NGNOD2D_AVS_DX*(ispec-1)
- ! four points for each element
- ibool_number1 = iglob(ieoff + 1)
- ibool_number2 = iglob(ieoff + 2)
- ibool_number3 = iglob(ieoff + 3)
- ibool_number4 = iglob(ieoff + 4)
- if(USE_OPENDX) then
- ! point order in OpenDX is 1,4,2,3 *not* 1,2,3,4 as in AVS
- write(11,"(i10,1x,i10,1x,i10,1x,i10)") ireorder(ibool_number1)-1, &
- ireorder(ibool_number4)-1,ireorder(ibool_number2)-1,ireorder(ibool_number3)-1
- else
- write(11,"(i10,' 1 quad ',i10,1x,i10,1x,i10,1x,i10)") ispec,ireorder(ibool_number1), &
- ireorder(ibool_number4),ireorder(ibool_number2),ireorder(ibool_number3)
- endif
- enddo
-
- if(USE_OPENDX) then
- write(11,*) 'attribute "element type" string "quads"'
- write(11,*) 'attribute "ref" string "positions"'
- write(11,*) 'object 3 class array type float rank 0 items ',nglob,' data follows'
- else
- ! dummy text for labels
- write(11,*) '1 1'
- write(11,*) 'a, b'
- endif
-
- ! output data values
- mask_point = .false.
- do ispec=1,nspectot_AVS_max
- ieoff = NGNOD2D_AVS_DX*(ispec-1)
- ! four points for each element
- do ilocnum = 1,NGNOD2D_AVS_DX
- ibool_number = iglob(ilocnum+ieoff)
- if(.not. mask_point(ibool_number)) then
- if(USE_OPENDX) then
- if(plot_shaking_map) then
- write(11,*) sngl(field_display(ilocnum+ieoff))
- else
- write(11,"(f7.2)") field_display(ilocnum+ieoff)
- endif
- else
- if(plot_shaking_map) then
- write(11,*) ireorder(ibool_number),field_display(ilocnum+ieoff)
- else
- write(11,"(i10,1x,f7.2)") ireorder(ibool_number),field_display(ilocnum+ieoff)
- endif
- endif
- endif
- mask_point(ibool_number) = .true.
- enddo
- enddo
-
- ! define OpenDX field
- if(USE_OPENDX) then
- write(11,*) 'attribute "dep" string "positions"'
- write(11,*) 'object "irregular positions irregular connections" class field'
- write(11,*) 'component "positions" value 1'
- write(11,*) 'component "connections" value 2'
- write(11,*) 'component "data" value 3'
- write(11,*) 'end'
- endif
-
- ! end of test for GMT format
- endif
-
- close(11)
-
- ! end of loop and test on all the time steps for all the movie images
- endif
-enddo ! it
-
- print *
- print *,'done creating movie or shaking map'
- print *
- if(USE_OPENDX) print *,'DX files are stored in ', trim(OUTPUT_FILES), '/DX_*.dx'
- if(USE_AVS) print *,'AVS files are stored in ', trim(OUTPUT_FILES), '/AVS_*.inp'
- if(USE_GMT) print *,'GMT files are stored in ', trim(OUTPUT_FILES), '/gmt_*.xyz'
- print *
-
-
- deallocate(store_val_x)
- deallocate(store_val_y)
- deallocate(store_val_z)
- deallocate(store_val_ux)
- deallocate(store_val_uy)
- deallocate(store_val_uz)
-
- ! deallocate arrays for sorting routine
- deallocate(iglob,loc)
- deallocate(ifseg)
- deallocate(xp,yp,zp)
- deallocate(xp_save,yp_save,zp_save)
- deallocate(field_display)
- deallocate(mask_point)
- deallocate(ireorder)
-
- if(USE_HIGHRES_FOR_MOVIES) then
- deallocate(x)
- deallocate(y)
- deallocate(z)
- deallocate(display)
- endif
-
- end program create_movie_shakemap
-
-!
-!=====================================================================
-!
-
- subroutine get_global_AVS(nspec,xp,yp,zp,iglob,loc,ifseg,nglob,npointot,UTM_X_MIN,UTM_X_MAX)
-
-! this routine MUST be in double precision to avoid sensitivity
-! to roundoff errors in the coordinates of the points
-
-! leave sorting subroutines in same source file to allow for inlining
-
- implicit none
-
- include "constants.h"
-
-! geometry tolerance parameter to calculate number of independent grid points
-! small value for double precision and to avoid sensitivity to roundoff
- double precision SMALLVALTOL
-
- integer npointot
- integer iglob(npointot),loc(npointot)
- logical ifseg(npointot)
- double precision xp(npointot),yp(npointot),zp(npointot)
- integer nspec,nglob
-
- integer ispec,i,j,ier
- integer ieoff,ilocnum,nseg,ioff,iseg,ig
-
- integer, dimension(:), allocatable :: ind,ninseg,iwork
- double precision, dimension(:), allocatable :: work
-
- double precision UTM_X_MIN,UTM_X_MAX
-
-! define geometrical tolerance based upon typical size of the model
- SMALLVALTOL = 1.d-10 * dabs(UTM_X_MAX - UTM_X_MIN)
- print *, 'UTM_X_MAX', UTM_X_MAX
- print *, 'UTM_X_MIN', UTM_X_MIN
- print *, 'SMALLVALTOL', SMALLVALTOL
-
-! dynamically allocate arrays
- allocate(ind(npointot), &
- ninseg(npointot), &
- iwork(npointot), &
- work(npointot),stat=ier)
- if( ier /= 0 ) stop 'error allocating arrays ind etc.'
-
-! establish initial pointers
- do ispec=1,nspec
- ieoff=NGNOD2D_AVS_DX*(ispec-1)
- do ilocnum=1,NGNOD2D_AVS_DX
- loc(ilocnum+ieoff)=ilocnum+ieoff
- enddo
- enddo
-
- ifseg(:)=.false.
-
- nseg=1
- ifseg(1)=.true.
- ninseg(1)=npointot
-
- do j=1,NDIM
-
-! sort within each segment
- ioff=1
- do iseg=1,nseg
- if(j == 1) then
- call rank(xp(ioff),ind,ninseg(iseg))
- else if(j == 2) then
- call rank(yp(ioff),ind,ninseg(iseg))
- else
- call rank(zp(ioff),ind,ninseg(iseg))
- endif
- call swap_all(loc(ioff),xp(ioff),yp(ioff),zp(ioff),iwork,work,ind,ninseg(iseg))
- ioff=ioff+ninseg(iseg)
- enddo
-
-! check for jumps in current coordinate
-! compare the coordinates of the points within a small tolerance
- if(j == 1) then
- do i=2,npointot
- if(dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
- enddo
- else if(j == 2) then
- do i=2,npointot
- if(dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
- enddo
- else
- do i=2,npointot
- if(dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
- enddo
- endif
-
-! count up number of different segments
- nseg=0
- do i=1,npointot
- if(ifseg(i)) then
- nseg=nseg+1
- ninseg(nseg)=1
- else
- ninseg(nseg)=ninseg(nseg)+1
- endif
- enddo
- enddo
-
-! assign global node numbers (now sorted lexicographically)
- ig=0
- do i=1,npointot
- if(ifseg(i)) ig=ig+1
- iglob(loc(i))=ig
- enddo
-
- nglob=ig
-
-! deallocate arrays
- deallocate(ind)
- deallocate(ninseg)
- deallocate(iwork)
- deallocate(work)
-
- end subroutine get_global_AVS
-
-! -----------------------------------
-
-! sorting routines put in same file to allow for inlining
-
- subroutine rank(A,IND,N)
-!
-! Use Heap Sort (Numerical Recipes)
-!
- implicit none
-
- integer n
- double precision A(n)
- integer IND(n)
-
- integer i,j,l,ir,indx
- double precision q
-
- do j=1,n
- IND(j)=j
- enddo
-
- if (n == 1) return
-
- L=n/2+1
- ir=n
- 100 CONTINUE
- IF (l>1) THEN
- l=l-1
- indx=ind(l)
- q=a(indx)
- ELSE
- indx=ind(ir)
- q=a(indx)
- ind(ir)=ind(1)
- ir=ir-1
- if (ir == 1) then
- ind(1)=indx
- return
- endif
- ENDIF
- i=l
- j=l+l
- 200 CONTINUE
- IF (J <= IR) THEN
- IF (J<IR) THEN
- IF ( A(IND(j))<A(IND(j+1)) ) j=j+1
- ENDIF
- IF (q<A(IND(j))) THEN
- IND(I)=IND(J)
- I=J
- J=J+J
- ELSE
- J=IR+1
- ENDIF
- goto 200
- ENDIF
- IND(I)=INDX
- goto 100
- end subroutine rank
-
-! ------------------------------------------------------------------
-
- subroutine swap_all(IA,A,B,C,IW,W,ind,n)
-!
-! swap arrays IA, A, B and C according to addressing in array IND
-!
- implicit none
-
- integer n
-
- integer IND(n)
- integer IA(n),IW(n)
- double precision A(n),B(n),C(n),W(n)
-
- integer i
-
- IW(:) = IA(:)
- W(:) = A(:)
-
- do i=1,n
- IA(i)=IW(ind(i))
- A(i)=W(ind(i))
- enddo
-
- W(:) = B(:)
-
- do i=1,n
- B(i)=W(ind(i))
- enddo
-
- W(:) = C(:)
-
- do i=1,n
- C(i)=W(ind(i))
- enddo
-
- end subroutine swap_all
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!
+!--- create a movie of the vertical component of surface displacement or velocity
+!--- or a ShakeMap(R) (i.e. map of the maximum absolute value of the two horizontal components
+!--- of the velocity vector) in AVS, OpenDX or GMT format
+!
+
+ program create_movie_shakemap
+
+ implicit none
+
+ include "constants.h"
+ include "surface_from_mesher.h"
+
+!-------------------------------------------------------------------------------------------------
+! user parameters
+! threshold in percent of the maximum below which we cut the amplitude
+ logical, parameter :: APPLY_THRESHOLD = .false.
+ real(kind=CUSTOM_REAL), parameter :: THRESHOLD = 1._CUSTOM_REAL / 100._CUSTOM_REAL
+
+! coefficient of power law used for non linear scaling
+ logical, parameter :: NONLINEAR_SCALING = .false.
+ real(kind=CUSTOM_REAL), parameter :: POWER_SCALING = 0.13_CUSTOM_REAL
+
+!-------------------------------------------------------------------------------------------------
+
+ integer it,it1,it2,ivalue,nspectot_AVS_max,ispec
+ integer iformat,nframes,iframe,inumber,inorm,iscaling_shake
+ integer ibool_number,ibool_number1,ibool_number2,ibool_number3,ibool_number4
+
+ logical USE_OPENDX,USE_AVS,USE_GMT,plot_shaking_map
+
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: x,y,z,display
+ real(kind=CUSTOM_REAL) xcoord,ycoord,zcoord
+ real(kind=CUSTOM_REAL) vectorx,vectory,vectorz
+
+ double precision min_field_current,max_field_current,max_absol
+
+ character(len=256) outputname
+
+ integer ipoin
+
+ ! GMT
+ double precision lat,long
+
+ ! for sorting routine
+ integer npointot,ilocnum,nglob,i,j,ielm,ieoff,ispecloc
+ integer, dimension(:), allocatable :: iglob,loc,ireorder
+ logical, dimension(:), allocatable :: ifseg,mask_point
+ double precision, dimension(:), allocatable :: xp,yp,zp,xp_save,yp_save,zp_save,field_display
+
+ ! movie files stored by solver
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
+ store_val_x,store_val_y,store_val_z, &
+ store_val_ux,store_val_uy,store_val_uz
+
+ ! parameters read from parameter file
+ integer NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,UTM_PROJECTION_ZONE,SIMULATION_TYPE
+ integer NSOURCES,NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY
+ logical MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+ USE_HIGHRES_FOR_MOVIES,SUPPRESS_UTM_PROJECTION
+ integer NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO
+ double precision DT
+ double precision HDUR_MOVIE
+ logical ATTENUATION,USE_OLSEN_ATTENUATION, &
+ OCEANS,TOPOGRAPHY
+ logical ABSORBING_CONDITIONS,SAVE_FORWARD
+ logical ANISOTROPY,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
+ character(len=256) OUTPUT_FILES,LOCAL_PATH
+ integer NPROC
+ integer ier
+
+
+!--------------------------------------------
+!!!! NL NL for external meshes
+!--------------------------------------------
+ ! muting source region
+ real(kind=CUSTOM_REAL), parameter :: RADIUS_TO_MUTE = 1000._CUSTOM_REAL
+ logical, parameter :: MUTE_SOURCE = .true.
+ real(kind=CUSTOM_REAL), parameter :: X_SOURCE_EXT_MESH = -9023.021484375
+ real(kind=CUSTOM_REAL), parameter :: Y_SOURCE_EXT_MESH = 6123.611328125
+ real(kind=CUSTOM_REAL), parameter :: Z_SOURCE_EXT_MESH = 17.96331405639648
+!--------------------------------------------
+!!!! NL NL
+
+ ! order of points representing the 2D square element
+ integer,dimension(NGNOD2D_AVS_DX),parameter :: iorder = (/1,3,2,4/)
+
+
+! ************** PROGRAM STARTS HERE **************
+
+ print *
+ print *,'Recombining all movie frames to create a movie'
+ print *
+
+ print *
+ print *,'reading parameter file'
+ print *
+
+ ! read the parameter file
+ call read_parameter_file(NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
+ UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+ ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
+ OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
+ MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+ NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
+ SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION, &
+ NTSTEP_BETWEEN_OUTPUT_INFO,SIMULATION_TYPE,SAVE_FORWARD, &
+ NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
+
+ ! get the base pathname for output files
+ call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
+
+ ! only one global array for movie data, but stored for all surfaces defined
+ ! in file 'surface_from_mesher.h'
+ if(USE_HIGHRES_FOR_MOVIES) then
+ ilocnum = NSPEC_SURFACE_EXT_MESH*NGLLSQUARE
+ else
+ ilocnum = NSPEC_SURFACE_EXT_MESH*NGNOD2D_AVS_DX
+ endif
+ print*,' moviedata element surfaces: ',NSPEC_SURFACE_EXT_MESH
+ print*,' moviedata total elements all: ',ilocnum
+ print *
+
+ if(SAVE_DISPLACEMENT) then
+ print *,'Vertical displacement will be shown in movie'
+ else
+ print *,'Vertical velocity will be shown in movie'
+ endif
+ print *
+
+
+ ! user input
+ print *,'1 = create files in OpenDX format'
+ print *,'2 = create files in AVS UCD format'
+ print *,'3 = create files in GMT xyz Ascii long/lat/Uz format'
+ print *,'any other value = exit'
+ print *
+ print *,'enter value:'
+ read(5,*) iformat
+ if(iformat < 1 .or. iformat > 3) stop 'exiting...'
+
+ plot_shaking_map = .false.
+ print *,'movie frames have been saved every ',NTSTEP_BETWEEN_FRAMES,' time steps'
+ print *
+ print *,'enter first time step of movie (e.g. 1, enter -1 for shaking map)'
+ read(5,*) it1
+ if(it1 == 0 ) it1 = 1
+ if(it1 == -1) plot_shaking_map = .true.
+ if(.not. plot_shaking_map) then
+ print *,'enter last time step of movie (e.g. ',NSTEP,')'
+ read(5,*) it2
+ print *
+ print *,'1 = define file names using frame number'
+ print *,'2 = define file names using time step number'
+ print *,'any other value = exit'
+ print *
+ print *,'enter value:'
+ read(5,*) inumber
+ if(inumber<1 .or. inumber>2) stop 'exiting...'
+ print *
+ print *,'looping from ',it1,' to ',it2,' every ',NTSTEP_BETWEEN_FRAMES,' time steps'
+ ! count number of movie frames
+ nframes = 0
+ do it = it1,it2
+ if(mod(it,NTSTEP_BETWEEN_FRAMES) == 0) nframes = nframes + 1
+ enddo
+ else
+ ! only one frame if shaking map
+ nframes = 1
+ it1 = 1
+ it2 = 1
+ endif
+ print *
+ print *,'total number of frames will be ',nframes
+ if(nframes == 0) stop 'null number of frames'
+
+ iscaling_shake = 0
+ if(plot_shaking_map) then
+ print *
+ print *,'norm to display in shaking map:'
+ print *,'1=displacement 2=velocity 3=acceleration'
+ print *
+ read(5,*) inorm
+ if(inorm < 1 .or. inorm > 3) stop 'incorrect value of inorm'
+ print *
+ print *,'apply non-linear scaling to shaking map:'
+ print *,'1=non-linear 2=no scaling'
+ print *
+ read(5,*) iscaling_shake
+ if(iscaling_shake < 1 .or. iscaling_shake > 2) stop 'incorrect value of iscaling_shake'
+ else
+ print *
+ print *,'movie data:'
+ print *,'1= norm of velocity 2=velocity x-comp 3=velocity y-comp 4=velocity z-comp'
+ print *
+ read(5,*) inorm
+ if(inorm < 1 .or. inorm > 4) stop 'incorrect value of inorm'
+ endif
+
+! file format flags
+ if(iformat == 1) then
+ USE_OPENDX = .true.
+ USE_AVS = .false.
+ USE_GMT = .false.
+ else if(iformat == 2) then
+ USE_OPENDX = .false.
+ USE_AVS = .true.
+ USE_GMT = .false.
+ else
+ USE_OPENDX = .false.
+ USE_AVS = .false.
+ USE_GMT = .true.
+ endif
+
+ ! define the total number of elements at the surface
+ if(USE_HIGHRES_FOR_MOVIES) then
+ nspectot_AVS_max = NSPEC_SURFACE_EXT_MESH * (NGLLX-1) * (NGLLY-1)
+ else
+ nspectot_AVS_max = NSPEC_SURFACE_EXT_MESH
+ endif
+
+ ! maximum theoretical number of points at the surface
+ npointot = NGNOD2D_AVS_DX * nspectot_AVS_max
+
+ ! allocate arrays for sorting routine
+ allocate(iglob(npointot),loc(npointot), &
+ ifseg(npointot), &
+ xp(npointot),yp(npointot),zp(npointot), &
+ xp_save(npointot),yp_save(npointot),zp_save(npointot), &
+ field_display(npointot), &
+ mask_point(npointot), &
+ ireorder(npointot),stat=ier)
+ if( ier /= 0 ) stop 'error allocating arrays for sorting routine'
+
+ ! allocates data arrays
+ allocate(store_val_x(ilocnum), &
+ store_val_y(ilocnum), &
+ store_val_z(ilocnum), &
+ store_val_ux(ilocnum), &
+ store_val_uy(ilocnum), &
+ store_val_uz(ilocnum),stat=ier)
+ if( ier /= 0 ) stop 'error allocating arrays for data arrays'
+
+ if(USE_HIGHRES_FOR_MOVIES) then
+ allocate(x(NGLLX,NGLLY), &
+ y(NGLLX,NGLLY), &
+ z(NGLLX,NGLLY), &
+ display(NGLLX,NGLLY),stat=ier)
+ if( ier /= 0 ) stop 'error allocating arrays for highres'
+ endif
+
+ ! user output
+ print *
+ print *,'there are a total of ',nspectot_AVS_max,' elements at the surface'
+ print *
+ print *
+ if(APPLY_THRESHOLD .and. .not. plot_shaking_map) &
+ print *,'Will apply a threshold to amplitude below ',100.*THRESHOLD,' %'
+ if(NONLINEAR_SCALING .and. (.not. plot_shaking_map .or. iscaling_shake == 1)) &
+ print *,'Will apply a non linear scaling with coef ',POWER_SCALING
+
+
+ iframe = 0
+
+! loop on all the time steps in the range entered
+ do it = it1,it2
+
+ ! check if time step corresponds to a movie frame
+ if(mod(it,NTSTEP_BETWEEN_FRAMES) == 0 .or. plot_shaking_map) then
+
+ iframe = iframe + 1
+
+ print *
+ if(plot_shaking_map) then
+ print *,'reading shaking map snapshot'
+ else
+ print *,'reading snapshot time step ',it,' out of ',NSTEP
+ endif
+ print *
+
+ ! read all the elements from the same file
+ if(plot_shaking_map) then
+ write(outputname,"('/shakingdata')")
+ else
+ write(outputname,"('/moviedata',i6.6)") it
+ endif
+ open(unit=IOUT,file=trim(OUTPUT_FILES)//trim(outputname),status='old', &
+ action='read',form='unformatted',iostat=ier)
+ if( ier /= 0 ) then
+ print*,'error: ',trim(OUTPUT_FILES)//trim(outputname)
+ stop 'error opening moviedata file'
+ endif
+
+ read(IOUT) store_val_x
+ read(IOUT) store_val_y
+ read(IOUT) store_val_z
+ read(IOUT) store_val_ux
+ read(IOUT) store_val_uy
+ read(IOUT) store_val_uz
+ close(IOUT)
+
+ ! clear number of elements kept
+ ispec = 0
+
+ ! reset point number
+ ipoin = 0
+
+ do ispecloc = 1, NSPEC_SURFACE_EXT_MESH
+
+ if(USE_HIGHRES_FOR_MOVIES) then
+ ! assign the OpenDX "elements"
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ ipoin = ipoin + 1
+
+ ! x,y,z coordinates
+ xcoord = store_val_x(ipoin)
+ ycoord = store_val_y(ipoin)
+ zcoord = store_val_z(ipoin)
+
+ ! note:
+ ! for shakemaps: ux = norm displacement, uy = norm velocity, uz = norm acceleration
+ ! for movies: ux = velocity x-component, uy = velocity y-component, uz = velocity z-component
+ vectorx = store_val_ux(ipoin)
+ vectory = store_val_uy(ipoin)
+ vectorz = store_val_uz(ipoin)
+
+ x(i,j) = xcoord
+ y(i,j) = ycoord
+ z(i,j) = zcoord
+
+ ! shakemap
+ if(plot_shaking_map) then
+ !!!! NL NL mute value near source
+ if ( (sqrt(((x(i,j) - (X_SOURCE_EXT_MESH))**2 + &
+ (y(i,j) - (Y_SOURCE_EXT_MESH))**2 + &
+ (z(i,j) - (Z_SOURCE_EXT_MESH))**2)) < RADIUS_TO_MUTE) &
+ .and. MUTE_SOURCE) then
+
+ display(i,j) = 0.
+ else
+ ! chooses norm
+ if(inorm == 1) then
+ ! norm displacement
+ display(i,j) = vectorx
+ else if(inorm == 2) then
+ ! norm velocity
+ display(i,j) = vectory
+ else
+ ! norm acceleration
+ display(i,j) = vectorz
+ endif
+ endif
+ else
+ ! movie
+ if(inorm == 1) then
+ ! norm of velocity
+ display(i,j) = sqrt(vectorz**2+vectory**2+vectorx**2)
+ else if( inorm == 2 ) then
+ ! velocity x-component
+ display(i,j) = vectorx
+ else if( inorm == 3 ) then
+ ! velocity y-component
+ display(i,j) = vectory
+ else if( inorm == 4 ) then
+ ! velocity z-component
+ display(i,j) = vectorz
+ endif
+ endif
+
+ enddo
+ enddo
+
+ ! assign the values of the corners of the OpenDX "elements"
+ ispec = ispec + 1
+ ielm = (NGLLX-1)*(NGLLY-1)*(ispec-1)
+
+ do j = 1,NGLLY-1
+ do i = 1,NGLLX-1
+ ieoff = NGNOD2D_AVS_DX*(ielm+(i-1)+(j-1)*(NGLLX-1))
+ do ilocnum = 1,NGNOD2D_AVS_DX
+ ! do k = 1,NGNOD2D_AVS_DX
+
+
+ if(ilocnum == 1) then
+ xp(ieoff+ilocnum) = dble(x(i,j))
+ yp(ieoff+ilocnum) = dble(y(i,j))
+ zp(ieoff+ilocnum) = dble(z(i,j))
+ field_display(ieoff+ilocnum) = dble(display(i,j))
+ elseif(ilocnum == 2) then
+
+ ! accounts for different ordering of square points
+ xp(ieoff+ilocnum) = dble(x(i+1,j+1))
+ yp(ieoff+ilocnum) = dble(y(i+1,j+1))
+ zp(ieoff+ilocnum) = dble(z(i+1,j+1))
+ field_display(ieoff+ilocnum) = dble(display(i+1,j+1))
+
+ ! xp(ieoff+ilocnum) = dble(x(i+1,j))
+ ! yp(ieoff+ilocnum) = dble(y(i+1,j))
+ ! zp(ieoff+ilocnum) = dble(z(i+1,j))
+ ! field_display(ieoff+ilocnum) = dble(display(i+1,j))
+
+ elseif(ilocnum == 3) then
+
+ ! accounts for different ordering of square points
+ xp(ieoff+ilocnum) = dble(x(i+1,j))
+ yp(ieoff+ilocnum) = dble(y(i+1,j))
+ zp(ieoff+ilocnum) = dble(z(i+1,j))
+ field_display(ieoff+ilocnum) = dble(display(i+1,j))
+
+ ! xp(ieoff+ilocnum) = dble(x(i+1,j+1))
+ ! yp(ieoff+ilocnum) = dble(y(i+1,j+1))
+ ! zp(ieoff+ilocnum) = dble(z(i+1,j+1))
+ ! field_display(ieoff+ilocnum) = dble(display(i+1,j+1))
+ else
+ xp(ieoff+ilocnum) = dble(x(i,j+1))
+ yp(ieoff+ilocnum) = dble(y(i,j+1))
+ zp(ieoff+ilocnum) = dble(z(i,j+1))
+ field_display(ieoff+ilocnum) = dble(display(i,j+1))
+ endif
+
+ enddo
+
+ !if( j==1 .and. ispec==1) then
+ !print*,'p1',xp(ieoff+1),yp(ieoff+1),zp(ieoff+1)
+ !print*,'p2',xp(ieoff+2),yp(ieoff+2),zp(ieoff+2)
+ !print*,'p3',xp(ieoff+3),yp(ieoff+3),zp(ieoff+3)
+ !print*,'p4',xp(ieoff+4),yp(ieoff+4),zp(ieoff+4)
+ !endif
+
+ enddo
+ enddo
+
+ else
+ ! low-resolution (only spectral element corners)
+ ispec = ispec + 1
+ ieoff = NGNOD2D_AVS_DX*(ispec-1)
+
+ ! four points for each element
+ do i = 1,NGNOD2D_AVS_DX
+
+ ! accounts for different ordering of square points
+ ilocnum = iorder(i)
+
+ ipoin = ipoin + 1
+
+ xcoord = store_val_x(ipoin)
+ ycoord = store_val_y(ipoin)
+ zcoord = store_val_z(ipoin)
+
+ vectorx = store_val_ux(ipoin)
+ vectory = store_val_uy(ipoin)
+ vectorz = store_val_uz(ipoin)
+
+
+ xp(ilocnum+ieoff) = dble(xcoord)
+ yp(ilocnum+ieoff) = dble(ycoord)
+ zp(ilocnum+ieoff) = dble(zcoord)
+
+ ! shakemap
+ if(plot_shaking_map) then
+ !!!! NL NL mute value near source
+ if ( (sqrt(((dble(xcoord) - (X_SOURCE_EXT_MESH))**2 + &
+ (dble(ycoord) - (Y_SOURCE_EXT_MESH))**2 + &
+ (dble(zcoord) - (Z_SOURCE_EXT_MESH))**2)) < RADIUS_TO_MUTE) &
+ .and. MUTE_SOURCE) then
+ field_display(ilocnum+ieoff) = 0.
+ else
+ if(inorm == 1) then
+ ! norm of displacement
+ field_display(ilocnum+ieoff) = dble(vectorx)
+ else if(inorm == 2) then
+ ! norm of velocity
+ field_display(ilocnum+ieoff) = dble(vectory)
+ else
+ ! norm of acceleration
+ field_display(ilocnum+ieoff) = dble(vectorz)
+ endif
+ endif
+ else
+ ! movie
+ if(inorm == 1) then
+ ! norm of velocity
+ field_display(ilocnum+ieoff) = sqrt(vectorz**2+vectory**2+vectorx**2)
+ else if( inorm == 2 ) then
+ ! velocity x-component
+ field_display(ilocnum+ieoff) = vectorx
+ else if( inorm == 3 ) then
+ ! velocity y-component
+ field_display(ilocnum+ieoff) = vectory
+ else
+ ! velocity z-component
+ field_display(ilocnum+ieoff) = vectorz
+ endif
+ ! takes norm of velocity vector
+ !field_display(ilocnum+ieoff) =sqrt(vectorz**2+vectory**2+vectorx**2)
+ endif
+
+ enddo
+ endif ! USE_HIGHRES_FOR_MOVIES
+ enddo ! NSPEC_SURFACE_EXT_MESH
+
+ ! copy coordinate arrays since the sorting routine does not preserve them
+ xp_save(:) = xp(:)
+ yp_save(:) = yp(:)
+ zp_save(:) = zp(:)
+
+ ! sort the list based upon coordinates to get rid of multiples
+ print *,'sorting list of points'
+ call get_global_AVS(nspectot_AVS_max,xp,yp,zp,iglob,loc,ifseg,nglob,npointot, &
+ dble(minval(store_val_x(:))),dble(maxval(store_val_x(:))))
+
+ ! print total number of points found
+ print *
+ print *,'found a total of ',nglob,' points'
+ print *,'initial number of points (with multiples) was ',npointot
+
+
+ ! normalize and scale vector field
+
+ ! compute min and max of data value to normalize
+ min_field_current = minval(field_display(:))
+ max_field_current = maxval(field_display(:))
+
+ ! print minimum and maximum amplitude in current snapshot
+ print *
+ print *,'minimum amplitude in current snapshot = ',min_field_current
+ print *,'maximum amplitude in current snapshot = ',max_field_current
+ print *
+
+ if(plot_shaking_map) then
+ ! compute min and max of data value to normalize
+ min_field_current = minval(field_display(:))
+ max_field_current = maxval(field_display(:))
+ ! print minimum and maximum amplitude in current snapshot
+ print *
+ print *,'minimum amplitude in current snapshot after removal = ',min_field_current
+ print *,'maximum amplitude in current snapshot after removal = ',max_field_current
+ print *
+ endif
+
+ ! apply scaling in all cases for movies
+ if(.not. plot_shaking_map) then
+
+ ! make sure range is always symmetric and center is in zero
+ ! this assumption works only for fields that can be negative
+ ! would not work for norm of vector for instance
+ ! (we would lose half of the color palette if no negative values)
+ max_absol = max(abs(min_field_current),abs(max_field_current))
+ min_field_current = - max_absol
+ max_field_current = + max_absol
+
+ ! normalize field to [0:1]
+ if( abs(max_field_current - min_field_current) > TINYVAL ) &
+ field_display(:) = (field_display(:) - min_field_current) / (max_field_current - min_field_current)
+
+ ! rescale to [-1,1]
+ field_display(:) = 2.*field_display(:) - 1.
+
+ ! apply threshold to normalized field
+ if(APPLY_THRESHOLD) &
+ where(abs(field_display(:)) <= THRESHOLD) field_display = 0.
+
+ ! apply non linear scaling to normalized field if needed
+ if(NONLINEAR_SCALING) then
+ where(field_display(:) >= 0.)
+ field_display = field_display ** POWER_SCALING
+ elsewhere
+ field_display = - abs(field_display) ** POWER_SCALING
+ endwhere
+ endif
+
+ ! map back to [0,1]
+ field_display(:) = (field_display(:) + 1.) / 2.
+
+ ! map field to [0:255] for AVS color scale
+ field_display(:) = 255. * field_display(:)
+
+
+ ! apply scaling only if selected for shaking map
+ else if(NONLINEAR_SCALING .and. iscaling_shake == 1) then
+
+ ! normalize field to [0:1]
+ if( abs(max_field_current) > TINYVAL ) &
+ field_display(:) = field_display(:) / max_field_current
+
+ ! apply non linear scaling to normalized field
+ field_display = field_display ** POWER_SCALING
+
+ ! map field to [0:255] for AVS color scale
+ field_display(:) = 255. * field_display(:)
+
+ endif
+
+ !--- ****** create AVS file using sorted list ******
+
+ if(.not. plot_shaking_map) then
+ if(inumber == 1) then
+ ivalue = iframe
+ else
+ ivalue = it
+ endif
+ endif
+
+ ! create file name and open file
+ if(plot_shaking_map) then
+
+ if(USE_OPENDX) then
+ write(outputname,"('/DX_shaking_map.dx')")
+ open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+ write(11,*) 'object 1 class array type float rank 1 shape 3 items ',nglob,' data follows'
+ else if(USE_AVS) then
+ write(outputname,"('/AVS_shaking_map.inp')")
+ open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+ write(11,*) nglob,' ',nspectot_AVS_max,' 1 0 0'
+ else if(USE_GMT) then
+ write(outputname,"('/gmt_shaking_map.xyz')")
+ open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+ else
+ stop 'wrong output format selected'
+ endif
+
+ else
+
+ if(USE_OPENDX) then
+ write(outputname,"('/DX_movie_',i6.6,'.dx')") ivalue
+ open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+ write(11,*) 'object 1 class array type float rank 1 shape 3 items ',nglob,' data follows'
+ else if(USE_AVS) then
+ write(outputname,"('/AVS_movie_',i6.6,'.inp')") ivalue
+ open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+ write(11,*) nglob,' ',nspectot_AVS_max,' 1 0 0'
+ else if(USE_GMT) then
+ write(outputname,"('/gmt_movie_',i6.6,'.xyz')") ivalue
+ open(unit=11,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+ else
+ stop 'wrong output format selected'
+ endif
+
+ endif
+
+
+ if(USE_GMT) then
+
+! output list of points
+ mask_point = .false.
+ do ispec=1,nspectot_AVS_max
+ ieoff = NGNOD2D_AVS_DX*(ispec-1)
+! four points for each element
+ do ilocnum = 1,NGNOD2D_AVS_DX
+ ibool_number = iglob(ilocnum+ieoff)
+ if(.not. mask_point(ibool_number)) then
+ call utm_geo(long,lat,xp_save(ilocnum+ieoff),yp_save(ilocnum+ieoff), &
+ UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
+ write(11,*) long,lat,field_display(ilocnum+ieoff)
+ endif
+ mask_point(ibool_number) = .true.
+ enddo
+ enddo
+
+ else
+
+ ! output list of points
+ mask_point = .false.
+ ipoin = 0
+ do ispec=1,nspectot_AVS_max
+ ieoff = NGNOD2D_AVS_DX*(ispec-1)
+ ! four points for each element
+ do ilocnum = 1,NGNOD2D_AVS_DX
+ ibool_number = iglob(ilocnum+ieoff)
+ if(.not. mask_point(ibool_number)) then
+ ipoin = ipoin + 1
+ ireorder(ibool_number) = ipoin
+ if(USE_OPENDX) then
+ write(11,*) xp_save(ilocnum+ieoff),yp_save(ilocnum+ieoff),zp_save(ilocnum+ieoff)
+ else if(USE_AVS) then
+ write(11,'(i9,3f16.6)') ireorder(ibool_number),xp_save(ilocnum+ieoff), &
+ yp_save(ilocnum+ieoff),zp_save(ilocnum+ieoff)
+ endif
+ endif
+ mask_point(ibool_number) = .true.
+ enddo
+ enddo
+
+ if(USE_OPENDX) &
+ write(11,*) 'object 2 class array type int rank 1 shape 4 items ',nspectot_AVS_max,' data follows'
+
+ ! output list of elements
+ do ispec=1,nspectot_AVS_max
+ ieoff = NGNOD2D_AVS_DX*(ispec-1)
+ ! four points for each element
+ ibool_number1 = iglob(ieoff + 1)
+ ibool_number2 = iglob(ieoff + 2)
+ ibool_number3 = iglob(ieoff + 3)
+ ibool_number4 = iglob(ieoff + 4)
+ if(USE_OPENDX) then
+ ! point order in OpenDX is 1,4,2,3 *not* 1,2,3,4 as in AVS
+ write(11,"(i10,1x,i10,1x,i10,1x,i10)") ireorder(ibool_number1)-1, &
+ ireorder(ibool_number4)-1,ireorder(ibool_number2)-1,ireorder(ibool_number3)-1
+ else
+ write(11,"(i10,' 1 quad ',i10,1x,i10,1x,i10,1x,i10)") ispec,ireorder(ibool_number1), &
+ ireorder(ibool_number4),ireorder(ibool_number2),ireorder(ibool_number3)
+ endif
+ enddo
+
+ if(USE_OPENDX) then
+ write(11,*) 'attribute "element type" string "quads"'
+ write(11,*) 'attribute "ref" string "positions"'
+ write(11,*) 'object 3 class array type float rank 0 items ',nglob,' data follows'
+ else
+ ! dummy text for labels
+ write(11,*) '1 1'
+ write(11,*) 'a, b'
+ endif
+
+ ! output data values
+ mask_point = .false.
+ do ispec=1,nspectot_AVS_max
+ ieoff = NGNOD2D_AVS_DX*(ispec-1)
+ ! four points for each element
+ do ilocnum = 1,NGNOD2D_AVS_DX
+ ibool_number = iglob(ilocnum+ieoff)
+ if(.not. mask_point(ibool_number)) then
+ if(USE_OPENDX) then
+ if(plot_shaking_map) then
+ write(11,*) sngl(field_display(ilocnum+ieoff))
+ else
+ write(11,"(f7.2)") field_display(ilocnum+ieoff)
+ endif
+ else
+ if(plot_shaking_map) then
+ write(11,*) ireorder(ibool_number),field_display(ilocnum+ieoff)
+ else
+ write(11,"(i10,1x,f7.2)") ireorder(ibool_number),field_display(ilocnum+ieoff)
+ endif
+ endif
+ endif
+ mask_point(ibool_number) = .true.
+ enddo
+ enddo
+
+ ! define OpenDX field
+ if(USE_OPENDX) then
+ write(11,*) 'attribute "dep" string "positions"'
+ write(11,*) 'object "irregular positions irregular connections" class field'
+ write(11,*) 'component "positions" value 1'
+ write(11,*) 'component "connections" value 2'
+ write(11,*) 'component "data" value 3'
+ write(11,*) 'end'
+ endif
+
+ ! end of test for GMT format
+ endif
+
+ close(11)
+
+ ! end of loop and test on all the time steps for all the movie images
+ endif
+enddo ! it
+
+ print *
+ print *,'done creating movie or shaking map'
+ print *
+ if(USE_OPENDX) print *,'DX files are stored in ', trim(OUTPUT_FILES), '/DX_*.dx'
+ if(USE_AVS) print *,'AVS files are stored in ', trim(OUTPUT_FILES), '/AVS_*.inp'
+ if(USE_GMT) print *,'GMT files are stored in ', trim(OUTPUT_FILES), '/gmt_*.xyz'
+ print *
+
+
+ deallocate(store_val_x)
+ deallocate(store_val_y)
+ deallocate(store_val_z)
+ deallocate(store_val_ux)
+ deallocate(store_val_uy)
+ deallocate(store_val_uz)
+
+ ! deallocate arrays for sorting routine
+ deallocate(iglob,loc)
+ deallocate(ifseg)
+ deallocate(xp,yp,zp)
+ deallocate(xp_save,yp_save,zp_save)
+ deallocate(field_display)
+ deallocate(mask_point)
+ deallocate(ireorder)
+
+ if(USE_HIGHRES_FOR_MOVIES) then
+ deallocate(x)
+ deallocate(y)
+ deallocate(z)
+ deallocate(display)
+ endif
+
+ end program create_movie_shakemap
+
+!
+!=====================================================================
+!
+
+ subroutine get_global_AVS(nspec,xp,yp,zp,iglob,loc,ifseg,nglob,npointot,UTM_X_MIN,UTM_X_MAX)
+
+! this routine MUST be in double precision to avoid sensitivity
+! to roundoff errors in the coordinates of the points
+
+! leave sorting subroutines in same source file to allow for inlining
+
+ implicit none
+
+ include "constants.h"
+
+! geometry tolerance parameter to calculate number of independent grid points
+! small value for double precision and to avoid sensitivity to roundoff
+ double precision SMALLVALTOL
+
+ integer npointot
+ integer iglob(npointot),loc(npointot)
+ logical ifseg(npointot)
+ double precision xp(npointot),yp(npointot),zp(npointot)
+ integer nspec,nglob
+
+ integer ispec,i,j,ier
+ integer ieoff,ilocnum,nseg,ioff,iseg,ig
+
+ integer, dimension(:), allocatable :: ind,ninseg,iwork
+ double precision, dimension(:), allocatable :: work
+
+ double precision UTM_X_MIN,UTM_X_MAX
+
+! define geometrical tolerance based upon typical size of the model
+ SMALLVALTOL = 1.d-10 * dabs(UTM_X_MAX - UTM_X_MIN)
+ print *, 'UTM_X_MAX', UTM_X_MAX
+ print *, 'UTM_X_MIN', UTM_X_MIN
+ print *, 'SMALLVALTOL', SMALLVALTOL
+
+! dynamically allocate arrays
+ allocate(ind(npointot), &
+ ninseg(npointot), &
+ iwork(npointot), &
+ work(npointot),stat=ier)
+ if( ier /= 0 ) stop 'error allocating arrays ind etc.'
+
+! establish initial pointers
+ do ispec=1,nspec
+ ieoff=NGNOD2D_AVS_DX*(ispec-1)
+ do ilocnum=1,NGNOD2D_AVS_DX
+ loc(ilocnum+ieoff)=ilocnum+ieoff
+ enddo
+ enddo
+
+ ifseg(:)=.false.
+
+ nseg=1
+ ifseg(1)=.true.
+ ninseg(1)=npointot
+
+ do j=1,NDIM
+
+! sort within each segment
+ ioff=1
+ do iseg=1,nseg
+ if(j == 1) then
+ call rank(xp(ioff),ind,ninseg(iseg))
+ else if(j == 2) then
+ call rank(yp(ioff),ind,ninseg(iseg))
+ else
+ call rank(zp(ioff),ind,ninseg(iseg))
+ endif
+ call swap_all(loc(ioff),xp(ioff),yp(ioff),zp(ioff),iwork,work,ind,ninseg(iseg))
+ ioff=ioff+ninseg(iseg)
+ enddo
+
+! check for jumps in current coordinate
+! compare the coordinates of the points within a small tolerance
+ if(j == 1) then
+ do i=2,npointot
+ if(dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+ enddo
+ else if(j == 2) then
+ do i=2,npointot
+ if(dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+ enddo
+ else
+ do i=2,npointot
+ if(dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+ enddo
+ endif
+
+! count up number of different segments
+ nseg=0
+ do i=1,npointot
+ if(ifseg(i)) then
+ nseg=nseg+1
+ ninseg(nseg)=1
+ else
+ ninseg(nseg)=ninseg(nseg)+1
+ endif
+ enddo
+ enddo
+
+! assign global node numbers (now sorted lexicographically)
+ ig=0
+ do i=1,npointot
+ if(ifseg(i)) ig=ig+1
+ iglob(loc(i))=ig
+ enddo
+
+ nglob=ig
+
+! deallocate arrays
+ deallocate(ind)
+ deallocate(ninseg)
+ deallocate(iwork)
+ deallocate(work)
+
+ end subroutine get_global_AVS
+
+! -----------------------------------
+
+! sorting routines put in same file to allow for inlining
+
+ subroutine rank(A,IND,N)
+!
+! Use Heap Sort (Numerical Recipes)
+!
+ implicit none
+
+ integer n
+ double precision A(n)
+ integer IND(n)
+
+ integer i,j,l,ir,indx
+ double precision q
+
+ do j=1,n
+ IND(j)=j
+ enddo
+
+ if (n == 1) return
+
+ L=n/2+1
+ ir=n
+ 100 CONTINUE
+ IF (l>1) THEN
+ l=l-1
+ indx=ind(l)
+ q=a(indx)
+ ELSE
+ indx=ind(ir)
+ q=a(indx)
+ ind(ir)=ind(1)
+ ir=ir-1
+ if (ir == 1) then
+ ind(1)=indx
+ return
+ endif
+ ENDIF
+ i=l
+ j=l+l
+ 200 CONTINUE
+ IF (J <= IR) THEN
+ IF (J<IR) THEN
+ IF ( A(IND(j))<A(IND(j+1)) ) j=j+1
+ ENDIF
+ IF (q<A(IND(j))) THEN
+ IND(I)=IND(J)
+ I=J
+ J=J+J
+ ELSE
+ J=IR+1
+ ENDIF
+ goto 200
+ ENDIF
+ IND(I)=INDX
+ goto 100
+ end subroutine rank
+
+! ------------------------------------------------------------------
+
+ subroutine swap_all(IA,A,B,C,IW,W,ind,n)
+!
+! swap arrays IA, A, B and C according to addressing in array IND
+!
+ implicit none
+
+ integer n
+
+ integer IND(n)
+ integer IA(n),IW(n)
+ double precision A(n),B(n),C(n),W(n)
+
+ integer i
+
+ IW(:) = IA(:)
+ W(:) = A(:)
+
+ do i=1,n
+ IA(i)=IW(ind(i))
+ A(i)=W(ind(i))
+ enddo
+
+ W(:) = B(:)
+
+ do i=1,n
+ B(i)=W(ind(i))
+ enddo
+
+ W(:) = C(:)
+
+ do i=1,n
+ C(i)=W(ind(i))
+ enddo
+
+ end subroutine swap_all
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_name_database.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_name_database.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_name_database.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,48 +1,48 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine create_name_database(prname,iproc,LOCAL_PATH)
-
-! create the name of the database for the mesher and the solver
-
- implicit none
-
- integer iproc
-
-! name of the database file
- character(len=256) prname,procname,LOCAL_PATH,clean_LOCAL_PATH
-
-! create the name for the database of the current slide and region
- write(procname,"('/proc',i6.6,'_')") iproc
-
-! suppress white spaces if any
- clean_LOCAL_PATH = adjustl(LOCAL_PATH)
-
-! create full name with path
- prname = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
-
- end subroutine create_name_database
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine create_name_database(prname,iproc,LOCAL_PATH)
+
+! create the name of the database for the mesher and the solver
+
+ implicit none
+
+ integer iproc
+
+! name of the database file
+ character(len=256) prname,procname,LOCAL_PATH,clean_LOCAL_PATH
+
+! create the name for the database of the current slide and region
+ write(procname,"('/proc',i6.6,'_')") iproc
+
+! suppress white spaces if any
+ clean_LOCAL_PATH = adjustl(LOCAL_PATH)
+
+! create full name with path
+ prname = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
+
+ end subroutine create_name_database
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_serial_name_database.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_serial_name_database.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_serial_name_database.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,88 +1,88 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine create_serial_name_database(prname,iproc,LOCAL_PATH,NPROC,OUTPUT_FILES)
-
-! create name of the database for serial codes (AVS_DX and codes to check buffers)
-
- implicit none
-
- include "constants.h"
-
- integer iproc,NPROC
-
-! name of the database file
- character(len=256) prname,procname,LOCAL_PATH,clean_LOCAL_PATH,serial_prefix,OUTPUT_FILES
-
- integer iprocloop,nproc_max_loop
- integer, dimension(:), allocatable :: num_active_proc
- integer ier
- nproc_max_loop = NPROC-1
-
-! create the name for the database of the current slide and region
- write(procname,"('/proc',i6.6,'_')") iproc
-
-! on a Beowulf-type machine, path on frontend can be different from local paths
- if(.not. LOCAL_PATH_IS_ALSO_GLOBAL) then
-
-! allocate array for active processors
- allocate(num_active_proc(0:nproc_max_loop),stat=ier)
- if( ier /= 0 ) stop 'error allocating array num_active_proc'
-
-! read filtered file with name of active machines
- open(unit=48,file=trim(OUTPUT_FILES)//'/filtered_machines.txt',status='old',action='read')
- do iprocloop = 0,nproc_max_loop
- read(48,*) num_active_proc(iprocloop)
- enddo
- close(48)
-
-! create the serial prefix pointing to the correct machine
- write(serial_prefix,"('/auto/scratch_n',i6.6,'/')") num_active_proc(iproc)
-
-! suppress everything until the last "/" to define the base name of local path
-! this is system dependent since it assumes the disks are mounted
-! as on our Beowulf (Unix and NFS)
- clean_LOCAL_PATH = LOCAL_PATH(index(LOCAL_PATH,'/',.true.)+1:len_trim(LOCAL_PATH))
-
-! create full name with path
- prname = serial_prefix(1:len_trim(serial_prefix)) // clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
-
-! deallocate array
- deallocate(num_active_proc)
-
-! on shared-memory machines, global path is the same as local path
- else
-
-! suppress white spaces if any
- clean_LOCAL_PATH = adjustl(LOCAL_PATH)
-
-! create full name with path
- prname = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
-
- endif
-
- end subroutine create_serial_name_database
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine create_serial_name_database(prname,iproc,LOCAL_PATH,NPROC,OUTPUT_FILES)
+
+! create name of the database for serial codes (AVS_DX and codes to check buffers)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iproc,NPROC
+
+! name of the database file
+ character(len=256) prname,procname,LOCAL_PATH,clean_LOCAL_PATH,serial_prefix,OUTPUT_FILES
+
+ integer iprocloop,nproc_max_loop
+ integer, dimension(:), allocatable :: num_active_proc
+ integer ier
+ nproc_max_loop = NPROC-1
+
+! create the name for the database of the current slide and region
+ write(procname,"('/proc',i6.6,'_')") iproc
+
+! on a Beowulf-type machine, path on frontend can be different from local paths
+ if(.not. LOCAL_PATH_IS_ALSO_GLOBAL) then
+
+! allocate array for active processors
+ allocate(num_active_proc(0:nproc_max_loop),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array num_active_proc'
+
+! read filtered file with name of active machines
+ open(unit=48,file=trim(OUTPUT_FILES)//'/filtered_machines.txt',status='old',action='read')
+ do iprocloop = 0,nproc_max_loop
+ read(48,*) num_active_proc(iprocloop)
+ enddo
+ close(48)
+
+! create the serial prefix pointing to the correct machine
+ write(serial_prefix,"('/auto/scratch_n',i6.6,'/')") num_active_proc(iproc)
+
+! suppress everything until the last "/" to define the base name of local path
+! this is system dependent since it assumes the disks are mounted
+! as on our Beowulf (Unix and NFS)
+ clean_LOCAL_PATH = LOCAL_PATH(index(LOCAL_PATH,'/',.true.)+1:len_trim(LOCAL_PATH))
+
+! create full name with path
+ prname = serial_prefix(1:len_trim(serial_prefix)) // clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
+
+! deallocate array
+ deallocate(num_active_proc)
+
+! on shared-memory machines, global path is the same as local path
+ else
+
+! suppress white spaces if any
+ clean_LOCAL_PATH = adjustl(LOCAL_PATH)
+
+! create full name with path
+ prname = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // procname
+
+ endif
+
+ end subroutine create_serial_name_database
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/define_derivation_matrices.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/define_derivation_matrices.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/define_derivation_matrices.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,158 +1,158 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz)
-
- implicit none
-
- include "constants.h"
-
-! Gauss-Lobatto-Legendre points of integration and weights
- double precision, dimension(NGLLX) :: xigll,wxgll
- double precision, dimension(NGLLY) :: yigll,wygll
- double precision, dimension(NGLLZ) :: zigll,wzgll
-
-! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
- real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! function for calculating derivatives of Lagrange polynomials
- double precision, external :: lagrange_deriv_GLL
-
- integer i,j,k,i1,i2,j1,j2,k1,k2
-
-! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
-
-! set up coordinates of the Gauss-Lobatto-Legendre points
- call zwgljd(xigll,wxgll,NGLLX,GAUSSALPHA,GAUSSBETA)
- call zwgljd(yigll,wygll,NGLLY,GAUSSALPHA,GAUSSBETA)
- call zwgljd(zigll,wzgll,NGLLZ,GAUSSALPHA,GAUSSBETA)
-
-! if number of points is odd, the middle abscissa is exactly ZERO
- if(mod(NGLLX,2) /= 0) xigll((NGLLX-1)/2+1) = ZERO
- if(mod(NGLLY,2) /= 0) yigll((NGLLY-1)/2+1) = ZERO
- if(mod(NGLLZ,2) /= 0) zigll((NGLLZ-1)/2+1) = ZERO
-
-! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
-
-! calculate derivatives of the Lagrange polynomials
-! and precalculate some products in double precision
-! hprime(i,j) = h'_j(xigll_i) by definition of the derivation matrix
- do i1=1,NGLLX
- do i2=1,NGLLX
- hprime_xx(i2,i1) = sngl(lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX))
- hprimewgll_xx(i2,i1) = sngl(lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)*wxgll(i2))
- enddo
- enddo
-
- do j1=1,NGLLY
- do j2=1,NGLLY
- hprime_yy(j2,j1) = sngl(lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY))
- hprimewgll_yy(j2,j1) = sngl(lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)*wygll(j2))
- enddo
- enddo
-
- do k1=1,NGLLZ
- do k2=1,NGLLZ
- hprime_zz(k2,k1) = sngl(lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ))
- hprimewgll_zz(k2,k1) = sngl(lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)*wzgll(k2))
- enddo
- enddo
-
- do i=1,NGLLX
- do j=1,NGLLY
- wgllwgll_xy(i,j) = sngl(wxgll(i)*wygll(j))
- enddo
- enddo
-
- do i=1,NGLLX
- do k=1,NGLLZ
- wgllwgll_xz(i,k) = sngl(wxgll(i)*wzgll(k))
- enddo
- enddo
-
- do j=1,NGLLY
- do k=1,NGLLZ
- wgllwgll_yz(j,k) = sngl(wygll(j)*wzgll(k))
- enddo
- enddo
-
- else ! double precision version
-
-! calculate derivatives of the Lagrange polynomials
-! and precalculate some products in double precision
-! hprime(i,j) = h'_j(xigll_i) by definition of the derivation matrix
- do i1=1,NGLLX
- do i2=1,NGLLX
- hprime_xx(i2,i1) = lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)
- hprimewgll_xx(i2,i1) = lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)*wxgll(i2)
- enddo
- enddo
-
- do j1=1,NGLLY
- do j2=1,NGLLY
- hprime_yy(j2,j1) = lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)
- hprimewgll_yy(j2,j1) = lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)*wygll(j2)
- enddo
- enddo
-
- do k1=1,NGLLZ
- do k2=1,NGLLZ
- hprime_zz(k2,k1) = lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)
- hprimewgll_zz(k2,k1) = lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)*wzgll(k2)
- enddo
- enddo
-
- do i=1,NGLLX
- do j=1,NGLLY
- wgllwgll_xy(i,j) = wxgll(i)*wygll(j)
- enddo
- enddo
-
- do i=1,NGLLX
- do k=1,NGLLZ
- wgllwgll_xz(i,k) = wxgll(i)*wzgll(k)
- enddo
- enddo
-
- do j=1,NGLLY
- do k=1,NGLLZ
- wgllwgll_yz(j,k) = wygll(j)*wzgll(k)
- enddo
- enddo
-
- endif
-
- end subroutine define_derivation_matrices
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz)
+
+ implicit none
+
+ include "constants.h"
+
+! Gauss-Lobatto-Legendre points of integration and weights
+ double precision, dimension(NGLLX) :: xigll,wxgll
+ double precision, dimension(NGLLY) :: yigll,wygll
+ double precision, dimension(NGLLZ) :: zigll,wzgll
+
+! array with derivatives of Lagrange polynomials and precalculated products
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+ real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+ real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+ real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+! function for calculating derivatives of Lagrange polynomials
+ double precision, external :: lagrange_deriv_GLL
+
+ integer i,j,k,i1,i2,j1,j2,k1,k2
+
+! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+! set up coordinates of the Gauss-Lobatto-Legendre points
+ call zwgljd(xigll,wxgll,NGLLX,GAUSSALPHA,GAUSSBETA)
+ call zwgljd(yigll,wygll,NGLLY,GAUSSALPHA,GAUSSBETA)
+ call zwgljd(zigll,wzgll,NGLLZ,GAUSSALPHA,GAUSSBETA)
+
+! if number of points is odd, the middle abscissa is exactly ZERO
+ if(mod(NGLLX,2) /= 0) xigll((NGLLX-1)/2+1) = ZERO
+ if(mod(NGLLY,2) /= 0) yigll((NGLLY-1)/2+1) = ZERO
+ if(mod(NGLLZ,2) /= 0) zigll((NGLLZ-1)/2+1) = ZERO
+
+! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+
+! calculate derivatives of the Lagrange polynomials
+! and precalculate some products in double precision
+! hprime(i,j) = h'_j(xigll_i) by definition of the derivation matrix
+ do i1=1,NGLLX
+ do i2=1,NGLLX
+ hprime_xx(i2,i1) = sngl(lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX))
+ hprimewgll_xx(i2,i1) = sngl(lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)*wxgll(i2))
+ enddo
+ enddo
+
+ do j1=1,NGLLY
+ do j2=1,NGLLY
+ hprime_yy(j2,j1) = sngl(lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY))
+ hprimewgll_yy(j2,j1) = sngl(lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)*wygll(j2))
+ enddo
+ enddo
+
+ do k1=1,NGLLZ
+ do k2=1,NGLLZ
+ hprime_zz(k2,k1) = sngl(lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ))
+ hprimewgll_zz(k2,k1) = sngl(lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)*wzgll(k2))
+ enddo
+ enddo
+
+ do i=1,NGLLX
+ do j=1,NGLLY
+ wgllwgll_xy(i,j) = sngl(wxgll(i)*wygll(j))
+ enddo
+ enddo
+
+ do i=1,NGLLX
+ do k=1,NGLLZ
+ wgllwgll_xz(i,k) = sngl(wxgll(i)*wzgll(k))
+ enddo
+ enddo
+
+ do j=1,NGLLY
+ do k=1,NGLLZ
+ wgllwgll_yz(j,k) = sngl(wygll(j)*wzgll(k))
+ enddo
+ enddo
+
+ else ! double precision version
+
+! calculate derivatives of the Lagrange polynomials
+! and precalculate some products in double precision
+! hprime(i,j) = h'_j(xigll_i) by definition of the derivation matrix
+ do i1=1,NGLLX
+ do i2=1,NGLLX
+ hprime_xx(i2,i1) = lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)
+ hprimewgll_xx(i2,i1) = lagrange_deriv_GLL(i1-1,i2-1,xigll,NGLLX)*wxgll(i2)
+ enddo
+ enddo
+
+ do j1=1,NGLLY
+ do j2=1,NGLLY
+ hprime_yy(j2,j1) = lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)
+ hprimewgll_yy(j2,j1) = lagrange_deriv_GLL(j1-1,j2-1,yigll,NGLLY)*wygll(j2)
+ enddo
+ enddo
+
+ do k1=1,NGLLZ
+ do k2=1,NGLLZ
+ hprime_zz(k2,k1) = lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)
+ hprimewgll_zz(k2,k1) = lagrange_deriv_GLL(k1-1,k2-1,zigll,NGLLZ)*wzgll(k2)
+ enddo
+ enddo
+
+ do i=1,NGLLX
+ do j=1,NGLLY
+ wgllwgll_xy(i,j) = wxgll(i)*wygll(j)
+ enddo
+ enddo
+
+ do i=1,NGLLX
+ do k=1,NGLLZ
+ wgllwgll_xz(i,k) = wxgll(i)*wzgll(k)
+ enddo
+ enddo
+
+ do j=1,NGLLY
+ do k=1,NGLLZ
+ wgllwgll_yz(j,k) = wygll(j)*wzgll(k)
+ enddo
+ enddo
+
+ endif
+
+ end subroutine define_derivation_matrices
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/exit_mpi.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/exit_mpi.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/exit_mpi.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,83 +1,83 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! end the simulation and exit MPI
-
- subroutine exit_MPI(myrank,error_msg)
-
- implicit none
-
- include "constants.h"
-
-! identifier for error message file
- integer, parameter :: IERROR = 30
-
- integer myrank
- character(len=*) error_msg
-
- character(len=80) outputname
- character(len=256) OUTPUT_FILES
-
-! write error message to screen
- write(*,*) error_msg(1:len(error_msg))
- write(*,*) 'Error detected, aborting MPI... proc ',myrank
-
-! write error message to file
- call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
- write(outputname,"('/error_message',i6.6,'.txt')") myrank
- open(unit=IERROR,file=trim(OUTPUT_FILES)//outputname,status='unknown')
- write(IERROR,*) error_msg(1:len(error_msg))
- write(IERROR,*) 'Error detected, aborting MPI... proc ',myrank
- close(IERROR)
-
-! close output file
- if(myrank == 0 .and. IMAIN /= ISTANDARD_OUTPUT) close(IMAIN)
-
- call stop_all()
-
- end subroutine exit_MPI
-
-!
-!----
-!
-
-! version without rank number printed in the error message
- subroutine exit_MPI_without_rank(error_msg)
-
- implicit none
-
- include "constants.h"
-
- character(len=*) error_msg
-
-! write error message to screen
- write(*,*) error_msg(1:len(error_msg))
- write(*,*) 'Error detected, aborting MPI...'
-
- call stop_all()
-
- end subroutine exit_MPI_without_rank
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! end the simulation and exit MPI
+
+ subroutine exit_MPI(myrank,error_msg)
+
+ implicit none
+
+ include "constants.h"
+
+! identifier for error message file
+ integer, parameter :: IERROR = 30
+
+ integer myrank
+ character(len=*) error_msg
+
+ character(len=80) outputname
+ character(len=256) OUTPUT_FILES
+
+! write error message to screen
+ write(*,*) error_msg(1:len(error_msg))
+ write(*,*) 'Error detected, aborting MPI... proc ',myrank
+
+! write error message to file
+ call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
+ write(outputname,"('/error_message',i6.6,'.txt')") myrank
+ open(unit=IERROR,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+ write(IERROR,*) error_msg(1:len(error_msg))
+ write(IERROR,*) 'Error detected, aborting MPI... proc ',myrank
+ close(IERROR)
+
+! close output file
+ if(myrank == 0 .and. IMAIN /= ISTANDARD_OUTPUT) close(IMAIN)
+
+ call stop_all()
+
+ end subroutine exit_MPI
+
+!
+!----
+!
+
+! version without rank number printed in the error message
+ subroutine exit_MPI_without_rank(error_msg)
+
+ implicit none
+
+ include "constants.h"
+
+ character(len=*) error_msg
+
+! write error message to screen
+ write(*,*) error_msg(1:len(error_msg))
+ write(*,*) 'Error detected, aborting MPI...'
+
+ call stop_all()
+
+ end subroutine exit_MPI_without_rank
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_attenuation_model.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_attenuation_model.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_attenuation_model.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,1524 +1,1524 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-
- subroutine get_attenuation_model_olsen( vs_val, Q_mu )
-
-! uses scaling rule similar to Olsen et al. (2003) to determine attenuation medium
-!
-! returns: selected (sediment) Q_mu
-!
-! refers to:
-! K. B. Olsen, S. M. Day and C. R. Bradley, 2003.
-! Estimation of Q for Long-Period (>2 sec) Waves in the Los Angeles Basin
-! BSSA, 93, 2, p. 627-638
-
- implicit none
-
- include "constants.h"
-
- real(kind=CUSTOM_REAL) :: vs_val
- double precision :: Q_mu
-
- !local parameters
- integer :: int_Q_mu
-
- ! two variations of scaling rule handling
- logical,parameter :: USE_SIMPLE_OLSEN = .false.
- logical,parameter :: USE_DISCRETE_OLSEN = .true.
-
- ! uses rule Q_mu = constant * v_s
- ! v_s in m/s
- Q_mu = OLSEN_ATTENUATION_RATIO * vs_val
-
- ! uses a simple, 2-constant model mentioned in Olsen et al. (2003)
- if( USE_SIMPLE_OLSEN ) then
- ! vs (in m/s)
- if( vs_val < 2000.0_CUSTOM_REAL ) then
- Q_mu = 0.02 * vs_val
- else
- Q_mu = 0.1 * vs_val
- endif
- endif
-
- ! uses discrete values in sediment range
- if( USE_DISCRETE_OLSEN ) then
- int_Q_mu = 10 * nint(Q_mu / 10.)
-
- if(int_Q_mu < 40) int_Q_mu = 40
- if(int_Q_mu > 150) int_Q_mu = 150
-
- if(int_Q_mu == 40) then
- Q_mu = 40.0d0
- else if(int_Q_mu == 50) then
- Q_mu = 50.0d0
- else if(int_Q_mu == 60) then
- Q_mu = 60.0d0
- else if(int_Q_mu == 70) then
- Q_mu = 70.0d0
- else if(int_Q_mu == 80) then
- Q_mu = 80.0d0
- else if(int_Q_mu == 90) then
- Q_mu = 90.0d0
- else if(int_Q_mu == 100) then
- Q_mu = 100.0d0
- else if(int_Q_mu == 110) then
- Q_mu = 110.0d0
- else if(int_Q_mu == 120) then
- Q_mu = 120.0d0
- else if(int_Q_mu == 130) then
- Q_mu = 130.0d0
- else if(int_Q_mu == 140) then
- Q_mu = 140.0d0
- else if(int_Q_mu == 150) then
- Q_mu = 150.0d0
- else
- stop 'incorrect attenuation coefficient'
- endif
- endif
-
- ! limits Q_mu value range
- if( Q_mu < 1.0d0 ) Q_mu = 1.0d0
- if( Q_mu > ATTENUATION_COMP_MAXIMUM ) Q_mu = ATTENUATION_COMP_MAXIMUM
-
-
- end subroutine get_attenuation_model_olsen
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine get_attenuation_model(myrank,nspec,USE_OLSEN_ATTENUATION, &
- mustore,rho_vs,qmu_attenuation_store, &
- ispec_is_elastic,min_resolved_period,prname)
-
-! precalculates attenuation arrays and stores arrays into files
-
- implicit none
-
- include "constants.h"
-
- integer :: myrank,nspec
- logical :: USE_OLSEN_ATTENUATION
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: mustore
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rho_vs
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: qmu_attenuation_store
-
- logical, dimension(nspec) :: ispec_is_elastic
- real(kind=CUSTOM_REAL) :: min_resolved_period
- character(len=256) :: prname
-
- ! local parameters
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: one_minus_sum_beta
- real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: factor_common
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: scale_factor
- double precision, dimension(N_SLS) :: tau_sigma_dble,beta_dble
- double precision factor_scale_dble,one_minus_sum_beta_dble
- double precision :: Q_mu
- double precision :: f_c_source
- double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: tau_sigma
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: tauinv
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: beta
- real(kind=CUSTOM_REAL):: vs_val
- integer :: i,j,k,ispec,ier
- double precision :: qmin,qmax,qmin_all,qmax_all
-
- ! initializes arrays
- allocate(one_minus_sum_beta(NGLLX,NGLLY,NGLLZ,nspec), &
- factor_common(N_SLS,NGLLX,NGLLY,NGLLZ,nspec), &
- scale_factor(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocation attenuation arrays')
-
- one_minus_sum_beta(:,:,:,:) = 1._CUSTOM_REAL
- factor_common(:,:,:,:,:) = 1._CUSTOM_REAL
- scale_factor(:,:,:,:) = 1._CUSTOM_REAL
-
-
- ! gets stress relaxation times tau_sigma, i.e.
- ! precalculates tau_sigma depending on period band (constant for all Q_mu), and
- ! determines central frequency f_c_source of attenuation period band
- call get_attenuation_constants(min_resolved_period,tau_sigma_dble,&
- f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
- ! user output
- if( myrank == 0 ) then
- write(IMAIN,*)
- write(IMAIN,*) "attenuation: "
- write(IMAIN,*) " reference period (s) : ",sngl(1.0/ATTENUATION_f0_REFERENCE), &
- " frequency: ",sngl(ATTENUATION_f0_REFERENCE)
- write(IMAIN,*) " period band min/max (s): ",sngl(MIN_ATTENUATION_PERIOD),sngl(MAX_ATTENUATION_PERIOD)
- write(IMAIN,*) " central period (s) : ",sngl(1.0/f_c_source), &
- " frequency: ",sngl(f_c_source)
- endif
-
- ! determines inverse of tau_sigma
- if(CUSTOM_REAL == SIZE_REAL) then
- tau_sigma(:) = sngl(tau_sigma_dble(:))
- else
- tau_sigma(:) = tau_sigma_dble(:)
- endif
- ! precalculates the negative inverse of tau_sigma
- tauinv(:) = - 1._CUSTOM_REAL / tau_sigma(:)
-
- ! precalculates factors for shear modulus scaling according to attenuation model
- qmin = HUGEVAL
- qmax = 0.0
- do ispec = 1,nspec
-
- ! skips non elastic elements
- if( ispec_is_elastic(ispec) .eqv. .false. ) cycle
-
- ! determines attenuation factors for each GLL point
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
-
- ! shear moduli attenuation
- ! gets Q_mu value
- if(USE_OLSEN_ATTENUATION) then
- ! use scaling rule similar to Olsen et al. (2003)
- vs_val = mustore(i,j,k,ispec) / rho_vs(i,j,k,ispec)
- call get_attenuation_model_olsen( vs_val, Q_mu )
- else
- ! takes Q set in (CUBIT) mesh
- Q_mu = qmu_attenuation_store(i,j,k,ispec)
-
- ! attenuation zero
- if( Q_mu <= 1.e-5 ) cycle
-
- ! limits Q
- if( Q_mu < 1.0d0 ) Q_mu = 1.0d0
- if( Q_mu > ATTENUATION_COMP_MAXIMUM ) Q_mu = ATTENUATION_COMP_MAXIMUM
-
- endif
- ! statistics
- if( Q_mu < qmin ) qmin = Q_mu
- if( Q_mu > qmax ) qmax = Q_mu
-
- ! gets beta, on_minus_sum_beta and factor_scale
- ! based on calculation of strain relaxation times tau_eps
- call get_attenuation_factors(myrank,Q_mu,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD, &
- f_c_source,tau_sigma_dble, &
- beta_dble,one_minus_sum_beta_dble,factor_scale_dble)
-
- ! stores factor for unrelaxed parameter
- one_minus_sum_beta(i,j,k,ispec) = one_minus_sum_beta_dble
-
- ! stores factor for runge-kutta scheme
- ! using factor for modulus defect Delta M_i = - M_relaxed
- ! see e.g. Savage et al. (BSSA, 2010): eq. 11
- ! precomputes factor: 2 ( 1 - tau_eps_i / tau_sigma_i ) / tau_sigma_i
- beta(:) = beta_dble(:)
- factor_common(:,i,j,k,ispec) = 2._CUSTOM_REAL * beta(:) * tauinv(:)
-
- ! stores scale factor for mu moduli
- scale_factor(i,j,k,ispec) = factor_scale_dble
-
- enddo
- enddo
- enddo
- enddo
-
- ! stores attenuation arrays into files
- open(unit=27, file=prname(1:len_trim(prname))//'attenuation.bin', &
- status='unknown',action='write',form='unformatted',iostat=ier)
- if( ier /= 0 ) then
- print*,'error: could not open ',prname(1:len_trim(prname))//'attenuation.bin'
- call exit_mpi(myrank,'error opening attenuation.bin file')
- endif
- write(27) nspec
- write(27) one_minus_sum_beta
- write(27) factor_common
- write(27) scale_factor
- close(27)
-
- deallocate(one_minus_sum_beta,factor_common,scale_factor)
-
- ! statistics
- call min_all_dp(qmin,qmin_all)
- call max_all_dp(qmax,qmax_all)
- ! user output
- if( myrank == 0 ) then
- write(IMAIN,*) " Q_mu min/max : ",sngl(qmin_all),sngl(qmax_all)
- write(IMAIN,*)
- endif
-
- end subroutine get_attenuation_model
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
- subroutine get_attenuation_memory_values(tau_s, deltat, alphaval,betaval,gammaval)
-
-! returns: runge-kutta scheme terms alphaval, betaval and gammaval
-
- implicit none
-
- include 'constants.h'
-
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: tau_s, alphaval, betaval,gammaval
- real(kind=CUSTOM_REAL) :: deltat
-
- ! local parameter
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: tauinv
-
- ! inverse of tau_s
- tauinv(:) = - 1._CUSTOM_REAL / tau_s(:)
-
- ! runge-kutta coefficients
- ! see e.g.: Savage et al. (BSSA, 2010): eq. (11)
- alphaval(:) = 1.0 + deltat*tauinv(:) + deltat**2 * tauinv(:)**2 / 2._CUSTOM_REAL &
- + deltat**3 * tauinv(:)**3 / 6._CUSTOM_REAL &
- + deltat**4 * tauinv(:)**4 / 24._CUSTOM_REAL
- betaval(:) = deltat / 2._CUSTOM_REAL + deltat**2 * tauinv(:) / 3._CUSTOM_REAL &
- + deltat**3 * tauinv(:)**2 / 8._CUSTOM_REAL &
- + deltat**4 * tauinv(:)**3 / 24._CUSTOM_REAL
- gammaval(:) = deltat / 2._CUSTOM_REAL + deltat**2 * tauinv(:) / 6._CUSTOM_REAL &
- + deltat**3 * tauinv(:)**2 / 24._CUSTOM_REAL
-
- end subroutine get_attenuation_memory_values
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine get_attenuation_constants(min_resolved_period,tau_sigma, &
- f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
-! returns: period band constants tau_sigma and center frequency f_c_source
-
- implicit none
-
- include "constants.h"
-
- real(kind=CUSTOM_REAL) :: min_resolved_period
- double precision, dimension(N_SLS) :: tau_sigma
- double precision :: f_c_source
- double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
-
- ! local parameters
- real(kind=CUSTOM_REAL) :: min_period
-
- ! determines min/max periods for attenuation band based on minimum resolved period of mesh
- min_period = 0.99 * min_resolved_period ! uses a small margin
- ! debug for comparison with fix values from above
- !min_resolved_period = 0.943
- call get_attenuation_periods(min_period,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
- ! sets up stress relaxation times tau_sigma,
- ! equally spaced based on number of standard linear solids and period band
- call get_attenuation_tau_sigma(tau_sigma,N_SLS,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
- ! sets up central frequency
- ! logarithmic mean of frequency interval of absorption band
- call get_attenuation_source_freq(f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
- ! debug
- !f_c_source = 0.141421d0
- !tau_sigma(1) = 7.957747154594766669788441504352d0
- !tau_sigma(2) = 1.125395395196382652969191440206d0
- !tau_sigma(3) = 0.159154943091895345608222100964d0
-
- end subroutine get_attenuation_constants
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine get_attenuation_factors(myrank,Q_mu,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD, &
- f_c_source,tau_sigma, &
- beta,one_minus_sum_beta,factor_scale)
-
-! returns: attenuation mechanisms beta,one_minus_sum_beta,factor_scale
-
-! variable frequency range
-! variable period range
-! variable central logarithmic frequency
-
-! in the future when more memory is available on computers
-! it would be more accurate to use four mechanisms instead of three
-
-
- implicit none
-
- include "constants.h"
-
- integer:: myrank
- double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
- double precision :: f_c_source,Q_mu
- double precision, dimension(N_SLS) :: tau_sigma
- double precision, dimension(N_SLS) :: beta
- double precision :: one_minus_sum_beta
- double precision :: factor_scale
- ! local parameters
- double precision, dimension(N_SLS) :: tau_eps
-
- ! determines tau_eps for Q_mu
- call get_attenuation_tau_eps(Q_mu,tau_sigma,tau_eps, &
- MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
- ! determines one_minus_sum_beta
- call get_attenuation_property_values(tau_sigma,tau_eps,beta,one_minus_sum_beta)
-
- ! determines the "scale factor"
- call get_attenuation_scale_factor(myrank,f_c_source,tau_eps,tau_sigma,Q_mu,factor_scale)
-
- end subroutine get_attenuation_factors
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
- subroutine get_attenuation_property_values(tau_s, tau_eps, beta, one_minus_sum_beta)
-
-! coefficients useful for calculation between relaxed and unrelaxed moduli
-!
-! returns: coefficients beta, one_minus_sum_beta
-
- implicit none
-
- include "constants.h"
-
- double precision,dimension(N_SLS),intent(in) :: tau_s, tau_eps
- double precision,dimension(N_SLS),intent(out) :: beta
- double precision,intent(out):: one_minus_sum_beta
-
- ! local parameters
- double precision,dimension(N_SLS) :: tauinv
- integer :: i
-
- ! inverse of stress relaxation times
- tauinv(:) = -1.0d0 / tau_s(:)
-
- ! see e.g. Komatitsch & Tromp 1999, eq. (7)
-
- ! coefficients beta
- beta(:) = 1.0d0 - tau_eps(:) / tau_s(:)
-
- ! sum of coefficients beta
- one_minus_sum_beta = 1.0d0
- do i = 1,N_SLS
- one_minus_sum_beta = one_minus_sum_beta - beta(i)
- enddo
-
- end subroutine get_attenuation_property_values
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine get_attenuation_scale_factor(myrank, f_c_source, tau_eps, tau_sigma, Q_mu, scale_factor)
-
-! returns: physical dispersion scaling factor scale_factor
-
- implicit none
-
- include "constants.h"
-
- integer :: myrank
- double precision :: scale_factor, Q_mu, f_c_source
- ! strain and stress relaxation times
- double precision, dimension(N_SLS) :: tau_eps, tau_sigma
-
- ! local parameters
- double precision w_c_source
- double precision factor_scale_mu0, factor_scale_mu
- double precision a_val, b_val
- double precision big_omega
- integer i
-
-
- !--- compute central angular frequency of source (non dimensionalized)
- w_c_source = TWO_PI * f_c_source
-
-
- !--- quantity by which to scale mu_0 to get mu
- ! this formula can be found for instance in
- ! Liu, H. P., Anderson, D. L. and Kanamori, H., Velocity dispersion due to
- ! anelasticity: implications for seismology and mantle composition,
- ! Geophys. J. R. Astron. Soc., vol. 47, pp. 41-58 (1976)
- ! and in Aki, K. and Richards, P. G., Quantitative seismology, theory and methods,
- ! W. H. Freeman, (1980), second edition, sections 5.5 and 5.5.2, eq. (5.81) p. 170
- factor_scale_mu0 = ONE + TWO * log(f_c_source / ATTENUATION_f0_REFERENCE ) / (PI * Q_mu)
-
- !--- compute a, b and Omega parameters
- ! see e.g.:
- ! Liu et al. (1976): eq. 25
- ! using complex modulus Mc = M_R / ( A - i B )
- ! or
- ! Savage et al. (BSSA, 2010): eq. (5) and (6)
- ! complex modulus: M(t) = M_1(t) + i M_2(t)
- a_val = ONE
- b_val = ZERO
- do i = 1,N_SLS
- ! real part M_1 of complex modulus
- a_val = a_val - w_c_source * w_c_source * tau_eps(i) * &
- (tau_eps(i) - tau_sigma(i)) / (1.d0 + w_c_source * w_c_source * tau_eps(i) * tau_eps(i))
- ! imaginary part M_2 of complex modulus
- b_val = b_val + w_c_source * (tau_eps(i) - tau_sigma(i)) / &
- (1.d0 + w_c_source * w_c_source * tau_eps(i) * tau_eps(i))
- enddo
-
- ! see e.g. Liu et al. (1976): Omega used in equation (20)
- big_omega = a_val * ( sqrt(1.d0 + b_val*b_val/(a_val*a_val)) - 1.d0 )
-
- !--- quantity by which to scale mu to get mu_relaxed
- factor_scale_mu = b_val * b_val / (TWO * big_omega)
-
- !--- total factor by which to scale mu0
- scale_factor = factor_scale_mu * factor_scale_mu0
-
- !--- check that the correction factor is close to one
- if(scale_factor < 0.7 .or. scale_factor > 1.3) then
- write(*,*) "error : in get_attenuation_scale_factor() "
- write(*,*) " scale factor: ", scale_factor, " should be between 0.7 and 1.3"
- write(*,*) " please check your reference frequency ATTENUATION_f0_REFERENCE in constants.h"
- call exit_MPI(myrank,'incorrect correction factor in attenuation model')
- endif
-
- end subroutine get_attenuation_scale_factor
-
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-!compare: auto_ner.f90, GLOBE package
-
- subroutine get_attenuation_periods(min_resolved_period,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
-! determines min/max periods for attenuation based upon mininum resolved period of mesh
-
- implicit none
-
- include "constants.h"
-
- real(kind=CUSTOM_REAL),intent(in) :: min_resolved_period
- double precision,intent(out) :: MIN_ATTENUATION_PERIOD, MAX_ATTENUATION_PERIOD
-
- ! local parameters
- double precision :: THETA(5)
-
- ! checks number of standard linear solids
- if(N_SLS < 2 .OR. N_SLS > 5) then
- stop 'N_SLS must be greater than 1 or less than 6'
- endif
-
- ! THETA defines the width of the Attenation Range in Decades
- ! The number defined here were determined by minimizing
- ! the "flatness" of the absoption spectrum. Each THETA
- ! is defined for a particular N_SLS (constants.h)
- ! THETA(2) is for N_SLS = 2
- THETA(1) = 0.00d0
- THETA(2) = 0.75d0
- THETA(3) = 1.75d0
- THETA(4) = 2.25d0
- THETA(5) = 2.85d0
-
- ! Compute Min Attenuation Period
- !
- ! The Minimum attenuation period = (Grid Spacing in km) / V_min
- ! Grid spacing in km = Width of an element in km * spacing for GLL point * points per wavelength
-
- MIN_ATTENUATION_PERIOD = min_resolved_period
-
-
- ! Compute Max Attenuation Period
- !
- ! The max attenuation period for 3 SLS is optimally
- ! 1.75 decades from the min attenuation period, see THETA above
- !
- ! this uses: theta = log( T_max / T_min ) to calculate T_max for a given T_min
-
- MAX_ATTENUATION_PERIOD = MIN_ATTENUATION_PERIOD * 10.0d0**THETA(N_SLS)
-
- end subroutine get_attenuation_periods
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine get_attenuation_tau_sigma(tau_s, nsls, min_period, max_period)
-
-! Determines stress relaxation times tau_sigma
-! Sets the Tau_sigma (tau_s) to be equally spaced in log10 frequency
-
- implicit none
-
- integer :: nsls
- double precision,intent(in) :: min_period, max_period
- double precision,intent(out) :: tau_s(nsls)
- ! local parameters
- double precision :: f1, f2
- double precision :: exp1, exp2
- double precision :: dexp
- integer :: i
- double precision, parameter :: PI = 3.14159265358979d0
-
- ! min/max frequencies
- f1 = 1.0d0 / max_period
- f2 = 1.0d0 / min_period
-
- ! logarithms
- exp1 = log10(f1)
- exp2 = log10(f2)
-
- ! equally spaced in log10 frequency
- dexp = (exp2-exp1) / ((nsls*1.0d0) - 1)
- do i = 1,nsls
- tau_s(i) = 1.0 / (PI * 2.0d0 * 10**(exp1 + (i - 1)* 1.0d0 *dexp))
- enddo
-
- end subroutine get_attenuation_tau_sigma
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
- subroutine get_attenuation_source_freq(f_c_source,min_period,max_period)
-
-! Determines the Source Frequency
-
- implicit none
-
- double precision,intent(out) :: f_c_source
- double precision,intent(in) :: min_period, max_period
-
- ! local parameters
- double precision f1, f2,T_c_source
-
- ! min/max frequencies
- f1 = 1.0d0 / max_period
- f2 = 1.0d0 / min_period
-
- T_c_source = 1.0e+03 * 10.0d0**(0.5 * (log10(f1) + log10(f2)))
- ! central frequency
- f_c_source = T_c_source / 1000.0d0
-
- end subroutine get_attenuation_source_freq
-
-!--------------------------------------------------------------------------------------------------
-!
-! This portion of the SPECFEM3D Code was written by:
-! Brian Savage while at
-! California Institute of Technology
-! Department of Terrestrial Magnetism / Carnegie Institute of Washington
-! Univeristy of Rhode Island
-!
-! <savage at uri.edu>.
-! <savage13 at gps.caltech.edu>
-! <savage13 at dtm.ciw.edu>
-!
-! It is based upon formulation in the following references:
-!
-! Dahlen and Tromp, 1998
-! Theoretical Global Seismology
-!
-! Liu et al. 1976
-! Velocity dispersion due to anelasticity: implications for seismology and mantle composition
-! Geophys, J. R. asts. Soc, Vol 47, pp. 41-58
-!
-! The methodology can be found in:
-! Savage, B, D. Komatitsch and J. Tromp, 2010.
-! Effects of 3D Attenuation on Seismic Wave Amplitude and Phase Measurements
-! BSSA, 100, 3, p. 1241-1251.
-!
-! modifications:
-! - minor modifications by Daniel Peter, november 2010
-!--------------------------------------------------------------------------------------------------
-
- subroutine get_attenuation_tau_eps(Qmu_in,tau_s,tau_eps,min_period,max_period)
-
-! includes min_period, max_period, and N_SLS
-!
-! returns: determines strain relaxation times tau_eps
-
- implicit none
-
- include 'constants.h'
-
-! model_attenuation_variables
-!...
-
- double precision :: Qmu_in
- double precision, dimension(N_SLS) :: tau_s, tau_eps
- double precision :: min_period,max_period
-
- ! local parameters
- integer :: rw
- ! model_attenuation_storage_var
- type model_attenuation_storage_var
- sequence
- double precision, dimension(:,:), pointer :: tau_eps_storage
- double precision, dimension(:), pointer :: Qmu_storage
- integer Q_resolution
- integer Q_max
- end type model_attenuation_storage_var
- type (model_attenuation_storage_var) AM_S
- ! attenuation_simplex_variables
- type attenuation_simplex_variables
- sequence
- double precision Q ! Q = Desired Value of Attenuation or Q
- double precision iQ ! iQ = 1/Q
- double precision, dimension(:), pointer :: f
- ! f = Frequencies at which to evaluate the solution
- double precision, dimension(:), pointer :: tau_s
- ! tau_s = Tau_sigma defined by the frequency range and
- ! number of standard linear solids
- integer nf ! nf = Number of Frequencies
- integer nsls ! nsls = Number of Standard Linear Solids
- end type attenuation_simplex_variables
- type(attenuation_simplex_variables) AS_V
-
- ! READ
- rw = 1
- call model_attenuation_storage(Qmu_in, tau_eps, rw, AM_S)
- if(rw > 0) return
-
- call attenuation_invert_by_simplex(min_period, max_period, N_SLS, Qmu_in, tau_s, tau_eps, AS_V)
-
- ! WRITE
- rw = -1
- call model_attenuation_storage(Qmu_in, tau_eps, rw, AM_S)
-
- end subroutine get_attenuation_tau_eps
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
- subroutine model_attenuation_storage(Qmu, tau_eps, rw, AM_S)
-
- implicit none
- include 'constants.h'
-
-! model_attenuation_storage_var
- type model_attenuation_storage_var
- sequence
- double precision, dimension(:,:), pointer :: tau_eps_storage
- double precision, dimension(:), pointer :: Qmu_storage
- integer Q_resolution
- integer Q_max
- end type model_attenuation_storage_var
-
- type (model_attenuation_storage_var) AM_S
-! model_attenuation_storage_var
-
- double precision Qmu, Qmu_new
- double precision, dimension(N_SLS) :: tau_eps
- integer rw
-
- integer Qtmp
- integer, save :: first_time_called = 1
- double precision, parameter :: ZERO_TOL = 1.e-5
- integer ier
-
- if(first_time_called == 1) then
- first_time_called = 0
- AM_S%Q_resolution = 10**ATTENUATION_COMP_RESOLUTION
- AM_S%Q_max = ATTENUATION_COMP_MAXIMUM
- Qtmp = AM_S%Q_resolution * AM_S%Q_max
-
- allocate(AM_S%tau_eps_storage(N_SLS, Qtmp), &
- AM_S%Qmu_storage(Qtmp),stat=ier)
- if( ier /= 0 ) stop 'error allocating arrays for attenuation storage'
- AM_S%Qmu_storage(:) = -1
- endif
-
- if(Qmu < 0.0d0 .OR. Qmu > AM_S%Q_max) then
- write(IMAIN,*) 'Error attenuation_storage()'
- write(IMAIN,*) 'Attenuation Value out of Range: ', Qmu
- write(IMAIN,*) 'Attenuation Value out of Range: Min, Max ', 0, AM_S%Q_max
- call exit_MPI(0, 'Attenuation Value out of Range')
- endif
-
- if(rw > 0 .AND. Qmu <= ZERO_TOL) then
- Qmu = 0.0d0;
- tau_eps(:) = 0.0d0;
- return
- endif
- ! Generate index for Storage Array
- ! and Recast Qmu using this index
- ! Accroding to Brian, use float
- !Qtmp = Qmu * Q_resolution
- !Qmu = Qtmp / Q_resolution;
-
- ! by default: resolution is Q_resolution = 10
- ! converts Qmu to an array integer index:
- ! e.g. Qmu = 150.31 -> Qtmp = 150.31 * 10 = int( 1503.10 ) = 1503
- Qtmp = Qmu * dble(AM_S%Q_resolution)
-
- ! rounds to corresponding double value:
- ! e.g. Qmu_new = dble( 1503 ) / dble(10) = 150.30
- ! but Qmu_new is not used any further...
- Qmu_new = dble(Qtmp) / dble(AM_S%Q_resolution)
-
- if(rw > 0) then
- ! READ
- if(AM_S%Qmu_storage(Qtmp) > 0) then
- ! READ SUCCESSFUL
- tau_eps(:) = AM_S%tau_eps_storage(:, Qtmp)
- Qmu = AM_S%Qmu_storage(Qtmp)
- rw = 1
- else
- ! READ NOT SUCCESSFUL
- rw = -1
- endif
- else
- ! WRITE SUCCESSFUL
- AM_S%tau_eps_storage(:,Qtmp) = tau_eps(:)
- AM_S%Qmu_storage(Qtmp) = Qmu
- rw = 1
- endif
-
- end subroutine model_attenuation_storage
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine attenuation_invert_by_simplex(t2, t1, n, Q_real, tau_s, tau_eps, AS_V)
-
- implicit none
-
- ! attenuation_simplex_variables
- type attenuation_simplex_variables
- sequence
- double precision Q ! Q = Desired Value of Attenuation or Q
- double precision iQ ! iQ = 1/Q
- double precision, dimension(:), pointer :: f
- ! f = Frequencies at which to evaluate the solution
- double precision, dimension(:), pointer :: tau_s
- ! tau_s = Tau_sigma defined by the frequency range and
- ! number of standard linear solids
- integer nf ! nf = Number of Frequencies
- integer nsls ! nsls = Number of Standard Linear Solids
- end type attenuation_simplex_variables
- type(attenuation_simplex_variables) AS_V
- ! attenuation_simplex_variables
-
- ! Input / Output
- double precision t1, t2
- double precision Q_real
-! double precision omega_not
- integer n
- double precision, dimension(n) :: tau_s, tau_eps
-
- ! Internal
- integer i, iterations, err,prnt
- double precision f1, f2, exp1,exp2, min_value !, dexp
- integer, parameter :: nf = 100
- double precision, dimension(nf) :: f
- double precision, parameter :: PI = 3.14159265358979d0
- double precision, external :: attenuation_eval
-
- ! Values to be passed into the simplex minimization routine
- iterations = -1
- min_value = -1.0e-4
- err = 0
- prnt = 0
-
- !allocate(f(nf))
-
- ! Determine the min and max frequencies
- f1 = 1.0d0 / t1
- f2 = 1.0d0 / t2
-
- ! Determine the exponents of the frequencies
- exp1 = log10(f1)
- exp2 = log10(f2)
-
-! if(f2 < f1 .OR. Q_real < 0.0d0 .OR. n < 1) then
-! call exit_MPI(0, 'frequencies flipped or Q less than zero or N_SLS < 0')
-! endif
-
- ! Determine the Source frequency
-! omega_not = 1.0e+03 * 10.0d0**(0.5 * (log10(f1) + log10(f2)))
-
-
- ! Determine the Frequencies at which to compare solutions
- ! The frequencies should be equally spaced in log10 frequency
- do i = 1,nf
- f(i) = exp1 + ((i-1)*1.0d0 * (exp2-exp1) / ((nf-1)*1.0d0))
- enddo
-
- ! Set the Tau_sigma (tau_s) to be equally spaced in log10 frequency
-! dexp = (exp2-exp1) / ((n*1.0d0) - 1)
-! do i = 1,n
-! tau_s(i) = 1.0 / (PI * 2.0d0 * 10**(exp1 + (i - 1)* 1.0d0 *dexp))
-! enddo
-
-
- ! Shove the paramters into the module
- call attenuation_simplex_setup(nf,n,f,Q_real,tau_s,AS_V)
-
- ! Set the Tau_epsilon (tau_eps) to an initial value at omega*tau = 1
- ! tan_delta = 1/Q = (tau_eps - tau_s)/(2 * sqrt(tau e*tau_s))
- ! if we assume tau_eps =~ tau_s
- ! we get the equation below
- do i = 1,n
- tau_eps(i) = tau_s(i) + (tau_s(i) * 2.0d0/Q_real)
- enddo
-
- ! Run a simplex search to determine the optimum values of tau_eps
- call fminsearch(attenuation_eval, tau_eps, n, iterations, min_value, prnt, err,AS_V)
- if(err > 0) then
- write(*,*)'Search did not converge for an attenuation of ', Q_real
- write(*,*)' Iterations: ', iterations
- write(*,*)' Min Value: ', min_value
- write(*,*)' Aborting program'
- call exit_MPI(0,'attenuation_simplex: Search for Strain relaxation times did not converge')
- endif
-
- !deallocate(f)
-
- call attenuation_simplex_finish(AS_V)
-
- end subroutine attenuation_invert_by_simplex
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
- subroutine attenuation_simplex_setup(nf_in,nsls_in,f_in,Q_in,tau_s_in,AS_V)
-
-! - Inserts necessary parameters into the module attenuation_simplex_variables
-! - See module for explaination
-
- implicit none
-
- ! attenuation_simplex_variables
- type attenuation_simplex_variables
- sequence
- double precision Q ! Q = Desired Value of Attenuation or Q
- double precision iQ ! iQ = 1/Q
- double precision, dimension(:), pointer :: f
- ! f = Frequencies at which to evaluate the solution
- double precision, dimension(:), pointer :: tau_s
- ! tau_s = Tau_sigma defined by the frequency range and
- ! number of standard linear solids
- integer nf ! nf = Number of Frequencies
- integer nsls ! nsls = Number of Standard Linear Solids
- end type attenuation_simplex_variables
- type(attenuation_simplex_variables) AS_V
- ! attenuation_simplex_variables
-
- integer nf_in, nsls_in
- double precision Q_in
- double precision, dimension(nf_in) :: f_in
- double precision, dimension(nsls_in) :: tau_s_in
- integer ier
-
- allocate(AS_V%f(nf_in), &
- AS_V%tau_s(nsls_in),stat=ier)
- if( ier /= 0 ) stop 'error allocating arrays for attenuation simplex'
-
- AS_V%nf = nf_in
- AS_V%nsls = nsls_in
- AS_V%f = f_in
- AS_V%Q = Q_in
- AS_V%iQ = 1.0d0/AS_V%Q
- AS_V%tau_s = tau_s_in
-
- end subroutine attenuation_simplex_setup
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
- double precision function attenuation_eval(Xin,AS_V)
-
-! - Computes the misfit from a set of relaxation paramters
-! given a set of frequencies and target attenuation
-! - Evaluates only at the given frequencies
-! - Evaluation is done with an L2 norm
-!
-! Input
-! Xin = Tau_epsilon, Strain Relaxation Time
-! Note: Tau_sigma the Stress Relaxation Time is loaded
-! with attenuation_simplex_setup and stored in
-! attenuation_simplex_variables
-!
-! Xi = Sum_i^N sqrt [ (1/Qc_i - 1/Qt_i)^2 / 1/Qt_i^2 ]
-!
-! where Qc_i is the computed attenuation at a specific frequency
-! Qt_i is the desired attenuaiton at that frequency
-!
-! Uses attenuation_simplex_variables to store constant values
-!
-! See atteunation_simplex_setup
-!
-
- implicit none
-
- ! attenuation_simplex_variables
- type attenuation_simplex_variables
- sequence
- double precision Q ! Q = Desired Value of Attenuation or Q
- double precision iQ ! iQ = 1/Q
- double precision, dimension(:), pointer :: f
- ! f = Frequencies at which to evaluate the solution
- double precision, dimension(:), pointer :: tau_s
- ! tau_s = Tau_sigma defined by the frequency range and
- ! number of standard linear solids
- integer nf ! nf = Number of Frequencies
- integer nsls ! nsls = Number of Standard Linear Solids
- end type attenuation_simplex_variables
- type(attenuation_simplex_variables) AS_V
- ! attenuation_simplex_variables
-
- ! Input
- double precision, dimension(AS_V%nsls) :: Xin
- double precision, dimension(AS_V%nsls) :: tau_eps
-
- double precision, dimension(AS_V%nf) :: A, B, tan_delta
-
- integer i
- double precision xi, iQ2
-
- tau_eps = Xin
-
- call attenuation_maxwell(AS_V%nf,AS_V%nsls,AS_V%f,AS_V%tau_s,tau_eps,B,A)
-
- tan_delta = B / A
-
- attenuation_eval = 0.0d0
- iQ2 = AS_V%iQ**2
- do i = 1,AS_V%nf
- xi = sqrt(( ( (tan_delta(i) - AS_V%iQ) ** 2 ) / iQ2 ))
- attenuation_eval = attenuation_eval + xi
- enddo
-
- end function attenuation_eval
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine attenuation_maxwell(nf,nsls,f,tau_s,tau_eps,B,A)
-
-! - Computes the Moduli (Maxwell Solid) for a series of
-! Standard Linear Solids
-! - Computes M1 and M2 parameters after Dahlen and Tromp pp.203
-! here called B and A after Liu et al. 1976
-! - Another formulation uses Kelvin-Voigt Solids and computes
-! Compliences J1 and J2 after Dahlen and Tromp pp.203
-!
-! Input
-! nf = Number of Frequencies
-! nsls = Number of Standard Linear Solids
-! f = Frequencies (in log10 of frequencies)
-! dimension(nf)
-! tau_s = Tau_sigma Stress relaxation time (see References)
-! dimension(nsls)
-! tau_eps = Tau_epislon Strain relaxation time (see References)
-! dimension(nsls)!
-! Output
-! B = Real Moduli ( M2 Dahlen and Tromp pp.203 )
-! dimension(nf)
-! A = Imaginary Moduli ( M1 Dahlen and Tromp pp.203 )
-! dimension(nf)
-!
-! Dahlen and Tromp, 1998
-! Theoretical Global Seismology
-!
-! Liu et al. 1976
-! Velocity dispersion due to anelasticity: implications for seismology and mantle composition
-! Geophys, J. R. asts. Soc, Vol 47, pp. 41-58
-
- implicit none
-
- ! Input
- integer nf, nsls
- double precision, dimension(nf) :: f
- double precision, dimension(nsls) :: tau_s, tau_eps
- ! Output
- double precision, dimension(nf) :: A,B
-
- integer i,j
- double precision w, pi, demon
-
- PI = 3.14159265358979d0
-
- A(:) = 1.0d0 - nsls*1.0d0
- B(:) = 0.0d0
- do i = 1,nf
- w = 2.0d0 * PI * 10**f(i)
- do j = 1,nsls
- ! write(*,*)j,tau_s(j),tau_eps(j)
- demon = 1.0d0 + w**2 * tau_s(j)**2
- A(i) = A(i) + ((1.0d0 + (w**2 * tau_eps(j) * tau_s(j)))/ demon)
- B(i) = B(i) + ((w * (tau_eps(j) - tau_s(j))) / demon)
- end do
- ! write(*,*)A(i),B(i),10**f(i)
- enddo
-
- end subroutine attenuation_maxwell
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
- subroutine fminsearch(funk, x, n, itercount, tolf, prnt, err, AS_V)
-
-! subroutine fminsearch
-! - Computes the minimization of funk(x(n)) using the simplex method
-! - This subroutine is copied from Matlab fminsearch.m
-! and modified to suit my nefarious needs
-! Input
-! funk = double precision function with one input parameter
-! double precision function the_funk(x)
-! x = Input/Output
-! variables to be minimized
-! dimension(n)
-! Input: Initial Value
-! Output: Mimimized Value
-! n = number of variables
-! itercount = Input/Output
-! Input: maximum number of iterations
-! if < 0 default is used (200 * n)
-! Output: total number of iterations on output
-! tolf = Input/Output
-! Input: minimium tolerance of the function funk(x)
-! Output: minimium value of funk(x)(i.e. "a" solution)
-! prnt = Input
-! 3 => report every iteration
-! 4 => report every iteration, total simplex
-! err = Output
-! 0 => Normal exeecution, converged within desired range
-! 1 => Function Evaluation exceeded limit
-! 2 => Iterations exceeded limit
-!
-! See Matlab fminsearch
-
- implicit none
-
-! attenuation_simplex_variables
- type attenuation_simplex_variables
- sequence
- double precision Q ! Q = Desired Value of Attenuation or Q
- double precision iQ ! iQ = 1/Q
- double precision, dimension(:), pointer :: f
- ! f = Frequencies at which to evaluate the solution
- double precision, dimension(:), pointer :: tau_s
- ! tau_s = Tau_sigma defined by the frequency range and
- ! number of standard linear solids
- integer nf ! nf = Number of Frequencies
- integer nsls ! nsls = Number of Standard Linear Solids
- end type attenuation_simplex_variables
-
- type(attenuation_simplex_variables) AS_V
-! attenuation_simplex_variables
-
- ! Input
- double precision, external :: funk
-
- integer n
- double precision x(n) ! Also Output
- integer itercount, prnt, err
- double precision tolf
-
- !Internal
- integer i,j, how
- integer, parameter :: none = 0
- integer, parameter :: initial = 1
- integer, parameter :: expand = 2
- integer, parameter :: reflect = 3
- integer, parameter :: contract_outside = 4
- integer, parameter :: contract_inside = 5
- integer, parameter :: shrink = 6
-
- integer maxiter, maxfun
- integer func_evals
- double precision tolx
-
- double precision rho, chi, psi, sigma
- double precision xin(n), y(n), v(n,n+1), fv(n+1)
- double precision vtmp(n,n+1)
- double precision usual_delta, zero_term_delta
- double precision xbar(n), xr(n), fxr, xe(n), fxe, xc(n), fxc, fxcc, xcc(n)
- integer place(n+1)
-
- double precision max_size_simplex, max_value
-
- rho = 1.0d0
- chi = 2.0d0
- psi = 0.5d0
- sigma = 0.5d0
-
-
- if(itercount > 0) then
- maxiter = itercount
- else
- maxiter = 200 * n
- endif
- itercount = 0
- maxfun = 200 * n
-
- if(tolf > 0.0d0) then
- tolx = 1.0e-4
- else
- tolx = 1.0e-4
- tolf = 1.0e-4
- endif
-
- err = 0
-
- xin = x
- v(:,:) = 0.0d0
- fv(:) = 0.0d0
-
- v(:,1) = xin
- x = xin
-
- fv(1) = funk(xin,AS_V)
-
- usual_delta = 0.05
- zero_term_delta = 0.00025
-
- do j = 1,n
- y = xin
- if(y(j) /= 0.0d0) then
- y(j) = (1.0d0 + usual_delta) * y(j)
- else
- y(j) = zero_term_delta
- endif
- v(:,j+1) = y
- x(:) = y
- fv(j+1) = funk(x,AS_V)
- enddo
-
- call qsort_local(fv,n+1,place)
-
- do i = 1,n+1
- vtmp(:,i) = v(:,place(i))
- enddo
- v = vtmp
-
- how = initial
- itercount = 1
- func_evals = n+1
- if(prnt == 3) then
- write(*,*)'Iterations Funk Evals Value How'
- write(*,*)itercount, func_evals, fv(1), how
- endif
- if(prnt == 4) then
- write(*,*)'How: ',how
- write(*,*)'V: ', v
- write(*,*)'fv: ',fv
- write(*,*)'evals: ',func_evals
- endif
-
- do while (func_evals < maxfun .AND. itercount < maxiter)
-
- if(max_size_simplex(v,n) <= tolx .AND. &
- max_value(fv,n+1) <= tolf) then
- goto 666
- endif
- how = none
-
- ! xbar = average of the n (NOT n+1) best points
- ! xbar = sum(v(:,1:n), 2)/n
- xbar(:) = 0.0d0
- do i = 1,n
- do j = 1,n
- xbar(i) = xbar(i) + v(i,j)
- enddo
- xbar(i) = xbar(i) / (n*1.0d0)
- enddo
- xr = (1 + rho)*xbar - rho*v(:,n+1)
- x(:) = xr
- fxr = funk(x,AS_V)
- func_evals = func_evals + 1
- if (fxr < fv(1)) then
- ! Calculate the expansion point
- xe = (1 + rho*chi)*xbar - rho*chi*v(:,n+1)
- x = xe
- fxe = funk(x,AS_V)
- func_evals = func_evals+1
- if (fxe < fxr) then
- v(:,n+1) = xe
- fv(n+1) = fxe
- how = expand
- else
- v(:,n+1) = xr
- fv(n+1) = fxr
- how = reflect
- endif
- else ! fv(:,1) <= fxr
- if (fxr < fv(n)) then
- v(:,n+1) = xr
- fv(n+1) = fxr
- how = reflect
- else ! fxr >= fv(:,n)
- ! Perform contraction
- if (fxr < fv(n+1)) then
- ! Perform an outside contraction
- xc = (1 + psi*rho)*xbar - psi*rho*v(:,n+1)
- x(:) = xc
- fxc = funk(x,AS_V)
- func_evals = func_evals+1
-
- if (fxc <= fxr) then
- v(:,n+1) = xc
- fv(n+1) = fxc
- how = contract_outside
- else
- ! perform a shrink
- how = shrink
- endif
- else
- ! Perform an inside contraction
- xcc = (1-psi)*xbar + psi*v(:,n+1)
- x(:) = xcc
- fxcc = funk(x,AS_V)
- func_evals = func_evals+1
-
- if (fxcc < fv(n+1)) then
- v(:,n+1) = xcc
- fv(n+1) = fxcc
- how = contract_inside
- else
- ! perform a shrink
- how = shrink
- endif
- endif
- if (how == shrink) then
- do j=2,n+1
- v(:,j)=v(:,1)+sigma*(v(:,j) - v(:,1))
- x(:) = v(:,j)
- fv(j) = funk(x,AS_V)
- enddo
- func_evals = func_evals + n
- endif
- endif
- endif
-
- call qsort_local(fv,n+1,place)
- do i = 1,n+1
- vtmp(:,i) = v(:,place(i))
- enddo
- v = vtmp
-
- itercount = itercount + 1
- if (prnt == 3) then
- write(*,*)itercount, func_evals, fv(1), how
- elseif (prnt == 4) then
- write(*,*)
- write(*,*)'How: ',how
- write(*,*)'v: ',v
- write(*,*)'fv: ',fv
- write(*,*)'evals: ',func_evals
- endif
- enddo
-
- if(func_evals > maxfun) then
- write(*,*)'function evaluations exceeded prescribed limit', maxfun
- err = 1
- endif
- if(itercount > maxiter) then
- write(*,*)'iterations exceeded prescribed limit', maxiter
- err = 2
- endif
-
-666 continue
- x = v(:,1)
- tolf = fv(1)
-
- end subroutine fminsearch
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- double precision function max_value(fv,n)
-
-! - Finds the maximim value of the difference of between the first
-! value and the remaining values of a vector
-! Input
-! fv = Input
-! Vector
-! dimension(n)
-! n = Input
-! Length of fv
-!
-! Returns:
-! Xi = max( || fv(1)- fv(i) || ) for i=2:n
-!
-
- implicit none
- integer n
- double precision fv(n)
-
- integer i
- double precision m, z
-
- m = 0.0d0
- do i = 2,n
- z = abs(fv(1) - fv(i))
- if(z > m) then
- m = z
- endif
- enddo
-
- max_value = m
-
- end function max_value
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- double precision function max_size_simplex(v,n)
-
-! - Determines the maximum distance between two point in a simplex
-! Input
-! v = Input
-! Simplex Verticies
-! dimension(n, n+1)
-! n = Pseudo Length of n
-!
-! Returns:
-! Xi = max( max( || v(:,1) - v(:,i) || ) ) for i=2:n+1
-!
-
- implicit none
- integer n
- double precision v(n,n+1)
-
- integer i,j
- double precision m, z
-
- m = 0.0d0
- do i = 1,n
- do j = 2,n+1
- z = abs(v(i,j) - v(i,1))
- if(z > m) then
- m = z
- endif
- enddo
- enddo
-
- max_size_simplex = m
-
- end function max_size_simplex
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
- subroutine qsort_local(X,n,I)
-
-! - Implementation of a Bubble Sort Routine
-! Input
-! X = Input/Output
-! Vector to be sorted
-! dimension(n)
-! n = Input
-! Length of X
-! I = Output
-! Sorted Indicies of vecotr X
-!
-! Example:
-! X = [ 4 3 1 2 ] on Input
-! I = [ 1 2 3 4 ] Computed Internally (in order)
-!
-! X = [ 1 2 3 4 ] on Output
-! I = [ 3 4 2 1 ] on Output
-!
-
- implicit none
-
- integer n
- double precision X(n)
- integer I(n)
-
- integer j,k
- double precision rtmp
- integer itmp
-
- do j = 1,n
- I(j) = j
- enddo
-
- do j = 1,n
- do k = 1,n-j
- if(X(k+1) < X(k)) then
- rtmp = X(k)
- X(k) = X(k+1)
- X(k+1) = rtmp
-
- itmp = I(k)
- I(k) = I(k+1)
- I(k+1) = itmp
- endif
- enddo
- enddo
-
- end subroutine qsort_local
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine attenuation_simplex_finish(AS_V)
-
- implicit none
-
- ! attenuation_simplex_variables
- type attenuation_simplex_variables
- sequence
- double precision Q ! Q = Desired Value of Attenuation or Q
- double precision iQ ! iQ = 1/Q
- double precision, dimension(:), pointer :: f
- ! f = Frequencies at which to evaluate the solution
- double precision, dimension(:), pointer :: tau_s
- ! tau_s = Tau_sigma defined by the frequency range and
- ! number of standard linear solids
- integer nf ! nf = Number of Frequencies
- integer nsls ! nsls = Number of Standard Linear Solids
- end type attenuation_simplex_variables
- type(attenuation_simplex_variables) AS_V
- ! attenuation_simplex_variables
-
- deallocate(AS_V%f)
- deallocate(AS_V%tau_s)
-
- end subroutine attenuation_simplex_finish
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+ subroutine get_attenuation_model_olsen( vs_val, Q_mu )
+
+! uses scaling rule similar to Olsen et al. (2003) to determine attenuation medium
+!
+! returns: selected (sediment) Q_mu
+!
+! refers to:
+! K. B. Olsen, S. M. Day and C. R. Bradley, 2003.
+! Estimation of Q for Long-Period (>2 sec) Waves in the Los Angeles Basin
+! BSSA, 93, 2, p. 627-638
+
+ implicit none
+
+ include "constants.h"
+
+ real(kind=CUSTOM_REAL) :: vs_val
+ double precision :: Q_mu
+
+ !local parameters
+ integer :: int_Q_mu
+
+ ! two variations of scaling rule handling
+ logical,parameter :: USE_SIMPLE_OLSEN = .false.
+ logical,parameter :: USE_DISCRETE_OLSEN = .true.
+
+ ! uses rule Q_mu = constant * v_s
+ ! v_s in m/s
+ Q_mu = OLSEN_ATTENUATION_RATIO * vs_val
+
+ ! uses a simple, 2-constant model mentioned in Olsen et al. (2003)
+ if( USE_SIMPLE_OLSEN ) then
+ ! vs (in m/s)
+ if( vs_val < 2000.0_CUSTOM_REAL ) then
+ Q_mu = 0.02 * vs_val
+ else
+ Q_mu = 0.1 * vs_val
+ endif
+ endif
+
+ ! uses discrete values in sediment range
+ if( USE_DISCRETE_OLSEN ) then
+ int_Q_mu = 10 * nint(Q_mu / 10.)
+
+ if(int_Q_mu < 40) int_Q_mu = 40
+ if(int_Q_mu > 150) int_Q_mu = 150
+
+ if(int_Q_mu == 40) then
+ Q_mu = 40.0d0
+ else if(int_Q_mu == 50) then
+ Q_mu = 50.0d0
+ else if(int_Q_mu == 60) then
+ Q_mu = 60.0d0
+ else if(int_Q_mu == 70) then
+ Q_mu = 70.0d0
+ else if(int_Q_mu == 80) then
+ Q_mu = 80.0d0
+ else if(int_Q_mu == 90) then
+ Q_mu = 90.0d0
+ else if(int_Q_mu == 100) then
+ Q_mu = 100.0d0
+ else if(int_Q_mu == 110) then
+ Q_mu = 110.0d0
+ else if(int_Q_mu == 120) then
+ Q_mu = 120.0d0
+ else if(int_Q_mu == 130) then
+ Q_mu = 130.0d0
+ else if(int_Q_mu == 140) then
+ Q_mu = 140.0d0
+ else if(int_Q_mu == 150) then
+ Q_mu = 150.0d0
+ else
+ stop 'incorrect attenuation coefficient'
+ endif
+ endif
+
+ ! limits Q_mu value range
+ if( Q_mu < 1.0d0 ) Q_mu = 1.0d0
+ if( Q_mu > ATTENUATION_COMP_MAXIMUM ) Q_mu = ATTENUATION_COMP_MAXIMUM
+
+
+ end subroutine get_attenuation_model_olsen
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine get_attenuation_model(myrank,nspec,USE_OLSEN_ATTENUATION, &
+ mustore,rho_vs,qmu_attenuation_store, &
+ ispec_is_elastic,min_resolved_period,prname)
+
+! precalculates attenuation arrays and stores arrays into files
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: myrank,nspec
+ logical :: USE_OLSEN_ATTENUATION
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: mustore
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rho_vs
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: qmu_attenuation_store
+
+ logical, dimension(nspec) :: ispec_is_elastic
+ real(kind=CUSTOM_REAL) :: min_resolved_period
+ character(len=256) :: prname
+
+ ! local parameters
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: one_minus_sum_beta
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: factor_common
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: scale_factor
+ double precision, dimension(N_SLS) :: tau_sigma_dble,beta_dble
+ double precision factor_scale_dble,one_minus_sum_beta_dble
+ double precision :: Q_mu
+ double precision :: f_c_source
+ double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: tau_sigma
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: tauinv
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: beta
+ real(kind=CUSTOM_REAL):: vs_val
+ integer :: i,j,k,ispec,ier
+ double precision :: qmin,qmax,qmin_all,qmax_all
+
+ ! initializes arrays
+ allocate(one_minus_sum_beta(NGLLX,NGLLY,NGLLZ,nspec), &
+ factor_common(N_SLS,NGLLX,NGLLY,NGLLZ,nspec), &
+ scale_factor(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocation attenuation arrays')
+
+ one_minus_sum_beta(:,:,:,:) = 1._CUSTOM_REAL
+ factor_common(:,:,:,:,:) = 1._CUSTOM_REAL
+ scale_factor(:,:,:,:) = 1._CUSTOM_REAL
+
+
+ ! gets stress relaxation times tau_sigma, i.e.
+ ! precalculates tau_sigma depending on period band (constant for all Q_mu), and
+ ! determines central frequency f_c_source of attenuation period band
+ call get_attenuation_constants(min_resolved_period,tau_sigma_dble,&
+ f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+ ! user output
+ if( myrank == 0 ) then
+ write(IMAIN,*)
+ write(IMAIN,*) "attenuation: "
+ write(IMAIN,*) " reference period (s) : ",sngl(1.0/ATTENUATION_f0_REFERENCE), &
+ " frequency: ",sngl(ATTENUATION_f0_REFERENCE)
+ write(IMAIN,*) " period band min/max (s): ",sngl(MIN_ATTENUATION_PERIOD),sngl(MAX_ATTENUATION_PERIOD)
+ write(IMAIN,*) " central period (s) : ",sngl(1.0/f_c_source), &
+ " frequency: ",sngl(f_c_source)
+ endif
+
+ ! determines inverse of tau_sigma
+ if(CUSTOM_REAL == SIZE_REAL) then
+ tau_sigma(:) = sngl(tau_sigma_dble(:))
+ else
+ tau_sigma(:) = tau_sigma_dble(:)
+ endif
+ ! precalculates the negative inverse of tau_sigma
+ tauinv(:) = - 1._CUSTOM_REAL / tau_sigma(:)
+
+ ! precalculates factors for shear modulus scaling according to attenuation model
+ qmin = HUGEVAL
+ qmax = 0.0
+ do ispec = 1,nspec
+
+ ! skips non elastic elements
+ if( ispec_is_elastic(ispec) .eqv. .false. ) cycle
+
+ ! determines attenuation factors for each GLL point
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+
+ ! shear moduli attenuation
+ ! gets Q_mu value
+ if(USE_OLSEN_ATTENUATION) then
+ ! use scaling rule similar to Olsen et al. (2003)
+ vs_val = mustore(i,j,k,ispec) / rho_vs(i,j,k,ispec)
+ call get_attenuation_model_olsen( vs_val, Q_mu )
+ else
+ ! takes Q set in (CUBIT) mesh
+ Q_mu = qmu_attenuation_store(i,j,k,ispec)
+
+ ! attenuation zero
+ if( Q_mu <= 1.e-5 ) cycle
+
+ ! limits Q
+ if( Q_mu < 1.0d0 ) Q_mu = 1.0d0
+ if( Q_mu > ATTENUATION_COMP_MAXIMUM ) Q_mu = ATTENUATION_COMP_MAXIMUM
+
+ endif
+ ! statistics
+ if( Q_mu < qmin ) qmin = Q_mu
+ if( Q_mu > qmax ) qmax = Q_mu
+
+ ! gets beta, on_minus_sum_beta and factor_scale
+ ! based on calculation of strain relaxation times tau_eps
+ call get_attenuation_factors(myrank,Q_mu,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD, &
+ f_c_source,tau_sigma_dble, &
+ beta_dble,one_minus_sum_beta_dble,factor_scale_dble)
+
+ ! stores factor for unrelaxed parameter
+ one_minus_sum_beta(i,j,k,ispec) = one_minus_sum_beta_dble
+
+ ! stores factor for runge-kutta scheme
+ ! using factor for modulus defect Delta M_i = - M_relaxed
+ ! see e.g. Savage et al. (BSSA, 2010): eq. 11
+ ! precomputes factor: 2 ( 1 - tau_eps_i / tau_sigma_i ) / tau_sigma_i
+ beta(:) = beta_dble(:)
+ factor_common(:,i,j,k,ispec) = 2._CUSTOM_REAL * beta(:) * tauinv(:)
+
+ ! stores scale factor for mu moduli
+ scale_factor(i,j,k,ispec) = factor_scale_dble
+
+ enddo
+ enddo
+ enddo
+ enddo
+
+ ! stores attenuation arrays into files
+ open(unit=27, file=prname(1:len_trim(prname))//'attenuation.bin', &
+ status='unknown',action='write',form='unformatted',iostat=ier)
+ if( ier /= 0 ) then
+ print*,'error: could not open ',prname(1:len_trim(prname))//'attenuation.bin'
+ call exit_mpi(myrank,'error opening attenuation.bin file')
+ endif
+ write(27) nspec
+ write(27) one_minus_sum_beta
+ write(27) factor_common
+ write(27) scale_factor
+ close(27)
+
+ deallocate(one_minus_sum_beta,factor_common,scale_factor)
+
+ ! statistics
+ call min_all_dp(qmin,qmin_all)
+ call max_all_dp(qmax,qmax_all)
+ ! user output
+ if( myrank == 0 ) then
+ write(IMAIN,*) " Q_mu min/max : ",sngl(qmin_all),sngl(qmax_all)
+ write(IMAIN,*)
+ endif
+
+ end subroutine get_attenuation_model
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine get_attenuation_memory_values(tau_s, deltat, alphaval,betaval,gammaval)
+
+! returns: runge-kutta scheme terms alphaval, betaval and gammaval
+
+ implicit none
+
+ include 'constants.h'
+
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: tau_s, alphaval, betaval,gammaval
+ real(kind=CUSTOM_REAL) :: deltat
+
+ ! local parameter
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: tauinv
+
+ ! inverse of tau_s
+ tauinv(:) = - 1._CUSTOM_REAL / tau_s(:)
+
+ ! runge-kutta coefficients
+ ! see e.g.: Savage et al. (BSSA, 2010): eq. (11)
+ alphaval(:) = 1.0 + deltat*tauinv(:) + deltat**2 * tauinv(:)**2 / 2._CUSTOM_REAL &
+ + deltat**3 * tauinv(:)**3 / 6._CUSTOM_REAL &
+ + deltat**4 * tauinv(:)**4 / 24._CUSTOM_REAL
+ betaval(:) = deltat / 2._CUSTOM_REAL + deltat**2 * tauinv(:) / 3._CUSTOM_REAL &
+ + deltat**3 * tauinv(:)**2 / 8._CUSTOM_REAL &
+ + deltat**4 * tauinv(:)**3 / 24._CUSTOM_REAL
+ gammaval(:) = deltat / 2._CUSTOM_REAL + deltat**2 * tauinv(:) / 6._CUSTOM_REAL &
+ + deltat**3 * tauinv(:)**2 / 24._CUSTOM_REAL
+
+ end subroutine get_attenuation_memory_values
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine get_attenuation_constants(min_resolved_period,tau_sigma, &
+ f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+! returns: period band constants tau_sigma and center frequency f_c_source
+
+ implicit none
+
+ include "constants.h"
+
+ real(kind=CUSTOM_REAL) :: min_resolved_period
+ double precision, dimension(N_SLS) :: tau_sigma
+ double precision :: f_c_source
+ double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
+
+ ! local parameters
+ real(kind=CUSTOM_REAL) :: min_period
+
+ ! determines min/max periods for attenuation band based on minimum resolved period of mesh
+ min_period = 0.99 * min_resolved_period ! uses a small margin
+ ! debug for comparison with fix values from above
+ !min_resolved_period = 0.943
+ call get_attenuation_periods(min_period,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+ ! sets up stress relaxation times tau_sigma,
+ ! equally spaced based on number of standard linear solids and period band
+ call get_attenuation_tau_sigma(tau_sigma,N_SLS,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+ ! sets up central frequency
+ ! logarithmic mean of frequency interval of absorption band
+ call get_attenuation_source_freq(f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+ ! debug
+ !f_c_source = 0.141421d0
+ !tau_sigma(1) = 7.957747154594766669788441504352d0
+ !tau_sigma(2) = 1.125395395196382652969191440206d0
+ !tau_sigma(3) = 0.159154943091895345608222100964d0
+
+ end subroutine get_attenuation_constants
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine get_attenuation_factors(myrank,Q_mu,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD, &
+ f_c_source,tau_sigma, &
+ beta,one_minus_sum_beta,factor_scale)
+
+! returns: attenuation mechanisms beta,one_minus_sum_beta,factor_scale
+
+! variable frequency range
+! variable period range
+! variable central logarithmic frequency
+
+! in the future when more memory is available on computers
+! it would be more accurate to use four mechanisms instead of three
+
+
+ implicit none
+
+ include "constants.h"
+
+ integer:: myrank
+ double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
+ double precision :: f_c_source,Q_mu
+ double precision, dimension(N_SLS) :: tau_sigma
+ double precision, dimension(N_SLS) :: beta
+ double precision :: one_minus_sum_beta
+ double precision :: factor_scale
+ ! local parameters
+ double precision, dimension(N_SLS) :: tau_eps
+
+ ! determines tau_eps for Q_mu
+ call get_attenuation_tau_eps(Q_mu,tau_sigma,tau_eps, &
+ MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+ ! determines one_minus_sum_beta
+ call get_attenuation_property_values(tau_sigma,tau_eps,beta,one_minus_sum_beta)
+
+ ! determines the "scale factor"
+ call get_attenuation_scale_factor(myrank,f_c_source,tau_eps,tau_sigma,Q_mu,factor_scale)
+
+ end subroutine get_attenuation_factors
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine get_attenuation_property_values(tau_s, tau_eps, beta, one_minus_sum_beta)
+
+! coefficients useful for calculation between relaxed and unrelaxed moduli
+!
+! returns: coefficients beta, one_minus_sum_beta
+
+ implicit none
+
+ include "constants.h"
+
+ double precision,dimension(N_SLS),intent(in) :: tau_s, tau_eps
+ double precision,dimension(N_SLS),intent(out) :: beta
+ double precision,intent(out):: one_minus_sum_beta
+
+ ! local parameters
+ double precision,dimension(N_SLS) :: tauinv
+ integer :: i
+
+ ! inverse of stress relaxation times
+ tauinv(:) = -1.0d0 / tau_s(:)
+
+ ! see e.g. Komatitsch & Tromp 1999, eq. (7)
+
+ ! coefficients beta
+ beta(:) = 1.0d0 - tau_eps(:) / tau_s(:)
+
+ ! sum of coefficients beta
+ one_minus_sum_beta = 1.0d0
+ do i = 1,N_SLS
+ one_minus_sum_beta = one_minus_sum_beta - beta(i)
+ enddo
+
+ end subroutine get_attenuation_property_values
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine get_attenuation_scale_factor(myrank, f_c_source, tau_eps, tau_sigma, Q_mu, scale_factor)
+
+! returns: physical dispersion scaling factor scale_factor
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: myrank
+ double precision :: scale_factor, Q_mu, f_c_source
+ ! strain and stress relaxation times
+ double precision, dimension(N_SLS) :: tau_eps, tau_sigma
+
+ ! local parameters
+ double precision w_c_source
+ double precision factor_scale_mu0, factor_scale_mu
+ double precision a_val, b_val
+ double precision big_omega
+ integer i
+
+
+ !--- compute central angular frequency of source (non dimensionalized)
+ w_c_source = TWO_PI * f_c_source
+
+
+ !--- quantity by which to scale mu_0 to get mu
+ ! this formula can be found for instance in
+ ! Liu, H. P., Anderson, D. L. and Kanamori, H., Velocity dispersion due to
+ ! anelasticity: implications for seismology and mantle composition,
+ ! Geophys. J. R. Astron. Soc., vol. 47, pp. 41-58 (1976)
+ ! and in Aki, K. and Richards, P. G., Quantitative seismology, theory and methods,
+ ! W. H. Freeman, (1980), second edition, sections 5.5 and 5.5.2, eq. (5.81) p. 170
+ factor_scale_mu0 = ONE + TWO * log(f_c_source / ATTENUATION_f0_REFERENCE ) / (PI * Q_mu)
+
+ !--- compute a, b and Omega parameters
+ ! see e.g.:
+ ! Liu et al. (1976): eq. 25
+ ! using complex modulus Mc = M_R / ( A - i B )
+ ! or
+ ! Savage et al. (BSSA, 2010): eq. (5) and (6)
+ ! complex modulus: M(t) = M_1(t) + i M_2(t)
+ a_val = ONE
+ b_val = ZERO
+ do i = 1,N_SLS
+ ! real part M_1 of complex modulus
+ a_val = a_val - w_c_source * w_c_source * tau_eps(i) * &
+ (tau_eps(i) - tau_sigma(i)) / (1.d0 + w_c_source * w_c_source * tau_eps(i) * tau_eps(i))
+ ! imaginary part M_2 of complex modulus
+ b_val = b_val + w_c_source * (tau_eps(i) - tau_sigma(i)) / &
+ (1.d0 + w_c_source * w_c_source * tau_eps(i) * tau_eps(i))
+ enddo
+
+ ! see e.g. Liu et al. (1976): Omega used in equation (20)
+ big_omega = a_val * ( sqrt(1.d0 + b_val*b_val/(a_val*a_val)) - 1.d0 )
+
+ !--- quantity by which to scale mu to get mu_relaxed
+ factor_scale_mu = b_val * b_val / (TWO * big_omega)
+
+ !--- total factor by which to scale mu0
+ scale_factor = factor_scale_mu * factor_scale_mu0
+
+ !--- check that the correction factor is close to one
+ if(scale_factor < 0.7 .or. scale_factor > 1.3) then
+ write(*,*) "error : in get_attenuation_scale_factor() "
+ write(*,*) " scale factor: ", scale_factor, " should be between 0.7 and 1.3"
+ write(*,*) " please check your reference frequency ATTENUATION_f0_REFERENCE in constants.h"
+ call exit_MPI(myrank,'incorrect correction factor in attenuation model')
+ endif
+
+ end subroutine get_attenuation_scale_factor
+
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+!compare: auto_ner.f90, GLOBE package
+
+ subroutine get_attenuation_periods(min_resolved_period,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+! determines min/max periods for attenuation based upon mininum resolved period of mesh
+
+ implicit none
+
+ include "constants.h"
+
+ real(kind=CUSTOM_REAL),intent(in) :: min_resolved_period
+ double precision,intent(out) :: MIN_ATTENUATION_PERIOD, MAX_ATTENUATION_PERIOD
+
+ ! local parameters
+ double precision :: THETA(5)
+
+ ! checks number of standard linear solids
+ if(N_SLS < 2 .OR. N_SLS > 5) then
+ stop 'N_SLS must be greater than 1 or less than 6'
+ endif
+
+ ! THETA defines the width of the Attenation Range in Decades
+ ! The number defined here were determined by minimizing
+ ! the "flatness" of the absoption spectrum. Each THETA
+ ! is defined for a particular N_SLS (constants.h)
+ ! THETA(2) is for N_SLS = 2
+ THETA(1) = 0.00d0
+ THETA(2) = 0.75d0
+ THETA(3) = 1.75d0
+ THETA(4) = 2.25d0
+ THETA(5) = 2.85d0
+
+ ! Compute Min Attenuation Period
+ !
+ ! The Minimum attenuation period = (Grid Spacing in km) / V_min
+ ! Grid spacing in km = Width of an element in km * spacing for GLL point * points per wavelength
+
+ MIN_ATTENUATION_PERIOD = min_resolved_period
+
+
+ ! Compute Max Attenuation Period
+ !
+ ! The max attenuation period for 3 SLS is optimally
+ ! 1.75 decades from the min attenuation period, see THETA above
+ !
+ ! this uses: theta = log( T_max / T_min ) to calculate T_max for a given T_min
+
+ MAX_ATTENUATION_PERIOD = MIN_ATTENUATION_PERIOD * 10.0d0**THETA(N_SLS)
+
+ end subroutine get_attenuation_periods
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine get_attenuation_tau_sigma(tau_s, nsls, min_period, max_period)
+
+! Determines stress relaxation times tau_sigma
+! Sets the Tau_sigma (tau_s) to be equally spaced in log10 frequency
+
+ implicit none
+
+ integer :: nsls
+ double precision,intent(in) :: min_period, max_period
+ double precision,intent(out) :: tau_s(nsls)
+ ! local parameters
+ double precision :: f1, f2
+ double precision :: exp1, exp2
+ double precision :: dexp
+ integer :: i
+ double precision, parameter :: PI = 3.14159265358979d0
+
+ ! min/max frequencies
+ f1 = 1.0d0 / max_period
+ f2 = 1.0d0 / min_period
+
+ ! logarithms
+ exp1 = log10(f1)
+ exp2 = log10(f2)
+
+ ! equally spaced in log10 frequency
+ dexp = (exp2-exp1) / ((nsls*1.0d0) - 1)
+ do i = 1,nsls
+ tau_s(i) = 1.0 / (PI * 2.0d0 * 10**(exp1 + (i - 1)* 1.0d0 *dexp))
+ enddo
+
+ end subroutine get_attenuation_tau_sigma
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine get_attenuation_source_freq(f_c_source,min_period,max_period)
+
+! Determines the Source Frequency
+
+ implicit none
+
+ double precision,intent(out) :: f_c_source
+ double precision,intent(in) :: min_period, max_period
+
+ ! local parameters
+ double precision f1, f2,T_c_source
+
+ ! min/max frequencies
+ f1 = 1.0d0 / max_period
+ f2 = 1.0d0 / min_period
+
+ T_c_source = 1.0e+03 * 10.0d0**(0.5 * (log10(f1) + log10(f2)))
+ ! central frequency
+ f_c_source = T_c_source / 1000.0d0
+
+ end subroutine get_attenuation_source_freq
+
+!--------------------------------------------------------------------------------------------------
+!
+! This portion of the SPECFEM3D Code was written by:
+! Brian Savage while at
+! California Institute of Technology
+! Department of Terrestrial Magnetism / Carnegie Institute of Washington
+! Univeristy of Rhode Island
+!
+! <savage at uri.edu>.
+! <savage13 at gps.caltech.edu>
+! <savage13 at dtm.ciw.edu>
+!
+! It is based upon formulation in the following references:
+!
+! Dahlen and Tromp, 1998
+! Theoretical Global Seismology
+!
+! Liu et al. 1976
+! Velocity dispersion due to anelasticity: implications for seismology and mantle composition
+! Geophys, J. R. asts. Soc, Vol 47, pp. 41-58
+!
+! The methodology can be found in:
+! Savage, B, D. Komatitsch and J. Tromp, 2010.
+! Effects of 3D Attenuation on Seismic Wave Amplitude and Phase Measurements
+! BSSA, 100, 3, p. 1241-1251.
+!
+! modifications:
+! - minor modifications by Daniel Peter, november 2010
+!--------------------------------------------------------------------------------------------------
+
+ subroutine get_attenuation_tau_eps(Qmu_in,tau_s,tau_eps,min_period,max_period)
+
+! includes min_period, max_period, and N_SLS
+!
+! returns: determines strain relaxation times tau_eps
+
+ implicit none
+
+ include 'constants.h'
+
+! model_attenuation_variables
+!...
+
+ double precision :: Qmu_in
+ double precision, dimension(N_SLS) :: tau_s, tau_eps
+ double precision :: min_period,max_period
+
+ ! local parameters
+ integer :: rw
+ ! model_attenuation_storage_var
+ type model_attenuation_storage_var
+ sequence
+ double precision, dimension(:,:), pointer :: tau_eps_storage
+ double precision, dimension(:), pointer :: Qmu_storage
+ integer Q_resolution
+ integer Q_max
+ end type model_attenuation_storage_var
+ type (model_attenuation_storage_var) AM_S
+ ! attenuation_simplex_variables
+ type attenuation_simplex_variables
+ sequence
+ double precision Q ! Q = Desired Value of Attenuation or Q
+ double precision iQ ! iQ = 1/Q
+ double precision, dimension(:), pointer :: f
+ ! f = Frequencies at which to evaluate the solution
+ double precision, dimension(:), pointer :: tau_s
+ ! tau_s = Tau_sigma defined by the frequency range and
+ ! number of standard linear solids
+ integer nf ! nf = Number of Frequencies
+ integer nsls ! nsls = Number of Standard Linear Solids
+ end type attenuation_simplex_variables
+ type(attenuation_simplex_variables) AS_V
+
+ ! READ
+ rw = 1
+ call model_attenuation_storage(Qmu_in, tau_eps, rw, AM_S)
+ if(rw > 0) return
+
+ call attenuation_invert_by_simplex(min_period, max_period, N_SLS, Qmu_in, tau_s, tau_eps, AS_V)
+
+ ! WRITE
+ rw = -1
+ call model_attenuation_storage(Qmu_in, tau_eps, rw, AM_S)
+
+ end subroutine get_attenuation_tau_eps
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine model_attenuation_storage(Qmu, tau_eps, rw, AM_S)
+
+ implicit none
+ include 'constants.h'
+
+! model_attenuation_storage_var
+ type model_attenuation_storage_var
+ sequence
+ double precision, dimension(:,:), pointer :: tau_eps_storage
+ double precision, dimension(:), pointer :: Qmu_storage
+ integer Q_resolution
+ integer Q_max
+ end type model_attenuation_storage_var
+
+ type (model_attenuation_storage_var) AM_S
+! model_attenuation_storage_var
+
+ double precision Qmu, Qmu_new
+ double precision, dimension(N_SLS) :: tau_eps
+ integer rw
+
+ integer Qtmp
+ integer, save :: first_time_called = 1
+ double precision, parameter :: ZERO_TOL = 1.e-5
+ integer ier
+
+ if(first_time_called == 1) then
+ first_time_called = 0
+ AM_S%Q_resolution = 10**ATTENUATION_COMP_RESOLUTION
+ AM_S%Q_max = ATTENUATION_COMP_MAXIMUM
+ Qtmp = AM_S%Q_resolution * AM_S%Q_max
+
+ allocate(AM_S%tau_eps_storage(N_SLS, Qtmp), &
+ AM_S%Qmu_storage(Qtmp),stat=ier)
+ if( ier /= 0 ) stop 'error allocating arrays for attenuation storage'
+ AM_S%Qmu_storage(:) = -1
+ endif
+
+ if(Qmu < 0.0d0 .OR. Qmu > AM_S%Q_max) then
+ write(IMAIN,*) 'Error attenuation_storage()'
+ write(IMAIN,*) 'Attenuation Value out of Range: ', Qmu
+ write(IMAIN,*) 'Attenuation Value out of Range: Min, Max ', 0, AM_S%Q_max
+ call exit_MPI(0, 'Attenuation Value out of Range')
+ endif
+
+ if(rw > 0 .AND. Qmu <= ZERO_TOL) then
+ Qmu = 0.0d0;
+ tau_eps(:) = 0.0d0;
+ return
+ endif
+ ! Generate index for Storage Array
+ ! and Recast Qmu using this index
+ ! Accroding to Brian, use float
+ !Qtmp = Qmu * Q_resolution
+ !Qmu = Qtmp / Q_resolution;
+
+ ! by default: resolution is Q_resolution = 10
+ ! converts Qmu to an array integer index:
+ ! e.g. Qmu = 150.31 -> Qtmp = 150.31 * 10 = int( 1503.10 ) = 1503
+ Qtmp = Qmu * dble(AM_S%Q_resolution)
+
+ ! rounds to corresponding double value:
+ ! e.g. Qmu_new = dble( 1503 ) / dble(10) = 150.30
+ ! but Qmu_new is not used any further...
+ Qmu_new = dble(Qtmp) / dble(AM_S%Q_resolution)
+
+ if(rw > 0) then
+ ! READ
+ if(AM_S%Qmu_storage(Qtmp) > 0) then
+ ! READ SUCCESSFUL
+ tau_eps(:) = AM_S%tau_eps_storage(:, Qtmp)
+ Qmu = AM_S%Qmu_storage(Qtmp)
+ rw = 1
+ else
+ ! READ NOT SUCCESSFUL
+ rw = -1
+ endif
+ else
+ ! WRITE SUCCESSFUL
+ AM_S%tau_eps_storage(:,Qtmp) = tau_eps(:)
+ AM_S%Qmu_storage(Qtmp) = Qmu
+ rw = 1
+ endif
+
+ end subroutine model_attenuation_storage
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine attenuation_invert_by_simplex(t2, t1, n, Q_real, tau_s, tau_eps, AS_V)
+
+ implicit none
+
+ ! attenuation_simplex_variables
+ type attenuation_simplex_variables
+ sequence
+ double precision Q ! Q = Desired Value of Attenuation or Q
+ double precision iQ ! iQ = 1/Q
+ double precision, dimension(:), pointer :: f
+ ! f = Frequencies at which to evaluate the solution
+ double precision, dimension(:), pointer :: tau_s
+ ! tau_s = Tau_sigma defined by the frequency range and
+ ! number of standard linear solids
+ integer nf ! nf = Number of Frequencies
+ integer nsls ! nsls = Number of Standard Linear Solids
+ end type attenuation_simplex_variables
+ type(attenuation_simplex_variables) AS_V
+ ! attenuation_simplex_variables
+
+ ! Input / Output
+ double precision t1, t2
+ double precision Q_real
+! double precision omega_not
+ integer n
+ double precision, dimension(n) :: tau_s, tau_eps
+
+ ! Internal
+ integer i, iterations, err,prnt
+ double precision f1, f2, exp1,exp2, min_value !, dexp
+ integer, parameter :: nf = 100
+ double precision, dimension(nf) :: f
+ double precision, parameter :: PI = 3.14159265358979d0
+ double precision, external :: attenuation_eval
+
+ ! Values to be passed into the simplex minimization routine
+ iterations = -1
+ min_value = -1.0e-4
+ err = 0
+ prnt = 0
+
+ !allocate(f(nf))
+
+ ! Determine the min and max frequencies
+ f1 = 1.0d0 / t1
+ f2 = 1.0d0 / t2
+
+ ! Determine the exponents of the frequencies
+ exp1 = log10(f1)
+ exp2 = log10(f2)
+
+! if(f2 < f1 .OR. Q_real < 0.0d0 .OR. n < 1) then
+! call exit_MPI(0, 'frequencies flipped or Q less than zero or N_SLS < 0')
+! endif
+
+ ! Determine the Source frequency
+! omega_not = 1.0e+03 * 10.0d0**(0.5 * (log10(f1) + log10(f2)))
+
+
+ ! Determine the Frequencies at which to compare solutions
+ ! The frequencies should be equally spaced in log10 frequency
+ do i = 1,nf
+ f(i) = exp1 + ((i-1)*1.0d0 * (exp2-exp1) / ((nf-1)*1.0d0))
+ enddo
+
+ ! Set the Tau_sigma (tau_s) to be equally spaced in log10 frequency
+! dexp = (exp2-exp1) / ((n*1.0d0) - 1)
+! do i = 1,n
+! tau_s(i) = 1.0 / (PI * 2.0d0 * 10**(exp1 + (i - 1)* 1.0d0 *dexp))
+! enddo
+
+
+ ! Shove the paramters into the module
+ call attenuation_simplex_setup(nf,n,f,Q_real,tau_s,AS_V)
+
+ ! Set the Tau_epsilon (tau_eps) to an initial value at omega*tau = 1
+ ! tan_delta = 1/Q = (tau_eps - tau_s)/(2 * sqrt(tau e*tau_s))
+ ! if we assume tau_eps =~ tau_s
+ ! we get the equation below
+ do i = 1,n
+ tau_eps(i) = tau_s(i) + (tau_s(i) * 2.0d0/Q_real)
+ enddo
+
+ ! Run a simplex search to determine the optimum values of tau_eps
+ call fminsearch(attenuation_eval, tau_eps, n, iterations, min_value, prnt, err,AS_V)
+ if(err > 0) then
+ write(*,*)'Search did not converge for an attenuation of ', Q_real
+ write(*,*)' Iterations: ', iterations
+ write(*,*)' Min Value: ', min_value
+ write(*,*)' Aborting program'
+ call exit_MPI(0,'attenuation_simplex: Search for Strain relaxation times did not converge')
+ endif
+
+ !deallocate(f)
+
+ call attenuation_simplex_finish(AS_V)
+
+ end subroutine attenuation_invert_by_simplex
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine attenuation_simplex_setup(nf_in,nsls_in,f_in,Q_in,tau_s_in,AS_V)
+
+! - Inserts necessary parameters into the module attenuation_simplex_variables
+! - See module for explaination
+
+ implicit none
+
+ ! attenuation_simplex_variables
+ type attenuation_simplex_variables
+ sequence
+ double precision Q ! Q = Desired Value of Attenuation or Q
+ double precision iQ ! iQ = 1/Q
+ double precision, dimension(:), pointer :: f
+ ! f = Frequencies at which to evaluate the solution
+ double precision, dimension(:), pointer :: tau_s
+ ! tau_s = Tau_sigma defined by the frequency range and
+ ! number of standard linear solids
+ integer nf ! nf = Number of Frequencies
+ integer nsls ! nsls = Number of Standard Linear Solids
+ end type attenuation_simplex_variables
+ type(attenuation_simplex_variables) AS_V
+ ! attenuation_simplex_variables
+
+ integer nf_in, nsls_in
+ double precision Q_in
+ double precision, dimension(nf_in) :: f_in
+ double precision, dimension(nsls_in) :: tau_s_in
+ integer ier
+
+ allocate(AS_V%f(nf_in), &
+ AS_V%tau_s(nsls_in),stat=ier)
+ if( ier /= 0 ) stop 'error allocating arrays for attenuation simplex'
+
+ AS_V%nf = nf_in
+ AS_V%nsls = nsls_in
+ AS_V%f = f_in
+ AS_V%Q = Q_in
+ AS_V%iQ = 1.0d0/AS_V%Q
+ AS_V%tau_s = tau_s_in
+
+ end subroutine attenuation_simplex_setup
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ double precision function attenuation_eval(Xin,AS_V)
+
+! - Computes the misfit from a set of relaxation paramters
+! given a set of frequencies and target attenuation
+! - Evaluates only at the given frequencies
+! - Evaluation is done with an L2 norm
+!
+! Input
+! Xin = Tau_epsilon, Strain Relaxation Time
+! Note: Tau_sigma the Stress Relaxation Time is loaded
+! with attenuation_simplex_setup and stored in
+! attenuation_simplex_variables
+!
+! Xi = Sum_i^N sqrt [ (1/Qc_i - 1/Qt_i)^2 / 1/Qt_i^2 ]
+!
+! where Qc_i is the computed attenuation at a specific frequency
+! Qt_i is the desired attenuaiton at that frequency
+!
+! Uses attenuation_simplex_variables to store constant values
+!
+! See atteunation_simplex_setup
+!
+
+ implicit none
+
+ ! attenuation_simplex_variables
+ type attenuation_simplex_variables
+ sequence
+ double precision Q ! Q = Desired Value of Attenuation or Q
+ double precision iQ ! iQ = 1/Q
+ double precision, dimension(:), pointer :: f
+ ! f = Frequencies at which to evaluate the solution
+ double precision, dimension(:), pointer :: tau_s
+ ! tau_s = Tau_sigma defined by the frequency range and
+ ! number of standard linear solids
+ integer nf ! nf = Number of Frequencies
+ integer nsls ! nsls = Number of Standard Linear Solids
+ end type attenuation_simplex_variables
+ type(attenuation_simplex_variables) AS_V
+ ! attenuation_simplex_variables
+
+ ! Input
+ double precision, dimension(AS_V%nsls) :: Xin
+ double precision, dimension(AS_V%nsls) :: tau_eps
+
+ double precision, dimension(AS_V%nf) :: A, B, tan_delta
+
+ integer i
+ double precision xi, iQ2
+
+ tau_eps = Xin
+
+ call attenuation_maxwell(AS_V%nf,AS_V%nsls,AS_V%f,AS_V%tau_s,tau_eps,B,A)
+
+ tan_delta = B / A
+
+ attenuation_eval = 0.0d0
+ iQ2 = AS_V%iQ**2
+ do i = 1,AS_V%nf
+ xi = sqrt(( ( (tan_delta(i) - AS_V%iQ) ** 2 ) / iQ2 ))
+ attenuation_eval = attenuation_eval + xi
+ enddo
+
+ end function attenuation_eval
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine attenuation_maxwell(nf,nsls,f,tau_s,tau_eps,B,A)
+
+! - Computes the Moduli (Maxwell Solid) for a series of
+! Standard Linear Solids
+! - Computes M1 and M2 parameters after Dahlen and Tromp pp.203
+! here called B and A after Liu et al. 1976
+! - Another formulation uses Kelvin-Voigt Solids and computes
+! Compliences J1 and J2 after Dahlen and Tromp pp.203
+!
+! Input
+! nf = Number of Frequencies
+! nsls = Number of Standard Linear Solids
+! f = Frequencies (in log10 of frequencies)
+! dimension(nf)
+! tau_s = Tau_sigma Stress relaxation time (see References)
+! dimension(nsls)
+! tau_eps = Tau_epislon Strain relaxation time (see References)
+! dimension(nsls)!
+! Output
+! B = Real Moduli ( M2 Dahlen and Tromp pp.203 )
+! dimension(nf)
+! A = Imaginary Moduli ( M1 Dahlen and Tromp pp.203 )
+! dimension(nf)
+!
+! Dahlen and Tromp, 1998
+! Theoretical Global Seismology
+!
+! Liu et al. 1976
+! Velocity dispersion due to anelasticity: implications for seismology and mantle composition
+! Geophys, J. R. asts. Soc, Vol 47, pp. 41-58
+
+ implicit none
+
+ ! Input
+ integer nf, nsls
+ double precision, dimension(nf) :: f
+ double precision, dimension(nsls) :: tau_s, tau_eps
+ ! Output
+ double precision, dimension(nf) :: A,B
+
+ integer i,j
+ double precision w, pi, demon
+
+ PI = 3.14159265358979d0
+
+ A(:) = 1.0d0 - nsls*1.0d0
+ B(:) = 0.0d0
+ do i = 1,nf
+ w = 2.0d0 * PI * 10**f(i)
+ do j = 1,nsls
+ ! write(*,*)j,tau_s(j),tau_eps(j)
+ demon = 1.0d0 + w**2 * tau_s(j)**2
+ A(i) = A(i) + ((1.0d0 + (w**2 * tau_eps(j) * tau_s(j)))/ demon)
+ B(i) = B(i) + ((w * (tau_eps(j) - tau_s(j))) / demon)
+ end do
+ ! write(*,*)A(i),B(i),10**f(i)
+ enddo
+
+ end subroutine attenuation_maxwell
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine fminsearch(funk, x, n, itercount, tolf, prnt, err, AS_V)
+
+! subroutine fminsearch
+! - Computes the minimization of funk(x(n)) using the simplex method
+! - This subroutine is copied from Matlab fminsearch.m
+! and modified to suit my nefarious needs
+! Input
+! funk = double precision function with one input parameter
+! double precision function the_funk(x)
+! x = Input/Output
+! variables to be minimized
+! dimension(n)
+! Input: Initial Value
+! Output: Mimimized Value
+! n = number of variables
+! itercount = Input/Output
+! Input: maximum number of iterations
+! if < 0 default is used (200 * n)
+! Output: total number of iterations on output
+! tolf = Input/Output
+! Input: minimium tolerance of the function funk(x)
+! Output: minimium value of funk(x)(i.e. "a" solution)
+! prnt = Input
+! 3 => report every iteration
+! 4 => report every iteration, total simplex
+! err = Output
+! 0 => Normal exeecution, converged within desired range
+! 1 => Function Evaluation exceeded limit
+! 2 => Iterations exceeded limit
+!
+! See Matlab fminsearch
+
+ implicit none
+
+! attenuation_simplex_variables
+ type attenuation_simplex_variables
+ sequence
+ double precision Q ! Q = Desired Value of Attenuation or Q
+ double precision iQ ! iQ = 1/Q
+ double precision, dimension(:), pointer :: f
+ ! f = Frequencies at which to evaluate the solution
+ double precision, dimension(:), pointer :: tau_s
+ ! tau_s = Tau_sigma defined by the frequency range and
+ ! number of standard linear solids
+ integer nf ! nf = Number of Frequencies
+ integer nsls ! nsls = Number of Standard Linear Solids
+ end type attenuation_simplex_variables
+
+ type(attenuation_simplex_variables) AS_V
+! attenuation_simplex_variables
+
+ ! Input
+ double precision, external :: funk
+
+ integer n
+ double precision x(n) ! Also Output
+ integer itercount, prnt, err
+ double precision tolf
+
+ !Internal
+ integer i,j, how
+ integer, parameter :: none = 0
+ integer, parameter :: initial = 1
+ integer, parameter :: expand = 2
+ integer, parameter :: reflect = 3
+ integer, parameter :: contract_outside = 4
+ integer, parameter :: contract_inside = 5
+ integer, parameter :: shrink = 6
+
+ integer maxiter, maxfun
+ integer func_evals
+ double precision tolx
+
+ double precision rho, chi, psi, sigma
+ double precision xin(n), y(n), v(n,n+1), fv(n+1)
+ double precision vtmp(n,n+1)
+ double precision usual_delta, zero_term_delta
+ double precision xbar(n), xr(n), fxr, xe(n), fxe, xc(n), fxc, fxcc, xcc(n)
+ integer place(n+1)
+
+ double precision max_size_simplex, max_value
+
+ rho = 1.0d0
+ chi = 2.0d0
+ psi = 0.5d0
+ sigma = 0.5d0
+
+
+ if(itercount > 0) then
+ maxiter = itercount
+ else
+ maxiter = 200 * n
+ endif
+ itercount = 0
+ maxfun = 200 * n
+
+ if(tolf > 0.0d0) then
+ tolx = 1.0e-4
+ else
+ tolx = 1.0e-4
+ tolf = 1.0e-4
+ endif
+
+ err = 0
+
+ xin = x
+ v(:,:) = 0.0d0
+ fv(:) = 0.0d0
+
+ v(:,1) = xin
+ x = xin
+
+ fv(1) = funk(xin,AS_V)
+
+ usual_delta = 0.05
+ zero_term_delta = 0.00025
+
+ do j = 1,n
+ y = xin
+ if(y(j) /= 0.0d0) then
+ y(j) = (1.0d0 + usual_delta) * y(j)
+ else
+ y(j) = zero_term_delta
+ endif
+ v(:,j+1) = y
+ x(:) = y
+ fv(j+1) = funk(x,AS_V)
+ enddo
+
+ call qsort_local(fv,n+1,place)
+
+ do i = 1,n+1
+ vtmp(:,i) = v(:,place(i))
+ enddo
+ v = vtmp
+
+ how = initial
+ itercount = 1
+ func_evals = n+1
+ if(prnt == 3) then
+ write(*,*)'Iterations Funk Evals Value How'
+ write(*,*)itercount, func_evals, fv(1), how
+ endif
+ if(prnt == 4) then
+ write(*,*)'How: ',how
+ write(*,*)'V: ', v
+ write(*,*)'fv: ',fv
+ write(*,*)'evals: ',func_evals
+ endif
+
+ do while (func_evals < maxfun .AND. itercount < maxiter)
+
+ if(max_size_simplex(v,n) <= tolx .AND. &
+ max_value(fv,n+1) <= tolf) then
+ goto 666
+ endif
+ how = none
+
+ ! xbar = average of the n (NOT n+1) best points
+ ! xbar = sum(v(:,1:n), 2)/n
+ xbar(:) = 0.0d0
+ do i = 1,n
+ do j = 1,n
+ xbar(i) = xbar(i) + v(i,j)
+ enddo
+ xbar(i) = xbar(i) / (n*1.0d0)
+ enddo
+ xr = (1 + rho)*xbar - rho*v(:,n+1)
+ x(:) = xr
+ fxr = funk(x,AS_V)
+ func_evals = func_evals + 1
+ if (fxr < fv(1)) then
+ ! Calculate the expansion point
+ xe = (1 + rho*chi)*xbar - rho*chi*v(:,n+1)
+ x = xe
+ fxe = funk(x,AS_V)
+ func_evals = func_evals+1
+ if (fxe < fxr) then
+ v(:,n+1) = xe
+ fv(n+1) = fxe
+ how = expand
+ else
+ v(:,n+1) = xr
+ fv(n+1) = fxr
+ how = reflect
+ endif
+ else ! fv(:,1) <= fxr
+ if (fxr < fv(n)) then
+ v(:,n+1) = xr
+ fv(n+1) = fxr
+ how = reflect
+ else ! fxr >= fv(:,n)
+ ! Perform contraction
+ if (fxr < fv(n+1)) then
+ ! Perform an outside contraction
+ xc = (1 + psi*rho)*xbar - psi*rho*v(:,n+1)
+ x(:) = xc
+ fxc = funk(x,AS_V)
+ func_evals = func_evals+1
+
+ if (fxc <= fxr) then
+ v(:,n+1) = xc
+ fv(n+1) = fxc
+ how = contract_outside
+ else
+ ! perform a shrink
+ how = shrink
+ endif
+ else
+ ! Perform an inside contraction
+ xcc = (1-psi)*xbar + psi*v(:,n+1)
+ x(:) = xcc
+ fxcc = funk(x,AS_V)
+ func_evals = func_evals+1
+
+ if (fxcc < fv(n+1)) then
+ v(:,n+1) = xcc
+ fv(n+1) = fxcc
+ how = contract_inside
+ else
+ ! perform a shrink
+ how = shrink
+ endif
+ endif
+ if (how == shrink) then
+ do j=2,n+1
+ v(:,j)=v(:,1)+sigma*(v(:,j) - v(:,1))
+ x(:) = v(:,j)
+ fv(j) = funk(x,AS_V)
+ enddo
+ func_evals = func_evals + n
+ endif
+ endif
+ endif
+
+ call qsort_local(fv,n+1,place)
+ do i = 1,n+1
+ vtmp(:,i) = v(:,place(i))
+ enddo
+ v = vtmp
+
+ itercount = itercount + 1
+ if (prnt == 3) then
+ write(*,*)itercount, func_evals, fv(1), how
+ elseif (prnt == 4) then
+ write(*,*)
+ write(*,*)'How: ',how
+ write(*,*)'v: ',v
+ write(*,*)'fv: ',fv
+ write(*,*)'evals: ',func_evals
+ endif
+ enddo
+
+ if(func_evals > maxfun) then
+ write(*,*)'function evaluations exceeded prescribed limit', maxfun
+ err = 1
+ endif
+ if(itercount > maxiter) then
+ write(*,*)'iterations exceeded prescribed limit', maxiter
+ err = 2
+ endif
+
+666 continue
+ x = v(:,1)
+ tolf = fv(1)
+
+ end subroutine fminsearch
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ double precision function max_value(fv,n)
+
+! - Finds the maximim value of the difference of between the first
+! value and the remaining values of a vector
+! Input
+! fv = Input
+! Vector
+! dimension(n)
+! n = Input
+! Length of fv
+!
+! Returns:
+! Xi = max( || fv(1)- fv(i) || ) for i=2:n
+!
+
+ implicit none
+ integer n
+ double precision fv(n)
+
+ integer i
+ double precision m, z
+
+ m = 0.0d0
+ do i = 2,n
+ z = abs(fv(1) - fv(i))
+ if(z > m) then
+ m = z
+ endif
+ enddo
+
+ max_value = m
+
+ end function max_value
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ double precision function max_size_simplex(v,n)
+
+! - Determines the maximum distance between two point in a simplex
+! Input
+! v = Input
+! Simplex Verticies
+! dimension(n, n+1)
+! n = Pseudo Length of n
+!
+! Returns:
+! Xi = max( max( || v(:,1) - v(:,i) || ) ) for i=2:n+1
+!
+
+ implicit none
+ integer n
+ double precision v(n,n+1)
+
+ integer i,j
+ double precision m, z
+
+ m = 0.0d0
+ do i = 1,n
+ do j = 2,n+1
+ z = abs(v(i,j) - v(i,1))
+ if(z > m) then
+ m = z
+ endif
+ enddo
+ enddo
+
+ max_size_simplex = m
+
+ end function max_size_simplex
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine qsort_local(X,n,I)
+
+! - Implementation of a Bubble Sort Routine
+! Input
+! X = Input/Output
+! Vector to be sorted
+! dimension(n)
+! n = Input
+! Length of X
+! I = Output
+! Sorted Indicies of vecotr X
+!
+! Example:
+! X = [ 4 3 1 2 ] on Input
+! I = [ 1 2 3 4 ] Computed Internally (in order)
+!
+! X = [ 1 2 3 4 ] on Output
+! I = [ 3 4 2 1 ] on Output
+!
+
+ implicit none
+
+ integer n
+ double precision X(n)
+ integer I(n)
+
+ integer j,k
+ double precision rtmp
+ integer itmp
+
+ do j = 1,n
+ I(j) = j
+ enddo
+
+ do j = 1,n
+ do k = 1,n-j
+ if(X(k+1) < X(k)) then
+ rtmp = X(k)
+ X(k) = X(k+1)
+ X(k+1) = rtmp
+
+ itmp = I(k)
+ I(k) = I(k+1)
+ I(k+1) = itmp
+ endif
+ enddo
+ enddo
+
+ end subroutine qsort_local
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine attenuation_simplex_finish(AS_V)
+
+ implicit none
+
+ ! attenuation_simplex_variables
+ type attenuation_simplex_variables
+ sequence
+ double precision Q ! Q = Desired Value of Attenuation or Q
+ double precision iQ ! iQ = 1/Q
+ double precision, dimension(:), pointer :: f
+ ! f = Frequencies at which to evaluate the solution
+ double precision, dimension(:), pointer :: tau_s
+ ! tau_s = Tau_sigma defined by the frequency range and
+ ! number of standard linear solids
+ integer nf ! nf = Number of Frequencies
+ integer nsls ! nsls = Number of Standard Linear Solids
+ end type attenuation_simplex_variables
+ type(attenuation_simplex_variables) AS_V
+ ! attenuation_simplex_variables
+
+ deallocate(AS_V%f)
+ deallocate(AS_V%tau_s)
+
+ end subroutine attenuation_simplex_finish
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_cmt.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_cmt.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_cmt.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,209 +1,209 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine get_cmt(yr,jda,ho,mi,sec,tshift_cmt,hdur,lat,long,depth,moment_tensor,&
- DT,NSOURCES,min_tshift_cmt_original)
-
- implicit none
-
- include "constants.h"
-
-!--- input or output arguments of the subroutine below
-
- integer, intent(in) :: NSOURCES
- double precision, intent(in) :: DT
-
- integer, intent(out) :: yr,jda,ho,mi
- double precision, intent(out) :: sec,min_tshift_cmt_original
- double precision, dimension(NSOURCES), intent(out) :: tshift_cmt,hdur,lat,long,depth
- double precision, dimension(6,NSOURCES), intent(out) :: moment_tensor
-
-!--- local variables below
-
- integer mo,da,julian_day,isource
- double precision t_shift(NSOURCES)
- character(len=5) datasource
- character(len=256) string, CMTSOLUTION
-
- ! initializes
- lat(:) = 0.d0
- long(:) = 0.d0
- depth(:) = 0.d0
- t_shift(:) = 0.d0
- tshift_cmt(:) = 0.d0
- hdur(:) = 0.d0
- moment_tensor(:,:) = 0.d0
- yr = 0
- jda = 0
- ho = 0
- mi = 0
- sec = 0.d0
-
-!
-!---- read hypocenter info
-!
- call get_value_string(CMTSOLUTION, 'solver.CMTSOLUTION', &
- IN_DATA_FILES_PATH(1:len_trim(IN_DATA_FILES_PATH))//'CMTSOLUTION')
-
- open(unit=1,file=CMTSOLUTION,status='old',action='read')
-
-! read source number isource
- do isource=1,NSOURCES
-
- read(1,"(a256)") string
- ! skips empty lines
- do while( len_trim(string) == 0 )
- read(1,"(a256)") string
- enddo
-
- ! read header with event information
- read(string,"(a4,i5,i3,i3,i3,i3,f6.2)") datasource,yr,mo,da,ho,mi,sec
- jda=julian_day(yr,mo,da)
-
- ! ignore line with event name
- read(1,"(a)") string
-
- ! read time shift
- read(1,"(a)") string
- !read(string(12:len_trim(string)),*) tshift_cmt(isource)
- read(string(12:len_trim(string)),*) t_shift(isource)
-
- ! read half duration
- read(1,"(a)") string
- read(string(15:len_trim(string)),*) hdur(isource)
-
- ! read latitude
- read(1,"(a)") string
- read(string(10:len_trim(string)),*) lat(isource)
-
- ! read longitude
- read(1,"(a)") string
- read(string(11:len_trim(string)),*) long(isource)
-
- ! read depth
- read(1,"(a)") string
- read(string(7:len_trim(string)),*) depth(isource)
-
- ! read Mrr
- read(1,"(a)") string
- read(string(5:len_trim(string)),*) moment_tensor(1,isource)
-
- ! read Mtt
- read(1,"(a)") string
- read(string(5:len_trim(string)),*) moment_tensor(2,isource)
-
- ! read Mpp
- read(1,"(a)") string
- read(string(5:len_trim(string)),*) moment_tensor(3,isource)
-
- ! read Mrt
- read(1,"(a)") string
- read(string(5:len_trim(string)),*) moment_tensor(4,isource)
-
- ! read Mrp
- read(1,"(a)") string
- read(string(5:len_trim(string)),*) moment_tensor(5,isource)
-
- ! read Mtp
- read(1,"(a)") string
- read(string(5:len_trim(string)),*) moment_tensor(6,isource)
-
- ! checks half-duration
- if( USE_FORCE_POINT_SOURCE ) then
- ! half-duration is the dominant frequency of the source
- ! point forces use a Ricker source time function
- ! null half-duration indicates a very low-frequency source
- ! (see constants.h: TINYVAL = 1.d-9 )
- if( hdur(isource) < TINYVAL ) hdur(isource) = TINYVAL
- else
- ! null half-duration indicates a Heaviside
- ! replace with very short error function
- if( hdur(isource) < 5. * DT ) hdur(isource) = 5. * DT
- endif
-
- enddo
-
- close(1)
-
- ! Sets tshift_cmt to zero to initiate the simulation!
- if(NSOURCES == 1)then
- tshift_cmt = 0.d0
- min_tshift_cmt_original = t_shift(1)
- else
- tshift_cmt(1:NSOURCES) = t_shift(1:NSOURCES)-minval(t_shift)
- min_tshift_cmt_original = minval(t_shift)
- endif
-
-
- !
- ! scale the moment tensor
- ! CMTSOLUTION file values are in dyne.cm
- ! 1 dyne is 1 gram * 1 cm / (1 second)^2
- ! 1 Newton is 1 kg * 1 m / (1 second)^2
- ! thus 1 Newton = 100,000 dynes
- ! therefore 1 dyne.cm = 1e-7 Newton.m
- !
- moment_tensor(:,:) = moment_tensor(:,:) * 1.d-7
-
- end subroutine get_cmt
-
-! ------------------------------------------------------------------
-
- integer function julian_day(yr,mo,da)
-
- implicit none
-
- integer yr,mo,da
-
- integer mon(12)
- integer lpyr
- data mon /0,31,59,90,120,151,181,212,243,273,304,334/
-
- julian_day = da + mon(mo)
- if(mo>2) julian_day = julian_day + lpyr(yr)
-
- end function julian_day
-
-! ------------------------------------------------------------------
-
- integer function lpyr(yr)
-
- implicit none
-
- integer yr
-!
-!---- returns 1 if leap year
-!
- lpyr=0
- if(mod(yr,400) == 0) then
- lpyr=1
- else if(mod(yr,4) == 0) then
- lpyr=1
- if(mod(yr,100) == 0) lpyr=0
- endif
-
- end function lpyr
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine get_cmt(yr,jda,ho,mi,sec,tshift_cmt,hdur,lat,long,depth,moment_tensor,&
+ DT,NSOURCES,min_tshift_cmt_original)
+
+ implicit none
+
+ include "constants.h"
+
+!--- input or output arguments of the subroutine below
+
+ integer, intent(in) :: NSOURCES
+ double precision, intent(in) :: DT
+
+ integer, intent(out) :: yr,jda,ho,mi
+ double precision, intent(out) :: sec,min_tshift_cmt_original
+ double precision, dimension(NSOURCES), intent(out) :: tshift_cmt,hdur,lat,long,depth
+ double precision, dimension(6,NSOURCES), intent(out) :: moment_tensor
+
+!--- local variables below
+
+ integer mo,da,julian_day,isource
+ double precision t_shift(NSOURCES)
+ character(len=5) datasource
+ character(len=256) string, CMTSOLUTION
+
+ ! initializes
+ lat(:) = 0.d0
+ long(:) = 0.d0
+ depth(:) = 0.d0
+ t_shift(:) = 0.d0
+ tshift_cmt(:) = 0.d0
+ hdur(:) = 0.d0
+ moment_tensor(:,:) = 0.d0
+ yr = 0
+ jda = 0
+ ho = 0
+ mi = 0
+ sec = 0.d0
+
+!
+!---- read hypocenter info
+!
+ call get_value_string(CMTSOLUTION, 'solver.CMTSOLUTION', &
+ IN_DATA_FILES_PATH(1:len_trim(IN_DATA_FILES_PATH))//'CMTSOLUTION')
+
+ open(unit=1,file=CMTSOLUTION,status='old',action='read')
+
+! read source number isource
+ do isource=1,NSOURCES
+
+ read(1,"(a256)") string
+ ! skips empty lines
+ do while( len_trim(string) == 0 )
+ read(1,"(a256)") string
+ enddo
+
+ ! read header with event information
+ read(string,"(a4,i5,i3,i3,i3,i3,f6.2)") datasource,yr,mo,da,ho,mi,sec
+ jda=julian_day(yr,mo,da)
+
+ ! ignore line with event name
+ read(1,"(a)") string
+
+ ! read time shift
+ read(1,"(a)") string
+ !read(string(12:len_trim(string)),*) tshift_cmt(isource)
+ read(string(12:len_trim(string)),*) t_shift(isource)
+
+ ! read half duration
+ read(1,"(a)") string
+ read(string(15:len_trim(string)),*) hdur(isource)
+
+ ! read latitude
+ read(1,"(a)") string
+ read(string(10:len_trim(string)),*) lat(isource)
+
+ ! read longitude
+ read(1,"(a)") string
+ read(string(11:len_trim(string)),*) long(isource)
+
+ ! read depth
+ read(1,"(a)") string
+ read(string(7:len_trim(string)),*) depth(isource)
+
+ ! read Mrr
+ read(1,"(a)") string
+ read(string(5:len_trim(string)),*) moment_tensor(1,isource)
+
+ ! read Mtt
+ read(1,"(a)") string
+ read(string(5:len_trim(string)),*) moment_tensor(2,isource)
+
+ ! read Mpp
+ read(1,"(a)") string
+ read(string(5:len_trim(string)),*) moment_tensor(3,isource)
+
+ ! read Mrt
+ read(1,"(a)") string
+ read(string(5:len_trim(string)),*) moment_tensor(4,isource)
+
+ ! read Mrp
+ read(1,"(a)") string
+ read(string(5:len_trim(string)),*) moment_tensor(5,isource)
+
+ ! read Mtp
+ read(1,"(a)") string
+ read(string(5:len_trim(string)),*) moment_tensor(6,isource)
+
+ ! checks half-duration
+ if( USE_FORCE_POINT_SOURCE ) then
+ ! half-duration is the dominant frequency of the source
+ ! point forces use a Ricker source time function
+ ! null half-duration indicates a very low-frequency source
+ ! (see constants.h: TINYVAL = 1.d-9 )
+ if( hdur(isource) < TINYVAL ) hdur(isource) = TINYVAL
+ else
+ ! null half-duration indicates a Heaviside
+ ! replace with very short error function
+ if( hdur(isource) < 5. * DT ) hdur(isource) = 5. * DT
+ endif
+
+ enddo
+
+ close(1)
+
+ ! Sets tshift_cmt to zero to initiate the simulation!
+ if(NSOURCES == 1)then
+ tshift_cmt = 0.d0
+ min_tshift_cmt_original = t_shift(1)
+ else
+ tshift_cmt(1:NSOURCES) = t_shift(1:NSOURCES)-minval(t_shift)
+ min_tshift_cmt_original = minval(t_shift)
+ endif
+
+
+ !
+ ! scale the moment tensor
+ ! CMTSOLUTION file values are in dyne.cm
+ ! 1 dyne is 1 gram * 1 cm / (1 second)^2
+ ! 1 Newton is 1 kg * 1 m / (1 second)^2
+ ! thus 1 Newton = 100,000 dynes
+ ! therefore 1 dyne.cm = 1e-7 Newton.m
+ !
+ moment_tensor(:,:) = moment_tensor(:,:) * 1.d-7
+
+ end subroutine get_cmt
+
+! ------------------------------------------------------------------
+
+ integer function julian_day(yr,mo,da)
+
+ implicit none
+
+ integer yr,mo,da
+
+ integer mon(12)
+ integer lpyr
+ data mon /0,31,59,90,120,151,181,212,243,273,304,334/
+
+ julian_day = da + mon(mo)
+ if(mo>2) julian_day = julian_day + lpyr(yr)
+
+ end function julian_day
+
+! ------------------------------------------------------------------
+
+ integer function lpyr(yr)
+
+ implicit none
+
+ integer yr
+!
+!---- returns 1 if leap year
+!
+ lpyr=0
+ if(mod(yr,400) == 0) then
+ lpyr=1
+ else if(mod(yr,4) == 0) then
+ lpyr=1
+ if(mod(yr,100) == 0) lpyr=0
+ endif
+
+ end function lpyr
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_jacobian_boundaries.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_jacobian_boundaries.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_jacobian_boundaries.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,933 +1,933 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-
- subroutine get_jacobian_boundary_face(myrank,nspec, &
- xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
- dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
- ispec,iface,jacobian2Dw_face,normal_face,NGLLA,NGLLB)
-
-! returns jacobian2Dw_face and normal_face (pointing outwards of element)
-
- implicit none
-
- include "constants.h"
-
- integer nspec,myrank,nglob
-
-! arrays with the mesh
- integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
- real(kind=CUSTOM_REAL) :: xstore_dummy(nglob),ystore_dummy(nglob),zstore_dummy(nglob)
-
-! face information
- integer :: iface,ispec,NGLLA,NGLLB
- real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
- real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
-
- double precision dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ)
- double precision dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ)
- double precision dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY)
- double precision dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY)
-
- double precision, dimension(NGLLX,NGLLY) :: wgllwgll_xy
- double precision, dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- double precision, dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! local parameters
-! face corners
- double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
-
-! check that the parameter file is correct
- if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
- if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
-
- select case ( iface )
- ! on reference face: xmin
- case(1)
- xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
- yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
- zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
- xelm(2)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
- yelm(2)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
- zelm(2)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
- xelm(3)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
- yelm(3)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
- zelm(3)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
- xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
- yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
- zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
-
- call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
- dershape2D_x,wgllwgll_yz, &
- jacobian2Dw_face,normal_face,NGLLY,NGLLZ)
-
-! on boundary: xmax
- case(2)
- xelm(1)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
- yelm(1)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
- zelm(1)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
- xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
- yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
- zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
- xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
- yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
- zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
- xelm(4)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
- yelm(4)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
- zelm(4)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-
- call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
- dershape2D_x,wgllwgll_yz, &
- jacobian2Dw_face,normal_face,NGLLY,NGLLZ)
-
-! on boundary: ymin
- case(3)
- xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
- yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
- zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
- xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
- yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
- zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
- xelm(3)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
- yelm(3)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
- zelm(3)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
- xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
- yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
- zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
-
- call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
- dershape2D_y,wgllwgll_xz, &
- jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
-
-! on boundary: ymax
- case(4)
- xelm(1)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
- yelm(1)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
- zelm(1)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
- xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
- yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
- zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
- xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
- yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
- zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
- xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
- yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
- zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-
- call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
- dershape2D_y, wgllwgll_xz, &
- jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
-
-
-! on boundary: bottom
- case(5)
- xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
- yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
- zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
- xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
- yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
- zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
- xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
- yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
- zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
- xelm(4)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
- yelm(4)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
- zelm(4)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
-
- call compute_jacobian_2D_face(myrank,xelm,yelm,zelm,&
- dershape2D_bottom,wgllwgll_xy, &
- jacobian2Dw_face,normal_face,NGLLX,NGLLY)
-
-! on boundary: top
- case(6)
- xelm(1)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
- yelm(1)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
- zelm(1)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
- xelm(2)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
- yelm(2)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
- zelm(2)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
- xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
- yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
- zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
- xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
- yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
- zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-
- call compute_jacobian_2D_face(myrank,xelm,yelm,zelm,&
- dershape2D_top, wgllwgll_xy, &
- jacobian2Dw_face,normal_face,NGLLX,NGLLY)
-
- case default
- stop 'error 2D jacobian'
- end select
-
- end subroutine get_jacobian_boundary_face
-
-
-! -------------------------------------------------------
-
- subroutine compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
- dershape2D,wgllwgll, &
- jacobian2Dw_face,normal_face,NGLLA,NGLLB)
-
- implicit none
-
- include "constants.h"
-
-! generic routine that accepts any polynomial degree in each direction
-! returns 2D jacobian and normal for this face only
-
- integer NGLLA,NGLLB,myrank
-
- double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
- double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
- double precision wgllwgll(NGLLA,NGLLB)
-
- real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
- real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
-
- integer i,j,ia
- double precision xxi,xeta,yxi,yeta,zxi,zeta
- double precision unx,uny,unz,jacobian
-
- do j=1,NGLLB
- do i=1,NGLLA
-
- xxi=ZERO
- xeta=ZERO
- yxi=ZERO
- yeta=ZERO
- zxi=ZERO
- zeta=ZERO
- do ia=1,NGNOD2D
- xxi=xxi+dershape2D(1,ia,i,j)*xelm(ia)
- xeta=xeta+dershape2D(2,ia,i,j)*xelm(ia)
- yxi=yxi+dershape2D(1,ia,i,j)*yelm(ia)
- yeta=yeta+dershape2D(2,ia,i,j)*yelm(ia)
- zxi=zxi+dershape2D(1,ia,i,j)*zelm(ia)
- zeta=zeta+dershape2D(2,ia,i,j)*zelm(ia)
- enddo
-
-! calculate the unnormalized normal to the boundary
- unx=yxi*zeta-yeta*zxi
- uny=zxi*xeta-zeta*xxi
- unz=xxi*yeta-xeta*yxi
- jacobian=dsqrt(unx**2+uny**2+unz**2)
- if(jacobian == ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
-
-! normalize normal vector and store weighted surface jacobian
-
-! distinguish if single or double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- jacobian2Dw_face(i,j) = sngl(jacobian * wgllwgll(i,j) )
- normal_face(1,i,j)=sngl(unx/jacobian)
- normal_face(2,i,j)=sngl(uny/jacobian)
- normal_face(3,i,j)=sngl(unz/jacobian)
- else
- jacobian2Dw_face(i,j) = jacobian * wgllwgll(i,j)
- normal_face(1,i,j)=unx/jacobian
- normal_face(2,i,j)=uny/jacobian
- normal_face(3,i,j)=unz/jacobian
- endif
-
- enddo
- enddo
-
- end subroutine compute_jacobian_2D_face
-
-
-! This subroutine recompute the 3D jacobian for one element
-! based upon 125 GLL points
-! Hejun Zhu OCT16,2009
-
-! input: myrank,
-! xstore,ystore,zstore ----- input position
-! xigll,yigll,zigll ----- gll points position
-! ispec,nspec ----- element number
-! ACTUALLY_STORE_ARRAYS ------ save array or not
-
-! output: xixstore,xiystore,xizstore,
-! etaxstore,etaystore,etazstore,
-! gammaxstore,gammaystore,gammazstore ------ parameters used for calculating jacobian
-
-
- subroutine recalc_jacobian_gll2D(myrank,xstore,ystore,zstore, &
- xigll,yigll,wgllwgll,NGLLA,NGLLB, &
- ispec,nspec,jacobian2Dw_face,normal_face)
-
- implicit none
-
- include "constants.h"
-
- ! input parameter
- integer::myrank,ispec,nspec
- double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
-
- integer :: NGLLA,NGLLB
- double precision, dimension(NGLLA):: xigll
- double precision, dimension(NGLLB):: yigll
- double precision:: wgllwgll(NGLLA,NGLLB)
-
- real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
- real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
-
- ! other parameters for this subroutine
- integer:: i,j,k,i1,j1,k1
- double precision:: xxi,xeta,yxi,yeta,zxi,zeta
- double precision:: xi,eta
- double precision,dimension(NGLLA):: hxir,hpxir
- double precision,dimension(NGLLB):: hetar,hpetar
- double precision:: hlagrange,hlagrange_xi,hlagrange_eta
- double precision:: jacobian
- double precision:: unx,uny,unz
-
-
-
- ! test parameters which can be deleted
- double precision:: xmesh,ymesh,zmesh
- double precision:: sumshape,sumdershapexi,sumdershapeeta
-
- ! first go over all gll points on face
- k=1
- do j=1,NGLLB
- do i=1,NGLLA
-
- xxi = 0.0
- xeta = 0.0
- yxi = 0.0
- yeta = 0.0
- zxi = 0.0
- zeta = 0.0
-
- xi = xigll(i)
- eta = yigll(j)
-
- ! calculate lagrange polynomial and its derivative
- call lagrange_any(xi,NGLLA,xigll,hxir,hpxir)
- call lagrange_any(eta,NGLLB,yigll,hetar,hpetar)
-
- ! test parameters
- sumshape = 0.0
- sumdershapexi = 0.0
- sumdershapeeta = 0.0
- xmesh = 0.0
- ymesh = 0.0
- zmesh = 0.0
-
- k1=1
- do j1 = 1,NGLLB
- do i1 = 1,NGLLA
- hlagrange = hxir(i1)*hetar(j1)
- hlagrange_xi = hpxir(i1)*hetar(j1)
- hlagrange_eta = hxir(i1)*hpetar(j1)
-
-
- xxi = xxi + xstore(i1,j1,k1,ispec)*hlagrange_xi
- xeta = xeta + xstore(i1,j1,k1,ispec)*hlagrange_eta
-
- yxi = yxi + ystore(i1,j1,k1,ispec)*hlagrange_xi
- yeta = yeta + ystore(i1,j1,k1,ispec)*hlagrange_eta
-
- zxi = zxi + zstore(i1,j1,k1,ispec)*hlagrange_xi
- zeta = zeta + zstore(i1,j1,k1,ispec)*hlagrange_eta
-
- ! test the lagrange polynomial and its derivate
- xmesh = xmesh + xstore(i1,j1,k1,ispec)*hlagrange
- ymesh = ymesh + ystore(i1,j1,k1,ispec)*hlagrange
- zmesh = zmesh + zstore(i1,j1,k1,ispec)*hlagrange
- sumshape = sumshape + hlagrange
- sumdershapexi = sumdershapexi + hlagrange_xi
- sumdershapeeta = sumdershapeeta + hlagrange_eta
-
- end do
- end do
-
- ! Check the lagrange polynomial and its derivative
- if (xmesh /=xstore(i,j,k,ispec).or.ymesh/=ystore(i,j,k,ispec).or.zmesh/=zstore(i,j,k,ispec)) then
- call exit_MPI(myrank,'new mesh positions are wrong in recalc_jacobian_gall3D.f90')
- end if
- if(abs(sumshape-one) > TINYVAL) then
- call exit_MPI(myrank,'error shape functions in recalc_jacobian_gll3D.f90')
- end if
- if(abs(sumdershapexi) > TINYVAL) then
- call exit_MPI(myrank,'error derivative xi shape functions in recalc_jacobian_gll3D.f90')
- end if
- if(abs(sumdershapeeta) > TINYVAL) then
- call exit_MPI(myrank,'error derivative eta shape functions in recalc_jacobian_gll3D.f90')
- end if
-
-! calculate the unnormalized normal to the boundary
- unx=yxi*zeta-yeta*zxi
- uny=zxi*xeta-zeta*xxi
- unz=xxi*yeta-xeta*yxi
- jacobian=dsqrt(unx**2+uny**2+unz**2)
- if(jacobian <= ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
-
-! normalize normal vector and store weighted surface jacobian
-
-! distinguish if single or double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- jacobian2Dw_face(i,j) = sngl(jacobian * wgllwgll(i,j) )
- normal_face(1,i,j)=sngl(unx/jacobian)
- normal_face(2,i,j)=sngl(uny/jacobian)
- normal_face(3,i,j)=sngl(unz/jacobian)
- else
- jacobian2Dw_face(i,j) = jacobian * wgllwgll(i,j)
- normal_face(1,i,j)=unx/jacobian
- normal_face(2,i,j)=uny/jacobian
- normal_face(3,i,j)=unz/jacobian
- endif
-
- enddo
- enddo
-
- end subroutine recalc_jacobian_gll2D
-
-!
-!------------------------------------------------------------------------------------------------
-!
-!
-! subroutine get_jacobian_boundaries(myrank,iboun,nspec, &
-! xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
-! dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
-! wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
-! ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
-! xcoord_iboun,ycoord_iboun,zcoord_iboun, &
-! nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
-! jacobian2D_xmin,jacobian2D_xmax, &
-! jacobian2D_ymin,jacobian2D_ymax, &
-! jacobian2D_bottom,jacobian2D_top, &
-! normal_xmin,normal_xmax, &
-! normal_ymin,normal_ymax, &
-! normal_bottom,normal_top, &
-! NSPEC2D_BOTTOM,NSPEC2D_TOP)
-!
-! implicit none
-!
-! include "constants.h"
-!
-! integer nspec,myrank,nglob
-!
-!! arrays with the mesh
-! integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-! real(kind=CUSTOM_REAL) :: xstore_dummy(nglob),ystore_dummy(nglob),zstore_dummy(nglob)
-!
-!
-!! absorbing boundaries
-!! (careful with array bounds, no need for NSPEC2DMAX_XMIN_XMAX & NSPEC2DMAX_YMIN_YMAX anymore)
-! integer :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, NSPEC2D_BOTTOM, NSPEC2D_TOP
-! integer, dimension(nspec2D_xmin) :: ibelm_xmin
-! integer, dimension(nspec2D_xmax) :: ibelm_xmax
-! integer, dimension(nspec2D_ymin) :: ibelm_ymin
-! integer, dimension(nspec2D_ymax) :: ibelm_ymax
-! integer, dimension(NSPEC2D_BOTTOM) :: ibelm_bottom
-! integer, dimension(NSPEC2D_TOP) :: ibelm_top
-!
-! logical iboun(6,nspec)
-! real(kind=CUSTOM_REAL), dimension(NGNOD2D,6,nspec) :: xcoord_iboun,ycoord_iboun,zcoord_iboun
-!
-!! double precision xstore(NGLLX,NGLLY,NGLLZ,nspec)
-!! double precision ystore(NGLLX,NGLLY,NGLLZ,nspec)
-!! double precision zstore(NGLLX,NGLLY,NGLLZ,nspec)
-!
-! real(kind=CUSTOM_REAL) jacobian2D_xmin(NGLLY,NGLLZ,NSPEC2D_xmin)
-! real(kind=CUSTOM_REAL) jacobian2D_xmax(NGLLY,NGLLZ,NSPEC2D_xmax)
-! real(kind=CUSTOM_REAL) jacobian2D_ymin(NGLLX,NGLLZ,NSPEC2D_ymin)
-! real(kind=CUSTOM_REAL) jacobian2D_ymax(NGLLX,NGLLZ,NSPEC2D_ymax)
-! real(kind=CUSTOM_REAL) jacobian2D_bottom(NGLLX,NGLLY,NSPEC2D_BOTTOM)
-! real(kind=CUSTOM_REAL) jacobian2D_top(NGLLX,NGLLY,NSPEC2D_TOP)
-!
-! real(kind=CUSTOM_REAL) normal_xmin(NDIM,NGLLY,NGLLZ,NSPEC2D_xmin)
-! real(kind=CUSTOM_REAL) normal_xmax(NDIM,NGLLY,NGLLZ,NSPEC2D_xmax)
-! real(kind=CUSTOM_REAL) normal_ymin(NDIM,NGLLX,NGLLZ,NSPEC2D_ymin)
-! real(kind=CUSTOM_REAL) normal_ymax(NDIM,NGLLX,NGLLZ,NSPEC2D_ymax)
-! real(kind=CUSTOM_REAL) normal_bottom(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM)
-! real(kind=CUSTOM_REAL) normal_top(NDIM,NGLLX,NGLLY,NSPEC2D_TOP)
-!
-! double precision dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ)
-! double precision dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ)
-! double precision dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY)
-! double precision dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY)
-!
-! double precision, dimension(NGLLX,NGLLY) :: wgllwgll_xy
-! double precision, dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-! double precision, dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-!
-! double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
-!
-!! element numbering
-! integer ispec,i,j
-!
-!! counters to keep track of number of elements on each of the boundaries
-! integer ispecb1,ispecb2,ispecb3,ispecb4,ispecb5,ispecb6
-!
-!
-!! check that the parameter file is correct
-! if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
-! if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
-!
-! ispecb1 = 0
-! ispecb2 = 0
-! ispecb3 = 0
-! ispecb4 = 0
-! ispecb5 = 0
-! ispecb6 = 0
-!
-! do ispec=1,nspec
-!
-!! determine if the element falls on a boundary
-!
-!! on boundary: xmin
-!
-! if(iboun(1,ispec)) then
-!
-! ispecb1=ispecb1+1
-! ibelm_xmin(ispecb1)=ispec
-!
-!! specify the 4 nodes for the 2-D boundary element
-!! i.e. face (0,0,0),(0,1,0),(0,1,1),(0,0,1)
-!
-!! careful: these points may not be on the xmin face for unstructured grids
-!! xelm(1)=xstore(1,1,1,ispec)
-!! yelm(1)=ystore(1,1,1,ispec)
-!! zelm(1)=zstore(1,1,1,ispec)
-!! xelm(2)=xstore(1,NGLLY,1,ispec)
-!! yelm(2)=ystore(1,NGLLY,1,ispec)
-!! zelm(2)=zstore(1,NGLLY,1,ispec)
-!! xelm(3)=xstore(1,NGLLY,NGLLZ,ispec)
-!! yelm(3)=ystore(1,NGLLY,NGLLZ,ispec)
-!! zelm(3)=zstore(1,NGLLY,NGLLZ,ispec)
-!! xelm(4)=xstore(1,1,NGLLZ,ispec)
-!! yelm(4)=ystore(1,1,NGLLZ,ispec)
-!! zelm(4)=zstore(1,1,NGLLZ,ispec)
-!
-! xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
-! yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
-! zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
-! xelm(2)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
-! yelm(2)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
-! zelm(2)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
-! xelm(3)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-! yelm(3)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-! zelm(3)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-! xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
-! yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
-! zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
-!
-!! takes coordinates from boundary faces
-!! do i=1,NGNOD2D
-!! xelm(i) = xcoord_iboun(i,1,ispec)
-!! yelm(i) = ycoord_iboun(i,1,ispec)
-!! zelm(i) = zcoord_iboun(i,1,ispec)
-!! enddo
-!
-! call compute_jacobian_2D(myrank,ispecb1,xelm,yelm,zelm, &
-! dershape2D_x,wgllwgll_yz, &
-! jacobian2D_xmin,normal_xmin,NGLLY,NGLLZ,NSPEC2D_xmin)
-!
-! ! normal convention: points away from element
-! ! switches normal direction if necessary
-! do i=1,NGLLY
-! do j=1,NGLLZ
-! call get_element_face_normal(ispecb1, 1, xelm,yelm,zelm, &
-! ibool,nspec,nglob, &
-! xstore_dummy,ystore_dummy,zstore_dummy, &
-! normal_xmin(:,i,j,ispecb1) )
-! enddo
-! enddo
-!
-! endif
-!
-!! on boundary: xmax
-!
-! if(iboun(2,ispec)) then
-!
-! ispecb2=ispecb2+1
-! ibelm_xmax(ispecb2)=ispec
-!
-!! careful...
-!! specify the 4 nodes for the 2-D boundary element
-!! xelm(1)=xstore(NGLLX,1,1,ispec)
-!! yelm(1)=ystore(NGLLX,1,1,ispec)
-!! zelm(1)=zstore(NGLLX,1,1,ispec)
-!! xelm(2)=xstore(NGLLX,NGLLY,1,ispec)
-!! yelm(2)=ystore(NGLLX,NGLLY,1,ispec)
-!! zelm(2)=zstore(NGLLX,NGLLY,1,ispec)
-!! xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
-!! yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
-!! zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
-!! xelm(4)=xstore(NGLLX,1,NGLLZ,ispec)
-!! yelm(4)=ystore(NGLLX,1,NGLLZ,ispec)
-!! zelm(4)=zstore(NGLLX,1,NGLLZ,ispec)
-!
-! xelm(1)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
-! yelm(1)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
-! zelm(1)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
-! xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-! yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-! zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-! xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-! yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-! zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-! xelm(4)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-! yelm(4)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-! zelm(4)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-!
-!! takes coordinates from boundary faces
-!! do i=1,NGNOD2D
-!! xelm(i) = xcoord_iboun(i,2,ispec)
-!! yelm(i) = ycoord_iboun(i,2,ispec)
-!! zelm(i) = zcoord_iboun(i,2,ispec)
-!! enddo
-!
-! call compute_jacobian_2D(myrank,ispecb2,xelm,yelm,zelm, &
-! dershape2D_x,wgllwgll_yz, &
-! jacobian2D_xmax,normal_xmax,NGLLY,NGLLZ,NSPEC2D_xmax)
-!
-! ! normal convention: points away from element
-! ! switch normal direction if necessary
-! do i=1,NGLLY
-! do j=1,NGLLZ
-! call get_element_face_normal(ispecb2, 2, xelm,yelm,zelm, &
-! ibool,nspec,nglob, &
-! xstore_dummy,ystore_dummy,zstore_dummy, &
-! normal_xmax(:,i,j,ispecb2) )
-! enddo
-! enddo
-!
-! endif
-!
-!! on boundary: ymin
-!
-! if(iboun(3,ispec)) then
-!
-! ispecb3=ispecb3+1
-! ibelm_ymin(ispecb3)=ispec
-!
-!! careful...
-!! specify the 4 nodes for the 2-D boundary element
-!! xelm(1)=xstore(1,1,1,ispec)
-!! yelm(1)=ystore(1,1,1,ispec)
-!! zelm(1)=zstore(1,1,1,ispec)
-!! xelm(2)=xstore(NGLLX,1,1,ispec)
-!! yelm(2)=ystore(NGLLX,1,1,ispec)
-!! zelm(2)=zstore(NGLLX,1,1,ispec)
-!! xelm(3)=xstore(NGLLX,1,NGLLZ,ispec)
-!! yelm(3)=ystore(NGLLX,1,NGLLZ,ispec)
-!! zelm(3)=zstore(NGLLX,1,NGLLZ,ispec)
-!! xelm(4)=xstore(1,1,NGLLZ,ispec)
-!! yelm(4)=ystore(1,1,NGLLZ,ispec)
-!! zelm(4)=zstore(1,1,NGLLZ,ispec)
-!
-! xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
-! yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
-! zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
-! xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
-! yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
-! zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
-! xelm(3)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-! yelm(3)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-! zelm(3)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
-! xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
-! yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
-! zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
-!
-!! takes coordinates from boundary faces
-!! do i=1,NGNOD2D
-!! xelm(i) = xcoord_iboun(i,3,ispec)
-!! yelm(i) = ycoord_iboun(i,3,ispec)
-!! zelm(i) = zcoord_iboun(i,3,ispec)
-!! enddo
-!
-! call compute_jacobian_2D(myrank,ispecb3,xelm,yelm,zelm, &
-! dershape2D_y,wgllwgll_xz, &
-! jacobian2D_ymin,normal_ymin,NGLLX,NGLLZ,NSPEC2D_ymin)
-!
-! ! normal convention: points away from element
-! ! switch normal direction if necessary
-! do i=1,NGLLX
-! do j=1,NGLLZ
-! call get_element_face_normal(ispecb3, 3, xelm,yelm,zelm, &
-! ibool,nspec,nglob, &
-! xstore_dummy,ystore_dummy,zstore_dummy, &
-! normal_ymin(:,i,j,ispecb3) )
-! enddo
-! enddo
-!
-!
-! endif
-!
-!! on boundary: ymax
-!
-! if(iboun(4,ispec)) then
-!
-! ispecb4=ispecb4+1
-! ibelm_ymax(ispecb4)=ispec
-!
-!!careful...
-!! specify the 4 nodes for the 2-D boundary element
-!! xelm(1)=xstore(1,NGLLY,1,ispec)
-!! yelm(1)=ystore(1,NGLLY,1,ispec)
-!! zelm(1)=zstore(1,NGLLY,1,ispec)
-!! xelm(2)=xstore(NGLLX,NGLLY,1,ispec)
-!! yelm(2)=ystore(NGLLX,NGLLY,1,ispec)
-!! zelm(2)=zstore(NGLLX,NGLLY,1,ispec)
-!! xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
-!! yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
-!! zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
-!! xelm(4)=xstore(1,NGLLY,NGLLZ,ispec)
-!! yelm(4)=ystore(1,NGLLY,NGLLZ,ispec)
-!! zelm(4)=zstore(1,NGLLY,NGLLZ,ispec)
-!
-! xelm(1)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
-! yelm(1)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
-! zelm(1)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
-! xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-! yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-! zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-! xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-! yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-! zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
-! xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-! yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-! zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
-!
-!! takes coordinates from boundary faces
-!! do i=1,NGNOD2D
-!! xelm(i) = xcoord_iboun(i,4,ispec)
-!! yelm(i) = ycoord_iboun(i,4,ispec)
-!! zelm(i) = zcoord_iboun(i,4,ispec)
-!! enddo
-!!
-! call compute_jacobian_2D(myrank,ispecb4,xelm,yelm,zelm, &
-! dershape2D_y, wgllwgll_xz, &
-! jacobian2D_ymax,normal_ymax,NGLLX,NGLLZ,NSPEC2D_ymax)
-!
-! ! normal convention: points away from element
-! ! switch normal direction if necessary
-! do i=1,NGLLX
-! do j=1,NGLLZ
-! call get_element_face_normal(ispecb4, 4, xelm,yelm,zelm, &
-! ibool,nspec,nglob, &
-! xstore_dummy,ystore_dummy,zstore_dummy, &
-! normal_ymax(:,i,j,ispecb4) )
-! enddo
-! enddo
-!
-! endif
-!
-!! on boundary: bottom
-!
-! if(iboun(5,ispec)) then
-!
-! ispecb5=ispecb5+1
-! ibelm_bottom(ispecb5)=ispec
-!
-!! careful...
-!! for bottom, this might be actually working... when mesh is oriented along z direction...
-!! xelm(1)=xstore(1,1,1,ispec)
-!! yelm(1)=ystore(1,1,1,ispec)
-!! zelm(1)=zstore(1,1,1,ispec)
-!! xelm(2)=xstore(NGLLX,1,1,ispec)
-!! yelm(2)=ystore(NGLLX,1,1,ispec)
-!! zelm(2)=zstore(NGLLX,1,1,ispec)
-!! xelm(3)=xstore(NGLLX,NGLLY,1,ispec)
-!! yelm(3)=ystore(NGLLX,NGLLY,1,ispec)
-!! zelm(3)=zstore(NGLLX,NGLLY,1,ispec)
-!! xelm(4)=xstore(1,NGLLY,1,ispec)
-!! yelm(4)=ystore(1,NGLLY,1,ispec)
-!! zelm(4)=zstore(1,NGLLY,1,ispec)
-!
-! xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
-! yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
-! zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
-! xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
-! yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
-! zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
-! xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-! yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-! zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
-! xelm(4)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
-! yelm(4)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
-! zelm(4)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
-!
-!
-!! takes coordinates from boundary faces
-!! do i=1,NGNOD2D
-!! xelm(i) = xcoord_iboun(i,5,ispec)
-!! yelm(i) = ycoord_iboun(i,5,ispec)
-!! zelm(i) = zcoord_iboun(i,5,ispec)
-!! enddo
-!
-! call compute_jacobian_2D(myrank,ispecb5,xelm,yelm,zelm,&
-! dershape2D_bottom,wgllwgll_xy, &
-! jacobian2D_bottom,normal_bottom,NGLLX,NGLLY,NSPEC2D_BOTTOM)
-!
-! ! normal convention: points away from element
-! ! switch normal direction if necessary
-! do i=1,NGLLX
-! do j=1,NGLLY
-! call get_element_face_normal(ispecb5, 5, xelm,yelm,zelm, &
-! ibool,nspec,nglob, &
-! xstore_dummy,ystore_dummy,zstore_dummy, &
-! normal_bottom(:,i,j,ispecb5) )
-! enddo
-! enddo
-!
-! endif
-!
-!! on boundary: top
-!
-! if(iboun(6,ispec)) then
-!
-! ispecb6=ispecb6+1
-! ibelm_top(ispecb6)=ispec
-!
-!! careful...
-!! for top, this might be working as well ... when mesh is oriented along z direction...
-!! xelm(1)=xstore(1,1,NGLLZ,ispec)
-!! yelm(1)=ystore(1,1,NGLLZ,ispec)
-!! zelm(1)=zstore(1,1,NGLLZ,ispec)
-!! xelm(2)=xstore(NGLLX,1,NGLLZ,ispec)
-!! yelm(2)=ystore(NGLLX,1,NGLLZ,ispec)
-!! zelm(2)=zstore(NGLLX,1,NGLLZ,ispec)
-!! xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
-!! yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
-!! zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
-!! xelm(4)=xstore(1,NGLLY,NGLLZ,ispec)
-!! yelm(4)=ystore(1,NGLLY,NGLLZ,ispec)
-!! zelm(4)=zstore(1,NGLLY,NGLLZ,ispec)
-!
-!
-!! takes coordinates from boundary faces
-!! do i=1,NGNOD2D
-!! xelm(i) = xcoord_iboun(i,6,ispec)
-!! yelm(i) = ycoord_iboun(i,6,ispec)
-!! zelm(i) = zcoord_iboun(i,6,ispec)
-!! enddo
-!
-! call compute_jacobian_2D(myrank,ispecb6,xelm,yelm,zelm,&
-! dershape2D_top, wgllwgll_xy, &
-! jacobian2D_top,normal_top,NGLLX,NGLLY,NSPEC2D_TOP)
-!
-! ! normal convention: points away from element
-! ! switch normal direction if necessary
-! do i=1,NGLLX
-! do j=1,NGLLY
-! call get_element_face_normal(ispecb6, 6, xelm,yelm,zelm, &
-! ibool,nspec,nglob, &
-! xstore_dummy,ystore_dummy,zstore_dummy, &
-! normal_top(:,i,j,ispecb6) )
-! enddo
-! enddo
-!
-! endif
-!
-! enddo
-!
-!! check theoretical value of elements
-!! if(ispecb1 /= NSPEC2D_xmin) call exit_MPI(myrank,'ispecb1 should equal NSPEC2D_xmin')
-!! if(ispecb2 /= NSPEC2D_xmax) call exit_MPI(myrank,'ispecb2 should equal NSPEC2D_xmax')
-!! if(ispecb3 /= NSPEC2D_ymin) call exit_MPI(myrank,'ispecb3 should equal NSPEC2D_ymin')
-!! if(ispecb4 /= NSPEC2D_ymax) call exit_MPI(myrank,'ispecb4 should equal NSPEC2D_ymax')
-!! if(ispecb5 /= NSPEC2D_BOTTOM) call exit_MPI(myrank,'ispecb5 should equal NSPEC2D_BOTTOM')
-!! if(ispecb6 /= NSPEC2D_TOP) call exit_MPI(myrank,'ispecb6 should equal NSPEC2D_TOP')
-!
-! end subroutine get_jacobian_boundaries
-!
-!! -------------------------------------------------------
-!
-! subroutine compute_jacobian_2D(myrank,ispecb,xelm,yelm,zelm, &
-! dershape2D,wgllwgll, &
-! jacobian2D,normal, &
-! NGLLA,NGLLB,NSPEC2DMAX_AB)
-!
-! implicit none
-!
-! include "constants.h"
-!
-!! generic routine that accepts any polynomial degree in each direction
-!
-! integer ispecb,NGLLA,NGLLB,NSPEC2DMAX_AB,myrank
-!
-! double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
-! double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
-! double precision wgllwgll
-!
-! real(kind=CUSTOM_REAL) jacobian2D(NGLLA,NGLLB,NSPEC2DMAX_AB)
-! real(kind=CUSTOM_REAL) normal(3,NGLLA,NGLLB,NSPEC2DMAX_AB)
-!
-! integer i,j,ia
-! double precision xxi,xeta,yxi,yeta,zxi,zeta
-! double precision unx,uny,unz,jacobian
-!
-! do j=1,NGLLB
-! do i=1,NGLLA
-!
-! xxi=ZERO
-! xeta=ZERO
-! yxi=ZERO
-! yeta=ZERO
-! zxi=ZERO
-! zeta=ZERO
-! do ia=1,NGNOD2D
-! xxi=xxi+dershape2D(1,ia,i,j)*xelm(ia)
-! xeta=xeta+dershape2D(2,ia,i,j)*xelm(ia)
-! yxi=yxi+dershape2D(1,ia,i,j)*yelm(ia)
-! yeta=yeta+dershape2D(2,ia,i,j)*yelm(ia)
-! zxi=zxi+dershape2D(1,ia,i,j)*zelm(ia)
-! zeta=zeta+dershape2D(2,ia,i,j)*zelm(ia)
-! enddo
-!
-!! calculate the unnormalized normal to the boundary
-! unx=yxi*zeta-yeta*zxi
-! uny=zxi*xeta-zeta*xxi
-! unz=xxi*yeta-xeta*yxi
-! jacobian=dsqrt(unx**2+uny**2+unz**2)
-! if(jacobian == ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
-!
-!! normalize normal vector and store weighted surface jacobian
-!
-!! distinguish if single or double precision for reals
-! if(CUSTOM_REAL == SIZE_REAL) then
-! jacobian2D(i,j,ispecb) = sngl(jacobian * wgllwgll(i,j) )
-! normal(1,i,j,ispecb)=sngl(unx/jacobian)
-! normal(2,i,j,ispecb)=sngl(uny/jacobian)
-! normal(3,i,j,ispecb)=sngl(unz/jacobian)
-! else
-! jacobian2D(i,j,ispecb) = jacobian * wgllwgll(i,j)
-! normal(1,i,j,ispecb)=unx/jacobian
-! normal(2,i,j,ispecb)=uny/jacobian
-! normal(3,i,j,ispecb)=unz/jacobian
-! endif
-!
-! enddo
-! enddo
-!
-! end subroutine compute_jacobian_2D
-!
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+ subroutine get_jacobian_boundary_face(myrank,nspec, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
+ dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
+ ispec,iface,jacobian2Dw_face,normal_face,NGLLA,NGLLB)
+
+! returns jacobian2Dw_face and normal_face (pointing outwards of element)
+
+ implicit none
+
+ include "constants.h"
+
+ integer nspec,myrank,nglob
+
+! arrays with the mesh
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+ real(kind=CUSTOM_REAL) :: xstore_dummy(nglob),ystore_dummy(nglob),zstore_dummy(nglob)
+
+! face information
+ integer :: iface,ispec,NGLLA,NGLLB
+ real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
+ real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
+
+ double precision dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ)
+ double precision dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ)
+ double precision dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY)
+ double precision dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY)
+
+ double precision, dimension(NGLLX,NGLLY) :: wgllwgll_xy
+ double precision, dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+ double precision, dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+! local parameters
+! face corners
+ double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
+
+! check that the parameter file is correct
+ if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
+ if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
+
+ select case ( iface )
+ ! on reference face: xmin
+ case(1)
+ xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+ yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+ zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+ xelm(2)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+ yelm(2)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+ zelm(2)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+ xelm(3)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+ yelm(3)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+ zelm(3)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+ xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
+ yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
+ zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
+
+ call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
+ dershape2D_x,wgllwgll_yz, &
+ jacobian2Dw_face,normal_face,NGLLY,NGLLZ)
+
+! on boundary: xmax
+ case(2)
+ xelm(1)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+ yelm(1)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+ zelm(1)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+ xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+ yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+ zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+ xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+ yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+ zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+ xelm(4)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+ yelm(4)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+ zelm(4)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+
+ call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
+ dershape2D_x,wgllwgll_yz, &
+ jacobian2Dw_face,normal_face,NGLLY,NGLLZ)
+
+! on boundary: ymin
+ case(3)
+ xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+ yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+ zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+ xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+ yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+ zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+ xelm(3)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+ yelm(3)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+ zelm(3)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+ xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
+ yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
+ zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
+
+ call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
+ dershape2D_y,wgllwgll_xz, &
+ jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
+
+! on boundary: ymax
+ case(4)
+ xelm(1)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+ yelm(1)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+ zelm(1)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+ xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+ yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+ zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+ xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+ yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+ zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+ xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+ yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+ zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+
+ call compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
+ dershape2D_y, wgllwgll_xz, &
+ jacobian2Dw_face,normal_face,NGLLX,NGLLZ)
+
+
+! on boundary: bottom
+ case(5)
+ xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+ yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+ zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+ xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+ yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+ zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+ xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+ yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+ zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+ xelm(4)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+ yelm(4)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+ zelm(4)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+
+ call compute_jacobian_2D_face(myrank,xelm,yelm,zelm,&
+ dershape2D_bottom,wgllwgll_xy, &
+ jacobian2Dw_face,normal_face,NGLLX,NGLLY)
+
+! on boundary: top
+ case(6)
+ xelm(1)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
+ yelm(1)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
+ zelm(1)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
+ xelm(2)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+ yelm(2)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+ zelm(2)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+ xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+ yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+ zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+ xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+ yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+ zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+
+ call compute_jacobian_2D_face(myrank,xelm,yelm,zelm,&
+ dershape2D_top, wgllwgll_xy, &
+ jacobian2Dw_face,normal_face,NGLLX,NGLLY)
+
+ case default
+ stop 'error 2D jacobian'
+ end select
+
+ end subroutine get_jacobian_boundary_face
+
+
+! -------------------------------------------------------
+
+ subroutine compute_jacobian_2D_face(myrank,xelm,yelm,zelm, &
+ dershape2D,wgllwgll, &
+ jacobian2Dw_face,normal_face,NGLLA,NGLLB)
+
+ implicit none
+
+ include "constants.h"
+
+! generic routine that accepts any polynomial degree in each direction
+! returns 2D jacobian and normal for this face only
+
+ integer NGLLA,NGLLB,myrank
+
+ double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
+ double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
+ double precision wgllwgll(NGLLA,NGLLB)
+
+ real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
+ real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
+
+ integer i,j,ia
+ double precision xxi,xeta,yxi,yeta,zxi,zeta
+ double precision unx,uny,unz,jacobian
+
+ do j=1,NGLLB
+ do i=1,NGLLA
+
+ xxi=ZERO
+ xeta=ZERO
+ yxi=ZERO
+ yeta=ZERO
+ zxi=ZERO
+ zeta=ZERO
+ do ia=1,NGNOD2D
+ xxi=xxi+dershape2D(1,ia,i,j)*xelm(ia)
+ xeta=xeta+dershape2D(2,ia,i,j)*xelm(ia)
+ yxi=yxi+dershape2D(1,ia,i,j)*yelm(ia)
+ yeta=yeta+dershape2D(2,ia,i,j)*yelm(ia)
+ zxi=zxi+dershape2D(1,ia,i,j)*zelm(ia)
+ zeta=zeta+dershape2D(2,ia,i,j)*zelm(ia)
+ enddo
+
+! calculate the unnormalized normal to the boundary
+ unx=yxi*zeta-yeta*zxi
+ uny=zxi*xeta-zeta*xxi
+ unz=xxi*yeta-xeta*yxi
+ jacobian=dsqrt(unx**2+uny**2+unz**2)
+ if(jacobian == ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
+
+! normalize normal vector and store weighted surface jacobian
+
+! distinguish if single or double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ jacobian2Dw_face(i,j) = sngl(jacobian * wgllwgll(i,j) )
+ normal_face(1,i,j)=sngl(unx/jacobian)
+ normal_face(2,i,j)=sngl(uny/jacobian)
+ normal_face(3,i,j)=sngl(unz/jacobian)
+ else
+ jacobian2Dw_face(i,j) = jacobian * wgllwgll(i,j)
+ normal_face(1,i,j)=unx/jacobian
+ normal_face(2,i,j)=uny/jacobian
+ normal_face(3,i,j)=unz/jacobian
+ endif
+
+ enddo
+ enddo
+
+ end subroutine compute_jacobian_2D_face
+
+
+! This subroutine recompute the 3D jacobian for one element
+! based upon 125 GLL points
+! Hejun Zhu OCT16,2009
+
+! input: myrank,
+! xstore,ystore,zstore ----- input position
+! xigll,yigll,zigll ----- gll points position
+! ispec,nspec ----- element number
+! ACTUALLY_STORE_ARRAYS ------ save array or not
+
+! output: xixstore,xiystore,xizstore,
+! etaxstore,etaystore,etazstore,
+! gammaxstore,gammaystore,gammazstore ------ parameters used for calculating jacobian
+
+
+ subroutine recalc_jacobian_gll2D(myrank,xstore,ystore,zstore, &
+ xigll,yigll,wgllwgll,NGLLA,NGLLB, &
+ ispec,nspec,jacobian2Dw_face,normal_face)
+
+ implicit none
+
+ include "constants.h"
+
+ ! input parameter
+ integer::myrank,ispec,nspec
+ double precision, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xstore,ystore,zstore
+
+ integer :: NGLLA,NGLLB
+ double precision, dimension(NGLLA):: xigll
+ double precision, dimension(NGLLB):: yigll
+ double precision:: wgllwgll(NGLLA,NGLLB)
+
+ real(kind=CUSTOM_REAL) jacobian2Dw_face(NGLLA,NGLLB)
+ real(kind=CUSTOM_REAL) normal_face(NDIM,NGLLA,NGLLB)
+
+ ! other parameters for this subroutine
+ integer:: i,j,k,i1,j1,k1
+ double precision:: xxi,xeta,yxi,yeta,zxi,zeta
+ double precision:: xi,eta
+ double precision,dimension(NGLLA):: hxir,hpxir
+ double precision,dimension(NGLLB):: hetar,hpetar
+ double precision:: hlagrange,hlagrange_xi,hlagrange_eta
+ double precision:: jacobian
+ double precision:: unx,uny,unz
+
+
+
+ ! test parameters which can be deleted
+ double precision:: xmesh,ymesh,zmesh
+ double precision:: sumshape,sumdershapexi,sumdershapeeta
+
+ ! first go over all gll points on face
+ k=1
+ do j=1,NGLLB
+ do i=1,NGLLA
+
+ xxi = 0.0
+ xeta = 0.0
+ yxi = 0.0
+ yeta = 0.0
+ zxi = 0.0
+ zeta = 0.0
+
+ xi = xigll(i)
+ eta = yigll(j)
+
+ ! calculate lagrange polynomial and its derivative
+ call lagrange_any(xi,NGLLA,xigll,hxir,hpxir)
+ call lagrange_any(eta,NGLLB,yigll,hetar,hpetar)
+
+ ! test parameters
+ sumshape = 0.0
+ sumdershapexi = 0.0
+ sumdershapeeta = 0.0
+ xmesh = 0.0
+ ymesh = 0.0
+ zmesh = 0.0
+
+ k1=1
+ do j1 = 1,NGLLB
+ do i1 = 1,NGLLA
+ hlagrange = hxir(i1)*hetar(j1)
+ hlagrange_xi = hpxir(i1)*hetar(j1)
+ hlagrange_eta = hxir(i1)*hpetar(j1)
+
+
+ xxi = xxi + xstore(i1,j1,k1,ispec)*hlagrange_xi
+ xeta = xeta + xstore(i1,j1,k1,ispec)*hlagrange_eta
+
+ yxi = yxi + ystore(i1,j1,k1,ispec)*hlagrange_xi
+ yeta = yeta + ystore(i1,j1,k1,ispec)*hlagrange_eta
+
+ zxi = zxi + zstore(i1,j1,k1,ispec)*hlagrange_xi
+ zeta = zeta + zstore(i1,j1,k1,ispec)*hlagrange_eta
+
+ ! test the lagrange polynomial and its derivate
+ xmesh = xmesh + xstore(i1,j1,k1,ispec)*hlagrange
+ ymesh = ymesh + ystore(i1,j1,k1,ispec)*hlagrange
+ zmesh = zmesh + zstore(i1,j1,k1,ispec)*hlagrange
+ sumshape = sumshape + hlagrange
+ sumdershapexi = sumdershapexi + hlagrange_xi
+ sumdershapeeta = sumdershapeeta + hlagrange_eta
+
+ end do
+ end do
+
+ ! Check the lagrange polynomial and its derivative
+ if (xmesh /=xstore(i,j,k,ispec).or.ymesh/=ystore(i,j,k,ispec).or.zmesh/=zstore(i,j,k,ispec)) then
+ call exit_MPI(myrank,'new mesh positions are wrong in recalc_jacobian_gall3D.f90')
+ end if
+ if(abs(sumshape-one) > TINYVAL) then
+ call exit_MPI(myrank,'error shape functions in recalc_jacobian_gll3D.f90')
+ end if
+ if(abs(sumdershapexi) > TINYVAL) then
+ call exit_MPI(myrank,'error derivative xi shape functions in recalc_jacobian_gll3D.f90')
+ end if
+ if(abs(sumdershapeeta) > TINYVAL) then
+ call exit_MPI(myrank,'error derivative eta shape functions in recalc_jacobian_gll3D.f90')
+ end if
+
+! calculate the unnormalized normal to the boundary
+ unx=yxi*zeta-yeta*zxi
+ uny=zxi*xeta-zeta*xxi
+ unz=xxi*yeta-xeta*yxi
+ jacobian=dsqrt(unx**2+uny**2+unz**2)
+ if(jacobian <= ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
+
+! normalize normal vector and store weighted surface jacobian
+
+! distinguish if single or double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ jacobian2Dw_face(i,j) = sngl(jacobian * wgllwgll(i,j) )
+ normal_face(1,i,j)=sngl(unx/jacobian)
+ normal_face(2,i,j)=sngl(uny/jacobian)
+ normal_face(3,i,j)=sngl(unz/jacobian)
+ else
+ jacobian2Dw_face(i,j) = jacobian * wgllwgll(i,j)
+ normal_face(1,i,j)=unx/jacobian
+ normal_face(2,i,j)=uny/jacobian
+ normal_face(3,i,j)=unz/jacobian
+ endif
+
+ enddo
+ enddo
+
+ end subroutine recalc_jacobian_gll2D
+
+!
+!------------------------------------------------------------------------------------------------
+!
+!
+! subroutine get_jacobian_boundaries(myrank,iboun,nspec, &
+! xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob,&
+! dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
+! wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,&
+! ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
+! xcoord_iboun,ycoord_iboun,zcoord_iboun, &
+! nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, &
+! jacobian2D_xmin,jacobian2D_xmax, &
+! jacobian2D_ymin,jacobian2D_ymax, &
+! jacobian2D_bottom,jacobian2D_top, &
+! normal_xmin,normal_xmax, &
+! normal_ymin,normal_ymax, &
+! normal_bottom,normal_top, &
+! NSPEC2D_BOTTOM,NSPEC2D_TOP)
+!
+! implicit none
+!
+! include "constants.h"
+!
+! integer nspec,myrank,nglob
+!
+!! arrays with the mesh
+! integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+! real(kind=CUSTOM_REAL) :: xstore_dummy(nglob),ystore_dummy(nglob),zstore_dummy(nglob)
+!
+!
+!! absorbing boundaries
+!! (careful with array bounds, no need for NSPEC2DMAX_XMIN_XMAX & NSPEC2DMAX_YMIN_YMAX anymore)
+! integer :: nspec2D_xmin, nspec2D_xmax, nspec2D_ymin, nspec2D_ymax, NSPEC2D_BOTTOM, NSPEC2D_TOP
+! integer, dimension(nspec2D_xmin) :: ibelm_xmin
+! integer, dimension(nspec2D_xmax) :: ibelm_xmax
+! integer, dimension(nspec2D_ymin) :: ibelm_ymin
+! integer, dimension(nspec2D_ymax) :: ibelm_ymax
+! integer, dimension(NSPEC2D_BOTTOM) :: ibelm_bottom
+! integer, dimension(NSPEC2D_TOP) :: ibelm_top
+!
+! logical iboun(6,nspec)
+! real(kind=CUSTOM_REAL), dimension(NGNOD2D,6,nspec) :: xcoord_iboun,ycoord_iboun,zcoord_iboun
+!
+!! double precision xstore(NGLLX,NGLLY,NGLLZ,nspec)
+!! double precision ystore(NGLLX,NGLLY,NGLLZ,nspec)
+!! double precision zstore(NGLLX,NGLLY,NGLLZ,nspec)
+!
+! real(kind=CUSTOM_REAL) jacobian2D_xmin(NGLLY,NGLLZ,NSPEC2D_xmin)
+! real(kind=CUSTOM_REAL) jacobian2D_xmax(NGLLY,NGLLZ,NSPEC2D_xmax)
+! real(kind=CUSTOM_REAL) jacobian2D_ymin(NGLLX,NGLLZ,NSPEC2D_ymin)
+! real(kind=CUSTOM_REAL) jacobian2D_ymax(NGLLX,NGLLZ,NSPEC2D_ymax)
+! real(kind=CUSTOM_REAL) jacobian2D_bottom(NGLLX,NGLLY,NSPEC2D_BOTTOM)
+! real(kind=CUSTOM_REAL) jacobian2D_top(NGLLX,NGLLY,NSPEC2D_TOP)
+!
+! real(kind=CUSTOM_REAL) normal_xmin(NDIM,NGLLY,NGLLZ,NSPEC2D_xmin)
+! real(kind=CUSTOM_REAL) normal_xmax(NDIM,NGLLY,NGLLZ,NSPEC2D_xmax)
+! real(kind=CUSTOM_REAL) normal_ymin(NDIM,NGLLX,NGLLZ,NSPEC2D_ymin)
+! real(kind=CUSTOM_REAL) normal_ymax(NDIM,NGLLX,NGLLZ,NSPEC2D_ymax)
+! real(kind=CUSTOM_REAL) normal_bottom(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM)
+! real(kind=CUSTOM_REAL) normal_top(NDIM,NGLLX,NGLLY,NSPEC2D_TOP)
+!
+! double precision dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ)
+! double precision dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ)
+! double precision dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY)
+! double precision dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY)
+!
+! double precision, dimension(NGLLX,NGLLY) :: wgllwgll_xy
+! double precision, dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+! double precision, dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+!
+! double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
+!
+!! element numbering
+! integer ispec,i,j
+!
+!! counters to keep track of number of elements on each of the boundaries
+! integer ispecb1,ispecb2,ispecb3,ispecb4,ispecb5,ispecb6
+!
+!
+!! check that the parameter file is correct
+! if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
+! if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
+!
+! ispecb1 = 0
+! ispecb2 = 0
+! ispecb3 = 0
+! ispecb4 = 0
+! ispecb5 = 0
+! ispecb6 = 0
+!
+! do ispec=1,nspec
+!
+!! determine if the element falls on a boundary
+!
+!! on boundary: xmin
+!
+! if(iboun(1,ispec)) then
+!
+! ispecb1=ispecb1+1
+! ibelm_xmin(ispecb1)=ispec
+!
+!! specify the 4 nodes for the 2-D boundary element
+!! i.e. face (0,0,0),(0,1,0),(0,1,1),(0,0,1)
+!
+!! careful: these points may not be on the xmin face for unstructured grids
+!! xelm(1)=xstore(1,1,1,ispec)
+!! yelm(1)=ystore(1,1,1,ispec)
+!! zelm(1)=zstore(1,1,1,ispec)
+!! xelm(2)=xstore(1,NGLLY,1,ispec)
+!! yelm(2)=ystore(1,NGLLY,1,ispec)
+!! zelm(2)=zstore(1,NGLLY,1,ispec)
+!! xelm(3)=xstore(1,NGLLY,NGLLZ,ispec)
+!! yelm(3)=ystore(1,NGLLY,NGLLZ,ispec)
+!! zelm(3)=zstore(1,NGLLY,NGLLZ,ispec)
+!! xelm(4)=xstore(1,1,NGLLZ,ispec)
+!! yelm(4)=ystore(1,1,NGLLZ,ispec)
+!! zelm(4)=zstore(1,1,NGLLZ,ispec)
+!
+! xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+! yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+! zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+! xelm(2)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+! yelm(2)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+! zelm(2)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+! xelm(3)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+! yelm(3)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+! zelm(3)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+! xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
+! yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
+! zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
+!
+!! takes coordinates from boundary faces
+!! do i=1,NGNOD2D
+!! xelm(i) = xcoord_iboun(i,1,ispec)
+!! yelm(i) = ycoord_iboun(i,1,ispec)
+!! zelm(i) = zcoord_iboun(i,1,ispec)
+!! enddo
+!
+! call compute_jacobian_2D(myrank,ispecb1,xelm,yelm,zelm, &
+! dershape2D_x,wgllwgll_yz, &
+! jacobian2D_xmin,normal_xmin,NGLLY,NGLLZ,NSPEC2D_xmin)
+!
+! ! normal convention: points away from element
+! ! switches normal direction if necessary
+! do i=1,NGLLY
+! do j=1,NGLLZ
+! call get_element_face_normal(ispecb1, 1, xelm,yelm,zelm, &
+! ibool,nspec,nglob, &
+! xstore_dummy,ystore_dummy,zstore_dummy, &
+! normal_xmin(:,i,j,ispecb1) )
+! enddo
+! enddo
+!
+! endif
+!
+!! on boundary: xmax
+!
+! if(iboun(2,ispec)) then
+!
+! ispecb2=ispecb2+1
+! ibelm_xmax(ispecb2)=ispec
+!
+!! careful...
+!! specify the 4 nodes for the 2-D boundary element
+!! xelm(1)=xstore(NGLLX,1,1,ispec)
+!! yelm(1)=ystore(NGLLX,1,1,ispec)
+!! zelm(1)=zstore(NGLLX,1,1,ispec)
+!! xelm(2)=xstore(NGLLX,NGLLY,1,ispec)
+!! yelm(2)=ystore(NGLLX,NGLLY,1,ispec)
+!! zelm(2)=zstore(NGLLX,NGLLY,1,ispec)
+!! xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
+!! yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
+!! zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
+!! xelm(4)=xstore(NGLLX,1,NGLLZ,ispec)
+!! yelm(4)=ystore(NGLLX,1,NGLLZ,ispec)
+!! zelm(4)=zstore(NGLLX,1,NGLLZ,ispec)
+!
+! xelm(1)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+! yelm(1)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+! zelm(1)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+! xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+! yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+! zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+! xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+! yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+! zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+! xelm(4)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+! yelm(4)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+! zelm(4)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+!
+!! takes coordinates from boundary faces
+!! do i=1,NGNOD2D
+!! xelm(i) = xcoord_iboun(i,2,ispec)
+!! yelm(i) = ycoord_iboun(i,2,ispec)
+!! zelm(i) = zcoord_iboun(i,2,ispec)
+!! enddo
+!
+! call compute_jacobian_2D(myrank,ispecb2,xelm,yelm,zelm, &
+! dershape2D_x,wgllwgll_yz, &
+! jacobian2D_xmax,normal_xmax,NGLLY,NGLLZ,NSPEC2D_xmax)
+!
+! ! normal convention: points away from element
+! ! switch normal direction if necessary
+! do i=1,NGLLY
+! do j=1,NGLLZ
+! call get_element_face_normal(ispecb2, 2, xelm,yelm,zelm, &
+! ibool,nspec,nglob, &
+! xstore_dummy,ystore_dummy,zstore_dummy, &
+! normal_xmax(:,i,j,ispecb2) )
+! enddo
+! enddo
+!
+! endif
+!
+!! on boundary: ymin
+!
+! if(iboun(3,ispec)) then
+!
+! ispecb3=ispecb3+1
+! ibelm_ymin(ispecb3)=ispec
+!
+!! careful...
+!! specify the 4 nodes for the 2-D boundary element
+!! xelm(1)=xstore(1,1,1,ispec)
+!! yelm(1)=ystore(1,1,1,ispec)
+!! zelm(1)=zstore(1,1,1,ispec)
+!! xelm(2)=xstore(NGLLX,1,1,ispec)
+!! yelm(2)=ystore(NGLLX,1,1,ispec)
+!! zelm(2)=zstore(NGLLX,1,1,ispec)
+!! xelm(3)=xstore(NGLLX,1,NGLLZ,ispec)
+!! yelm(3)=ystore(NGLLX,1,NGLLZ,ispec)
+!! zelm(3)=zstore(NGLLX,1,NGLLZ,ispec)
+!! xelm(4)=xstore(1,1,NGLLZ,ispec)
+!! yelm(4)=ystore(1,1,NGLLZ,ispec)
+!! zelm(4)=zstore(1,1,NGLLZ,ispec)
+!
+! xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+! yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+! zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+! xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+! yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+! zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+! xelm(3)=xstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+! yelm(3)=ystore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+! zelm(3)=zstore_dummy( ibool(NGLLX,1,NGLLZ,ispec) )
+! xelm(4)=xstore_dummy( ibool(1,1,NGLLZ,ispec) )
+! yelm(4)=ystore_dummy( ibool(1,1,NGLLZ,ispec) )
+! zelm(4)=zstore_dummy( ibool(1,1,NGLLZ,ispec) )
+!
+!! takes coordinates from boundary faces
+!! do i=1,NGNOD2D
+!! xelm(i) = xcoord_iboun(i,3,ispec)
+!! yelm(i) = ycoord_iboun(i,3,ispec)
+!! zelm(i) = zcoord_iboun(i,3,ispec)
+!! enddo
+!
+! call compute_jacobian_2D(myrank,ispecb3,xelm,yelm,zelm, &
+! dershape2D_y,wgllwgll_xz, &
+! jacobian2D_ymin,normal_ymin,NGLLX,NGLLZ,NSPEC2D_ymin)
+!
+! ! normal convention: points away from element
+! ! switch normal direction if necessary
+! do i=1,NGLLX
+! do j=1,NGLLZ
+! call get_element_face_normal(ispecb3, 3, xelm,yelm,zelm, &
+! ibool,nspec,nglob, &
+! xstore_dummy,ystore_dummy,zstore_dummy, &
+! normal_ymin(:,i,j,ispecb3) )
+! enddo
+! enddo
+!
+!
+! endif
+!
+!! on boundary: ymax
+!
+! if(iboun(4,ispec)) then
+!
+! ispecb4=ispecb4+1
+! ibelm_ymax(ispecb4)=ispec
+!
+!!careful...
+!! specify the 4 nodes for the 2-D boundary element
+!! xelm(1)=xstore(1,NGLLY,1,ispec)
+!! yelm(1)=ystore(1,NGLLY,1,ispec)
+!! zelm(1)=zstore(1,NGLLY,1,ispec)
+!! xelm(2)=xstore(NGLLX,NGLLY,1,ispec)
+!! yelm(2)=ystore(NGLLX,NGLLY,1,ispec)
+!! zelm(2)=zstore(NGLLX,NGLLY,1,ispec)
+!! xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
+!! yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
+!! zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
+!! xelm(4)=xstore(1,NGLLY,NGLLZ,ispec)
+!! yelm(4)=ystore(1,NGLLY,NGLLZ,ispec)
+!! zelm(4)=zstore(1,NGLLY,NGLLZ,ispec)
+!
+! xelm(1)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+! yelm(1)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+! zelm(1)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+! xelm(2)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+! yelm(2)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+! zelm(2)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+! xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+! yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+! zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,NGLLZ,ispec) )
+! xelm(4)=xstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+! yelm(4)=ystore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+! zelm(4)=zstore_dummy( ibool(1,NGLLY,NGLLZ,ispec) )
+!
+!! takes coordinates from boundary faces
+!! do i=1,NGNOD2D
+!! xelm(i) = xcoord_iboun(i,4,ispec)
+!! yelm(i) = ycoord_iboun(i,4,ispec)
+!! zelm(i) = zcoord_iboun(i,4,ispec)
+!! enddo
+!!
+! call compute_jacobian_2D(myrank,ispecb4,xelm,yelm,zelm, &
+! dershape2D_y, wgllwgll_xz, &
+! jacobian2D_ymax,normal_ymax,NGLLX,NGLLZ,NSPEC2D_ymax)
+!
+! ! normal convention: points away from element
+! ! switch normal direction if necessary
+! do i=1,NGLLX
+! do j=1,NGLLZ
+! call get_element_face_normal(ispecb4, 4, xelm,yelm,zelm, &
+! ibool,nspec,nglob, &
+! xstore_dummy,ystore_dummy,zstore_dummy, &
+! normal_ymax(:,i,j,ispecb4) )
+! enddo
+! enddo
+!
+! endif
+!
+!! on boundary: bottom
+!
+! if(iboun(5,ispec)) then
+!
+! ispecb5=ispecb5+1
+! ibelm_bottom(ispecb5)=ispec
+!
+!! careful...
+!! for bottom, this might be actually working... when mesh is oriented along z direction...
+!! xelm(1)=xstore(1,1,1,ispec)
+!! yelm(1)=ystore(1,1,1,ispec)
+!! zelm(1)=zstore(1,1,1,ispec)
+!! xelm(2)=xstore(NGLLX,1,1,ispec)
+!! yelm(2)=ystore(NGLLX,1,1,ispec)
+!! zelm(2)=zstore(NGLLX,1,1,ispec)
+!! xelm(3)=xstore(NGLLX,NGLLY,1,ispec)
+!! yelm(3)=ystore(NGLLX,NGLLY,1,ispec)
+!! zelm(3)=zstore(NGLLX,NGLLY,1,ispec)
+!! xelm(4)=xstore(1,NGLLY,1,ispec)
+!! yelm(4)=ystore(1,NGLLY,1,ispec)
+!! zelm(4)=zstore(1,NGLLY,1,ispec)
+!
+! xelm(1)=xstore_dummy( ibool(1,1,1,ispec) )
+! yelm(1)=ystore_dummy( ibool(1,1,1,ispec) )
+! zelm(1)=zstore_dummy( ibool(1,1,1,ispec) )
+! xelm(2)=xstore_dummy( ibool(NGLLX,1,1,ispec) )
+! yelm(2)=ystore_dummy( ibool(NGLLX,1,1,ispec) )
+! zelm(2)=zstore_dummy( ibool(NGLLX,1,1,ispec) )
+! xelm(3)=xstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+! yelm(3)=ystore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+! zelm(3)=zstore_dummy( ibool(NGLLX,NGLLY,1,ispec) )
+! xelm(4)=xstore_dummy( ibool(1,NGLLY,1,ispec) )
+! yelm(4)=ystore_dummy( ibool(1,NGLLY,1,ispec) )
+! zelm(4)=zstore_dummy( ibool(1,NGLLY,1,ispec) )
+!
+!
+!! takes coordinates from boundary faces
+!! do i=1,NGNOD2D
+!! xelm(i) = xcoord_iboun(i,5,ispec)
+!! yelm(i) = ycoord_iboun(i,5,ispec)
+!! zelm(i) = zcoord_iboun(i,5,ispec)
+!! enddo
+!
+! call compute_jacobian_2D(myrank,ispecb5,xelm,yelm,zelm,&
+! dershape2D_bottom,wgllwgll_xy, &
+! jacobian2D_bottom,normal_bottom,NGLLX,NGLLY,NSPEC2D_BOTTOM)
+!
+! ! normal convention: points away from element
+! ! switch normal direction if necessary
+! do i=1,NGLLX
+! do j=1,NGLLY
+! call get_element_face_normal(ispecb5, 5, xelm,yelm,zelm, &
+! ibool,nspec,nglob, &
+! xstore_dummy,ystore_dummy,zstore_dummy, &
+! normal_bottom(:,i,j,ispecb5) )
+! enddo
+! enddo
+!
+! endif
+!
+!! on boundary: top
+!
+! if(iboun(6,ispec)) then
+!
+! ispecb6=ispecb6+1
+! ibelm_top(ispecb6)=ispec
+!
+!! careful...
+!! for top, this might be working as well ... when mesh is oriented along z direction...
+!! xelm(1)=xstore(1,1,NGLLZ,ispec)
+!! yelm(1)=ystore(1,1,NGLLZ,ispec)
+!! zelm(1)=zstore(1,1,NGLLZ,ispec)
+!! xelm(2)=xstore(NGLLX,1,NGLLZ,ispec)
+!! yelm(2)=ystore(NGLLX,1,NGLLZ,ispec)
+!! zelm(2)=zstore(NGLLX,1,NGLLZ,ispec)
+!! xelm(3)=xstore(NGLLX,NGLLY,NGLLZ,ispec)
+!! yelm(3)=ystore(NGLLX,NGLLY,NGLLZ,ispec)
+!! zelm(3)=zstore(NGLLX,NGLLY,NGLLZ,ispec)
+!! xelm(4)=xstore(1,NGLLY,NGLLZ,ispec)
+!! yelm(4)=ystore(1,NGLLY,NGLLZ,ispec)
+!! zelm(4)=zstore(1,NGLLY,NGLLZ,ispec)
+!
+!
+!! takes coordinates from boundary faces
+!! do i=1,NGNOD2D
+!! xelm(i) = xcoord_iboun(i,6,ispec)
+!! yelm(i) = ycoord_iboun(i,6,ispec)
+!! zelm(i) = zcoord_iboun(i,6,ispec)
+!! enddo
+!
+! call compute_jacobian_2D(myrank,ispecb6,xelm,yelm,zelm,&
+! dershape2D_top, wgllwgll_xy, &
+! jacobian2D_top,normal_top,NGLLX,NGLLY,NSPEC2D_TOP)
+!
+! ! normal convention: points away from element
+! ! switch normal direction if necessary
+! do i=1,NGLLX
+! do j=1,NGLLY
+! call get_element_face_normal(ispecb6, 6, xelm,yelm,zelm, &
+! ibool,nspec,nglob, &
+! xstore_dummy,ystore_dummy,zstore_dummy, &
+! normal_top(:,i,j,ispecb6) )
+! enddo
+! enddo
+!
+! endif
+!
+! enddo
+!
+!! check theoretical value of elements
+!! if(ispecb1 /= NSPEC2D_xmin) call exit_MPI(myrank,'ispecb1 should equal NSPEC2D_xmin')
+!! if(ispecb2 /= NSPEC2D_xmax) call exit_MPI(myrank,'ispecb2 should equal NSPEC2D_xmax')
+!! if(ispecb3 /= NSPEC2D_ymin) call exit_MPI(myrank,'ispecb3 should equal NSPEC2D_ymin')
+!! if(ispecb4 /= NSPEC2D_ymax) call exit_MPI(myrank,'ispecb4 should equal NSPEC2D_ymax')
+!! if(ispecb5 /= NSPEC2D_BOTTOM) call exit_MPI(myrank,'ispecb5 should equal NSPEC2D_BOTTOM')
+!! if(ispecb6 /= NSPEC2D_TOP) call exit_MPI(myrank,'ispecb6 should equal NSPEC2D_TOP')
+!
+! end subroutine get_jacobian_boundaries
+!
+!! -------------------------------------------------------
+!
+! subroutine compute_jacobian_2D(myrank,ispecb,xelm,yelm,zelm, &
+! dershape2D,wgllwgll, &
+! jacobian2D,normal, &
+! NGLLA,NGLLB,NSPEC2DMAX_AB)
+!
+! implicit none
+!
+! include "constants.h"
+!
+!! generic routine that accepts any polynomial degree in each direction
+!
+! integer ispecb,NGLLA,NGLLB,NSPEC2DMAX_AB,myrank
+!
+! double precision xelm(NGNOD2D),yelm(NGNOD2D),zelm(NGNOD2D)
+! double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
+! double precision wgllwgll
+!
+! real(kind=CUSTOM_REAL) jacobian2D(NGLLA,NGLLB,NSPEC2DMAX_AB)
+! real(kind=CUSTOM_REAL) normal(3,NGLLA,NGLLB,NSPEC2DMAX_AB)
+!
+! integer i,j,ia
+! double precision xxi,xeta,yxi,yeta,zxi,zeta
+! double precision unx,uny,unz,jacobian
+!
+! do j=1,NGLLB
+! do i=1,NGLLA
+!
+! xxi=ZERO
+! xeta=ZERO
+! yxi=ZERO
+! yeta=ZERO
+! zxi=ZERO
+! zeta=ZERO
+! do ia=1,NGNOD2D
+! xxi=xxi+dershape2D(1,ia,i,j)*xelm(ia)
+! xeta=xeta+dershape2D(2,ia,i,j)*xelm(ia)
+! yxi=yxi+dershape2D(1,ia,i,j)*yelm(ia)
+! yeta=yeta+dershape2D(2,ia,i,j)*yelm(ia)
+! zxi=zxi+dershape2D(1,ia,i,j)*zelm(ia)
+! zeta=zeta+dershape2D(2,ia,i,j)*zelm(ia)
+! enddo
+!
+!! calculate the unnormalized normal to the boundary
+! unx=yxi*zeta-yeta*zxi
+! uny=zxi*xeta-zeta*xxi
+! unz=xxi*yeta-xeta*yxi
+! jacobian=dsqrt(unx**2+uny**2+unz**2)
+! if(jacobian == ZERO) call exit_MPI(myrank,'2D Jacobian undefined')
+!
+!! normalize normal vector and store weighted surface jacobian
+!
+!! distinguish if single or double precision for reals
+! if(CUSTOM_REAL == SIZE_REAL) then
+! jacobian2D(i,j,ispecb) = sngl(jacobian * wgllwgll(i,j) )
+! normal(1,i,j,ispecb)=sngl(unx/jacobian)
+! normal(2,i,j,ispecb)=sngl(uny/jacobian)
+! normal(3,i,j,ispecb)=sngl(unz/jacobian)
+! else
+! jacobian2D(i,j,ispecb) = jacobian * wgllwgll(i,j)
+! normal(1,i,j,ispecb)=unx/jacobian
+! normal(2,i,j,ispecb)=uny/jacobian
+! normal(3,i,j,ispecb)=unz/jacobian
+! endif
+!
+! enddo
+! enddo
+!
+! end subroutine compute_jacobian_2D
+!
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape2D.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape2D.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape2D.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,121 +1,121 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine get_shape2D(myrank,shape2D,dershape2D,xigll,yigll,NGLLA,NGLLB)
-
- implicit none
-
- include "constants.h"
-
-! generic routine that accepts any polynomial degree in each direction
-
- integer NGLLA,NGLLB,myrank
-
- double precision xigll(NGLLA)
- double precision yigll(NGLLB)
-
-! 2D shape functions and their derivatives
- double precision shape2D(NGNOD2D,NGLLA,NGLLB)
- double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
-
- integer i,j,ia
-
-! location of the nodes of the 2D quadrilateral elements
- double precision xi,eta
- double precision xi_map,eta_map
-
-! for checking the 2D shape functions
- double precision sumshape,sumdershapexi,sumdershapeeta
-
-! check that the parameter file is correct
- if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
- if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
-
-! generate the 2D shape functions and their derivatives (4 nodes)
- do i=1,NGLLA
-
- xi=xigll(i)
-
- do j=1,NGLLB
-
- eta=yigll(j)
-
-! map coordinates to [0,1]
- xi_map = (xi + 1.) / 2.
- eta_map = (eta + 1.) / 2.
-
-! corner nodes
- shape2D(1,i,j) = (1 - xi_map)*(1 - eta_map)
- shape2D(2,i,j) = xi_map*(1 - eta_map)
- shape2D(3,i,j) = xi_map*eta_map
- shape2D(4,i,j) = (1 - xi_map)*eta_map
-
- dershape2D(1,1,i,j) = (eta - 1.) / 4.
- dershape2D(2,1,i,j) = (xi - 1.) / 4.
-
- dershape2D(1,2,i,j) = (1. - eta) / 4.
- dershape2D(2,2,i,j) = (-1. - xi) / 4.
-
- dershape2D(1,3,i,j) = (1. + eta) / 4.
- dershape2D(2,3,i,j) = (1. + xi) / 4.
-
- dershape2D(1,4,i,j) = (- 1. - eta) / 4.
- dershape2D(2,4,i,j) = (1. - xi) / 4.
-
- enddo
- enddo
-
-! check the 2D shape functions
- do i=1,NGLLA
- do j=1,NGLLB
-
- sumshape=ZERO
-
- sumdershapexi=ZERO
- sumdershapeeta=ZERO
-
- do ia=1,NGNOD2D
- sumshape=sumshape+shape2D(ia,i,j)
-
- sumdershapexi=sumdershapexi+dershape2D(1,ia,i,j)
- sumdershapeeta=sumdershapeeta+dershape2D(2,ia,i,j)
- enddo
-
-! the sum of the shape functions should be 1
- if(abs(sumshape-ONE)>TINYVAL) call exit_MPI(myrank,'error in 2D shape functions')
-
-! the sum of the derivatives of the shape functions should be 0
- if(abs(sumdershapexi)>TINYVAL) &
- call exit_MPI(myrank,'error in xi derivatives of 2D shape function')
-
- if(abs(sumdershapeeta)>TINYVAL) &
- call exit_MPI(myrank,'error in eta derivatives of 2D shape function')
-
- enddo
- enddo
-
- end subroutine get_shape2D
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine get_shape2D(myrank,shape2D,dershape2D,xigll,yigll,NGLLA,NGLLB)
+
+ implicit none
+
+ include "constants.h"
+
+! generic routine that accepts any polynomial degree in each direction
+
+ integer NGLLA,NGLLB,myrank
+
+ double precision xigll(NGLLA)
+ double precision yigll(NGLLB)
+
+! 2D shape functions and their derivatives
+ double precision shape2D(NGNOD2D,NGLLA,NGLLB)
+ double precision dershape2D(NDIM2D,NGNOD2D,NGLLA,NGLLB)
+
+ integer i,j,ia
+
+! location of the nodes of the 2D quadrilateral elements
+ double precision xi,eta
+ double precision xi_map,eta_map
+
+! for checking the 2D shape functions
+ double precision sumshape,sumdershapexi,sumdershapeeta
+
+! check that the parameter file is correct
+ if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
+ if(NGNOD2D /= 4) call exit_MPI(myrank,'surface elements should have 4 control nodes')
+
+! generate the 2D shape functions and their derivatives (4 nodes)
+ do i=1,NGLLA
+
+ xi=xigll(i)
+
+ do j=1,NGLLB
+
+ eta=yigll(j)
+
+! map coordinates to [0,1]
+ xi_map = (xi + 1.) / 2.
+ eta_map = (eta + 1.) / 2.
+
+! corner nodes
+ shape2D(1,i,j) = (1 - xi_map)*(1 - eta_map)
+ shape2D(2,i,j) = xi_map*(1 - eta_map)
+ shape2D(3,i,j) = xi_map*eta_map
+ shape2D(4,i,j) = (1 - xi_map)*eta_map
+
+ dershape2D(1,1,i,j) = (eta - 1.) / 4.
+ dershape2D(2,1,i,j) = (xi - 1.) / 4.
+
+ dershape2D(1,2,i,j) = (1. - eta) / 4.
+ dershape2D(2,2,i,j) = (-1. - xi) / 4.
+
+ dershape2D(1,3,i,j) = (1. + eta) / 4.
+ dershape2D(2,3,i,j) = (1. + xi) / 4.
+
+ dershape2D(1,4,i,j) = (- 1. - eta) / 4.
+ dershape2D(2,4,i,j) = (1. - xi) / 4.
+
+ enddo
+ enddo
+
+! check the 2D shape functions
+ do i=1,NGLLA
+ do j=1,NGLLB
+
+ sumshape=ZERO
+
+ sumdershapexi=ZERO
+ sumdershapeeta=ZERO
+
+ do ia=1,NGNOD2D
+ sumshape=sumshape+shape2D(ia,i,j)
+
+ sumdershapexi=sumdershapexi+dershape2D(1,ia,i,j)
+ sumdershapeeta=sumdershapeeta+dershape2D(2,ia,i,j)
+ enddo
+
+! the sum of the shape functions should be 1
+ if(abs(sumshape-ONE)>TINYVAL) call exit_MPI(myrank,'error in 2D shape functions')
+
+! the sum of the derivatives of the shape functions should be 0
+ if(abs(sumdershapexi)>TINYVAL) &
+ call exit_MPI(myrank,'error in xi derivatives of 2D shape function')
+
+ if(abs(sumdershapeeta)>TINYVAL) &
+ call exit_MPI(myrank,'error in eta derivatives of 2D shape function')
+
+ enddo
+ enddo
+
+ end subroutine get_shape2D
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape3D.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape3D.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/get_shape3D.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,270 +1,270 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! 3D shape functions for 8-node element
-
- subroutine get_shape3D(myrank,shape3D,dershape3D,xigll,yigll,zigll)
-
- implicit none
-
- include "constants.h"
-
- integer myrank
-
-! Gauss-Lobatto-Legendre points of integration
- double precision xigll(NGLLX)
- double precision yigll(NGLLY)
- double precision zigll(NGLLZ)
-
-! 3D shape functions and their derivatives
- double precision shape3D(NGNOD,NGLLX,NGLLY,NGLLZ)
- double precision dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ)
-
- integer i,j,k,ia
-
-! location of the nodes of the 3D quadrilateral elements
- double precision xi,eta,gamma
- double precision ra1,ra2,rb1,rb2,rc1,rc2
-
-! for checking the 3D shape functions
- double precision sumshape,sumdershapexi,sumdershapeeta,sumdershapegamma
-
- double precision, parameter :: ONE_EIGHTH = 0.125d0
-
-! check that the parameter file is correct
- if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
-
-! ***
-! *** create 3D shape functions and jacobian
-! ***
-
-!--- case of a 3D 8-node element (Dhatt-Touzot p. 115)
-
- do i=1,NGLLX
- do j=1,NGLLY
- do k=1,NGLLZ
-
- xi = xigll(i)
- eta = yigll(j)
- gamma = zigll(k)
-
- ra1 = one + xi
- ra2 = one - xi
-
- rb1 = one + eta
- rb2 = one - eta
-
- rc1 = one + gamma
- rc2 = one - gamma
-
- shape3D(1,i,j,k) = ONE_EIGHTH*ra2*rb2*rc2
- shape3D(2,i,j,k) = ONE_EIGHTH*ra1*rb2*rc2
- shape3D(3,i,j,k) = ONE_EIGHTH*ra1*rb1*rc2
- shape3D(4,i,j,k) = ONE_EIGHTH*ra2*rb1*rc2
- shape3D(5,i,j,k) = ONE_EIGHTH*ra2*rb2*rc1
- shape3D(6,i,j,k) = ONE_EIGHTH*ra1*rb2*rc1
- shape3D(7,i,j,k) = ONE_EIGHTH*ra1*rb1*rc1
- shape3D(8,i,j,k) = ONE_EIGHTH*ra2*rb1*rc1
-
- dershape3D(1,1,i,j,k) = - ONE_EIGHTH*rb2*rc2
- dershape3D(1,2,i,j,k) = ONE_EIGHTH*rb2*rc2
- dershape3D(1,3,i,j,k) = ONE_EIGHTH*rb1*rc2
- dershape3D(1,4,i,j,k) = - ONE_EIGHTH*rb1*rc2
- dershape3D(1,5,i,j,k) = - ONE_EIGHTH*rb2*rc1
- dershape3D(1,6,i,j,k) = ONE_EIGHTH*rb2*rc1
- dershape3D(1,7,i,j,k) = ONE_EIGHTH*rb1*rc1
- dershape3D(1,8,i,j,k) = - ONE_EIGHTH*rb1*rc1
-
- dershape3D(2,1,i,j,k) = - ONE_EIGHTH*ra2*rc2
- dershape3D(2,2,i,j,k) = - ONE_EIGHTH*ra1*rc2
- dershape3D(2,3,i,j,k) = ONE_EIGHTH*ra1*rc2
- dershape3D(2,4,i,j,k) = ONE_EIGHTH*ra2*rc2
- dershape3D(2,5,i,j,k) = - ONE_EIGHTH*ra2*rc1
- dershape3D(2,6,i,j,k) = - ONE_EIGHTH*ra1*rc1
- dershape3D(2,7,i,j,k) = ONE_EIGHTH*ra1*rc1
- dershape3D(2,8,i,j,k) = ONE_EIGHTH*ra2*rc1
-
- dershape3D(3,1,i,j,k) = - ONE_EIGHTH*ra2*rb2
- dershape3D(3,2,i,j,k) = - ONE_EIGHTH*ra1*rb2
- dershape3D(3,3,i,j,k) = - ONE_EIGHTH*ra1*rb1
- dershape3D(3,4,i,j,k) = - ONE_EIGHTH*ra2*rb1
- dershape3D(3,5,i,j,k) = ONE_EIGHTH*ra2*rb2
- dershape3D(3,6,i,j,k) = ONE_EIGHTH*ra1*rb2
- dershape3D(3,7,i,j,k) = ONE_EIGHTH*ra1*rb1
- dershape3D(3,8,i,j,k) = ONE_EIGHTH*ra2*rb1
-
- enddo
- enddo
- enddo
-
-!--- check the shape functions and their derivatives
-
- do i=1,NGLLX
- do j=1,NGLLY
- do k=1,NGLLZ
-
- sumshape = ZERO
- sumdershapexi = ZERO
- sumdershapeeta = ZERO
- sumdershapegamma = ZERO
-
- do ia=1,NGNOD
- sumshape = sumshape + shape3D(ia,i,j,k)
- sumdershapexi = sumdershapexi + dershape3D(1,ia,i,j,k)
- sumdershapeeta = sumdershapeeta + dershape3D(2,ia,i,j,k)
- sumdershapegamma = sumdershapegamma + dershape3D(3,ia,i,j,k)
- enddo
-
-! sum of shape functions should be one
-! sum of derivative of shape functions should be zero
- if(abs(sumshape-one) > TINYVAL) call exit_MPI(myrank,'error shape functions')
- if(abs(sumdershapexi) > TINYVAL) call exit_MPI(myrank,'error derivative xi shape functions')
- if(abs(sumdershapeeta) > TINYVAL) call exit_MPI(myrank,'error derivative eta shape functions')
- if(abs(sumdershapegamma) > TINYVAL) call exit_MPI(myrank,'error derivative gamma shape functions')
-
- enddo
- enddo
- enddo
-
- end subroutine get_shape3D
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! 3D shape functions for given, single xi/eta/gamma location
-
- subroutine get_shape3D_single(myrank,shape3D,xi,eta,gamma)
-
- implicit none
-
- include "constants.h"
-
- integer :: myrank
-
- ! 3D shape functions
- double precision :: shape3D(NGNOD)
-
- ! location
- double precision :: xi,eta,gamma
-
- ! local parameters
- double precision :: ra1,ra2,rb1,rb2,rc1,rc2
- double precision, parameter :: ONE_EIGHTH = 0.125d0
- double precision :: sumshape
- integer :: ia
-
-! check that the parameter file is correct
- if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
-
-!--- case of a 3D 8-node element (Dhatt-Touzot p. 115)
- ra1 = one + xi
- ra2 = one - xi
-
- rb1 = one + eta
- rb2 = one - eta
-
- rc1 = one + gamma
- rc2 = one - gamma
-
- ! shape functions
- shape3D(1) = ONE_EIGHTH*ra2*rb2*rc2
- shape3D(2) = ONE_EIGHTH*ra1*rb2*rc2
- shape3D(3) = ONE_EIGHTH*ra1*rb1*rc2
- shape3D(4) = ONE_EIGHTH*ra2*rb1*rc2
- shape3D(5) = ONE_EIGHTH*ra2*rb2*rc1
- shape3D(6) = ONE_EIGHTH*ra1*rb2*rc1
- shape3D(7) = ONE_EIGHTH*ra1*rb1*rc1
- shape3D(8) = ONE_EIGHTH*ra2*rb1*rc1
-
- ! check the shape functions
- sumshape = ZERO
- do ia=1,NGNOD
- sumshape = sumshape + shape3D(ia)
- enddo
-
- ! sum of shape functions should be one
- ! sum of derivative of shape functions should be zero
- if(abs(sumshape-one) > TINYVAL) call exit_MPI(myrank,'error single shape functions')
-
- end subroutine get_shape3D_single
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine get_shape3D_element_corners(xelm,yelm,zelm,ispec,&
- ibool,xstore,ystore,zstore,NSPEC_AB,NGLOB_AB)
-
- implicit none
-
- include "constants.h"
-
- integer :: ispec
- integer :: NSPEC_AB,NGLOB_AB
-
- real(kind=CUSTOM_REAL),dimension(NGNOD),intent(out) :: xelm,yelm,zelm
-
- ! mesh coordinates
- real(kind=CUSTOM_REAL),dimension(NGLOB_AB) :: xstore,ystore,zstore
- integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-
-! 8 node corners
- xelm(1)=xstore(ibool(1,1,1,ispec))
- yelm(1)=ystore(ibool(1,1,1,ispec))
- zelm(1)=zstore(ibool(1,1,1,ispec))
-
- xelm(2)=xstore(ibool(NGLLX,1,1,ispec))
- yelm(2)=ystore(ibool(NGLLX,1,1,ispec))
- zelm(2)=zstore(ibool(NGLLX,1,1,ispec))
-
- xelm(3)=xstore(ibool(NGLLX,NGLLY,1,ispec))
- yelm(3)=ystore(ibool(NGLLX,NGLLY,1,ispec))
- zelm(3)=zstore(ibool(NGLLX,NGLLY,1,ispec))
-
- xelm(4)=xstore(ibool(1,NGLLY,1,ispec))
- yelm(4)=ystore(ibool(1,NGLLY,1,ispec))
- zelm(4)=zstore(ibool(1,NGLLY,1,ispec))
-
- xelm(5)=xstore(ibool(1,1,NGLLZ,ispec))
- yelm(5)=ystore(ibool(1,1,NGLLZ,ispec))
- zelm(5)=zstore(ibool(1,1,NGLLZ,ispec))
-
- xelm(6)=xstore(ibool(NGLLX,1,NGLLZ,ispec))
- yelm(6)=ystore(ibool(NGLLX,1,NGLLZ,ispec))
- zelm(6)=zstore(ibool(NGLLX,1,NGLLZ,ispec))
-
- xelm(7)=xstore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
- yelm(7)=ystore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
- zelm(7)=zstore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
-
- xelm(8)=xstore(ibool(1,NGLLY,NGLLZ,ispec))
- yelm(8)=ystore(ibool(1,NGLLY,NGLLZ,ispec))
- zelm(8)=zstore(ibool(1,NGLLY,NGLLZ,ispec))
-
- end subroutine get_shape3D_element_corners
-
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! 3D shape functions for 8-node element
+
+ subroutine get_shape3D(myrank,shape3D,dershape3D,xigll,yigll,zigll)
+
+ implicit none
+
+ include "constants.h"
+
+ integer myrank
+
+! Gauss-Lobatto-Legendre points of integration
+ double precision xigll(NGLLX)
+ double precision yigll(NGLLY)
+ double precision zigll(NGLLZ)
+
+! 3D shape functions and their derivatives
+ double precision shape3D(NGNOD,NGLLX,NGLLY,NGLLZ)
+ double precision dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ)
+
+ integer i,j,k,ia
+
+! location of the nodes of the 3D quadrilateral elements
+ double precision xi,eta,gamma
+ double precision ra1,ra2,rb1,rb2,rc1,rc2
+
+! for checking the 3D shape functions
+ double precision sumshape,sumdershapexi,sumdershapeeta,sumdershapegamma
+
+ double precision, parameter :: ONE_EIGHTH = 0.125d0
+
+! check that the parameter file is correct
+ if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
+
+! ***
+! *** create 3D shape functions and jacobian
+! ***
+
+!--- case of a 3D 8-node element (Dhatt-Touzot p. 115)
+
+ do i=1,NGLLX
+ do j=1,NGLLY
+ do k=1,NGLLZ
+
+ xi = xigll(i)
+ eta = yigll(j)
+ gamma = zigll(k)
+
+ ra1 = one + xi
+ ra2 = one - xi
+
+ rb1 = one + eta
+ rb2 = one - eta
+
+ rc1 = one + gamma
+ rc2 = one - gamma
+
+ shape3D(1,i,j,k) = ONE_EIGHTH*ra2*rb2*rc2
+ shape3D(2,i,j,k) = ONE_EIGHTH*ra1*rb2*rc2
+ shape3D(3,i,j,k) = ONE_EIGHTH*ra1*rb1*rc2
+ shape3D(4,i,j,k) = ONE_EIGHTH*ra2*rb1*rc2
+ shape3D(5,i,j,k) = ONE_EIGHTH*ra2*rb2*rc1
+ shape3D(6,i,j,k) = ONE_EIGHTH*ra1*rb2*rc1
+ shape3D(7,i,j,k) = ONE_EIGHTH*ra1*rb1*rc1
+ shape3D(8,i,j,k) = ONE_EIGHTH*ra2*rb1*rc1
+
+ dershape3D(1,1,i,j,k) = - ONE_EIGHTH*rb2*rc2
+ dershape3D(1,2,i,j,k) = ONE_EIGHTH*rb2*rc2
+ dershape3D(1,3,i,j,k) = ONE_EIGHTH*rb1*rc2
+ dershape3D(1,4,i,j,k) = - ONE_EIGHTH*rb1*rc2
+ dershape3D(1,5,i,j,k) = - ONE_EIGHTH*rb2*rc1
+ dershape3D(1,6,i,j,k) = ONE_EIGHTH*rb2*rc1
+ dershape3D(1,7,i,j,k) = ONE_EIGHTH*rb1*rc1
+ dershape3D(1,8,i,j,k) = - ONE_EIGHTH*rb1*rc1
+
+ dershape3D(2,1,i,j,k) = - ONE_EIGHTH*ra2*rc2
+ dershape3D(2,2,i,j,k) = - ONE_EIGHTH*ra1*rc2
+ dershape3D(2,3,i,j,k) = ONE_EIGHTH*ra1*rc2
+ dershape3D(2,4,i,j,k) = ONE_EIGHTH*ra2*rc2
+ dershape3D(2,5,i,j,k) = - ONE_EIGHTH*ra2*rc1
+ dershape3D(2,6,i,j,k) = - ONE_EIGHTH*ra1*rc1
+ dershape3D(2,7,i,j,k) = ONE_EIGHTH*ra1*rc1
+ dershape3D(2,8,i,j,k) = ONE_EIGHTH*ra2*rc1
+
+ dershape3D(3,1,i,j,k) = - ONE_EIGHTH*ra2*rb2
+ dershape3D(3,2,i,j,k) = - ONE_EIGHTH*ra1*rb2
+ dershape3D(3,3,i,j,k) = - ONE_EIGHTH*ra1*rb1
+ dershape3D(3,4,i,j,k) = - ONE_EIGHTH*ra2*rb1
+ dershape3D(3,5,i,j,k) = ONE_EIGHTH*ra2*rb2
+ dershape3D(3,6,i,j,k) = ONE_EIGHTH*ra1*rb2
+ dershape3D(3,7,i,j,k) = ONE_EIGHTH*ra1*rb1
+ dershape3D(3,8,i,j,k) = ONE_EIGHTH*ra2*rb1
+
+ enddo
+ enddo
+ enddo
+
+!--- check the shape functions and their derivatives
+
+ do i=1,NGLLX
+ do j=1,NGLLY
+ do k=1,NGLLZ
+
+ sumshape = ZERO
+ sumdershapexi = ZERO
+ sumdershapeeta = ZERO
+ sumdershapegamma = ZERO
+
+ do ia=1,NGNOD
+ sumshape = sumshape + shape3D(ia,i,j,k)
+ sumdershapexi = sumdershapexi + dershape3D(1,ia,i,j,k)
+ sumdershapeeta = sumdershapeeta + dershape3D(2,ia,i,j,k)
+ sumdershapegamma = sumdershapegamma + dershape3D(3,ia,i,j,k)
+ enddo
+
+! sum of shape functions should be one
+! sum of derivative of shape functions should be zero
+ if(abs(sumshape-one) > TINYVAL) call exit_MPI(myrank,'error shape functions')
+ if(abs(sumdershapexi) > TINYVAL) call exit_MPI(myrank,'error derivative xi shape functions')
+ if(abs(sumdershapeeta) > TINYVAL) call exit_MPI(myrank,'error derivative eta shape functions')
+ if(abs(sumdershapegamma) > TINYVAL) call exit_MPI(myrank,'error derivative gamma shape functions')
+
+ enddo
+ enddo
+ enddo
+
+ end subroutine get_shape3D
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! 3D shape functions for given, single xi/eta/gamma location
+
+ subroutine get_shape3D_single(myrank,shape3D,xi,eta,gamma)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: myrank
+
+ ! 3D shape functions
+ double precision :: shape3D(NGNOD)
+
+ ! location
+ double precision :: xi,eta,gamma
+
+ ! local parameters
+ double precision :: ra1,ra2,rb1,rb2,rc1,rc2
+ double precision, parameter :: ONE_EIGHTH = 0.125d0
+ double precision :: sumshape
+ integer :: ia
+
+! check that the parameter file is correct
+ if(NGNOD /= 8) call exit_MPI(myrank,'elements should have 8 control nodes')
+
+!--- case of a 3D 8-node element (Dhatt-Touzot p. 115)
+ ra1 = one + xi
+ ra2 = one - xi
+
+ rb1 = one + eta
+ rb2 = one - eta
+
+ rc1 = one + gamma
+ rc2 = one - gamma
+
+ ! shape functions
+ shape3D(1) = ONE_EIGHTH*ra2*rb2*rc2
+ shape3D(2) = ONE_EIGHTH*ra1*rb2*rc2
+ shape3D(3) = ONE_EIGHTH*ra1*rb1*rc2
+ shape3D(4) = ONE_EIGHTH*ra2*rb1*rc2
+ shape3D(5) = ONE_EIGHTH*ra2*rb2*rc1
+ shape3D(6) = ONE_EIGHTH*ra1*rb2*rc1
+ shape3D(7) = ONE_EIGHTH*ra1*rb1*rc1
+ shape3D(8) = ONE_EIGHTH*ra2*rb1*rc1
+
+ ! check the shape functions
+ sumshape = ZERO
+ do ia=1,NGNOD
+ sumshape = sumshape + shape3D(ia)
+ enddo
+
+ ! sum of shape functions should be one
+ ! sum of derivative of shape functions should be zero
+ if(abs(sumshape-one) > TINYVAL) call exit_MPI(myrank,'error single shape functions')
+
+ end subroutine get_shape3D_single
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine get_shape3D_element_corners(xelm,yelm,zelm,ispec,&
+ ibool,xstore,ystore,zstore,NSPEC_AB,NGLOB_AB)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: ispec
+ integer :: NSPEC_AB,NGLOB_AB
+
+ real(kind=CUSTOM_REAL),dimension(NGNOD),intent(out) :: xelm,yelm,zelm
+
+ ! mesh coordinates
+ real(kind=CUSTOM_REAL),dimension(NGLOB_AB) :: xstore,ystore,zstore
+ integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+
+! 8 node corners
+ xelm(1)=xstore(ibool(1,1,1,ispec))
+ yelm(1)=ystore(ibool(1,1,1,ispec))
+ zelm(1)=zstore(ibool(1,1,1,ispec))
+
+ xelm(2)=xstore(ibool(NGLLX,1,1,ispec))
+ yelm(2)=ystore(ibool(NGLLX,1,1,ispec))
+ zelm(2)=zstore(ibool(NGLLX,1,1,ispec))
+
+ xelm(3)=xstore(ibool(NGLLX,NGLLY,1,ispec))
+ yelm(3)=ystore(ibool(NGLLX,NGLLY,1,ispec))
+ zelm(3)=zstore(ibool(NGLLX,NGLLY,1,ispec))
+
+ xelm(4)=xstore(ibool(1,NGLLY,1,ispec))
+ yelm(4)=ystore(ibool(1,NGLLY,1,ispec))
+ zelm(4)=zstore(ibool(1,NGLLY,1,ispec))
+
+ xelm(5)=xstore(ibool(1,1,NGLLZ,ispec))
+ yelm(5)=ystore(ibool(1,1,NGLLZ,ispec))
+ zelm(5)=zstore(ibool(1,1,NGLLZ,ispec))
+
+ xelm(6)=xstore(ibool(NGLLX,1,NGLLZ,ispec))
+ yelm(6)=ystore(ibool(NGLLX,1,NGLLZ,ispec))
+ zelm(6)=zstore(ibool(NGLLX,1,NGLLZ,ispec))
+
+ xelm(7)=xstore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
+ yelm(7)=ystore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
+ zelm(7)=zstore(ibool(NGLLX,NGLLY,NGLLZ,ispec))
+
+ xelm(8)=xstore(ibool(1,NGLLY,NGLLZ,ispec))
+ yelm(8)=ystore(ibool(1,NGLLY,NGLLZ,ispec))
+ zelm(8)=zstore(ibool(1,NGLLY,NGLLZ,ispec))
+
+ end subroutine get_shape3D_element_corners
+
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/gll_library.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/gll_library.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/gll_library.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,554 +1,554 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!=======================================================================
-!
-! Library to compute the Gauss-Lobatto-Legendre points and weights
-! Based on Gauss-Lobatto routines from M.I.T.
-! Department of Mechanical Engineering
-!
-!=======================================================================
-
- double precision function endw1(n,alpha,beta)
-
- implicit none
-
- integer n
- double precision alpha,beta
-
- double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0,three=3.d0,four=4.d0
- double precision apb,f1,fint1,fint2,f2,di,abn,abnn,a1,a2,a3,f3
- double precision, external :: gammaf
- integer i
-
- f3 = zero
- apb = alpha+beta
- if (n == 0) then
- endw1 = zero
- return
- endif
- f1 = gammaf(alpha+two)*gammaf(beta+one)/gammaf(apb+three)
- f1 = f1*(apb+two)*two**(apb+two)/two
- if (n == 1) then
- endw1 = f1
- return
- endif
- fint1 = gammaf(alpha+two)*gammaf(beta+one)/gammaf(apb+three)
- fint1 = fint1*two**(apb+two)
- fint2 = gammaf(alpha+two)*gammaf(beta+two)/gammaf(apb+four)
- fint2 = fint2*two**(apb+three)
- f2 = (-two*(beta+two)*fint1 + (apb+four)*fint2) * (apb+three)/four
- if (n == 2) then
- endw1 = f2
- return
- endif
- do i=3,n
- di = dble(i-1)
- abn = alpha+beta+di
- abnn = abn+di
- a1 = -(two*(di+alpha)*(di+beta))/(abn*abnn*(abnn+one))
- a2 = (two*(alpha-beta))/(abnn*(abnn+two))
- a3 = (two*(abn+one))/((abnn+two)*(abnn+one))
- f3 = -(a2*f2+a1*f1)/a3
- f1 = f2
- f2 = f3
- enddo
- endw1 = f3
-
- end function endw1
-
-!
-!=======================================================================
-!
-
- double precision function endw2(n,alpha,beta)
-
- implicit none
-
- integer n
- double precision alpha,beta
-
- double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0,three=3.d0,four=4.d0
- double precision apb,f1,fint1,fint2,f2,di,abn,abnn,a1,a2,a3,f3
- double precision, external :: gammaf
- integer i
-
- apb = alpha+beta
- f3 = zero
- if (n == 0) then
- endw2 = zero
- return
- endif
- f1 = gammaf(alpha+one)*gammaf(beta+two)/gammaf(apb+three)
- f1 = f1*(apb+two)*two**(apb+two)/two
- if (n == 1) then
- endw2 = f1
- return
- endif
- fint1 = gammaf(alpha+one)*gammaf(beta+two)/gammaf(apb+three)
- fint1 = fint1*two**(apb+two)
- fint2 = gammaf(alpha+two)*gammaf(beta+two)/gammaf(apb+four)
- fint2 = fint2*two**(apb+three)
- f2 = (two*(alpha+two)*fint1 - (apb+four)*fint2) * (apb+three)/four
- if (n == 2) then
- endw2 = f2
- return
- endif
- do i=3,n
- di = dble(i-1)
- abn = alpha+beta+di
- abnn = abn+di
- a1 = -(two*(di+alpha)*(di+beta))/(abn*abnn*(abnn+one))
- a2 = (two*(alpha-beta))/(abnn*(abnn+two))
- a3 = (two*(abn+one))/((abnn+two)*(abnn+one))
- f3 = -(a2*f2+a1*f1)/a3
- f1 = f2
- f2 = f3
- enddo
- endw2 = f3
-
- end function endw2
-
-!
-!=======================================================================
-!
-
- double precision function gammaf (x)
-
- implicit none
-
- double precision, parameter :: pi = 3.141592653589793d0
-
- double precision x
-
- double precision, parameter :: half=0.5d0,one=1.d0,two=2.d0
-
- gammaf = one
-
- if (x == -half) gammaf = -two*dsqrt(pi)
- if (x == half) gammaf = dsqrt(pi)
- if (x == one ) gammaf = one
- if (x == two ) gammaf = one
- if (x == 1.5d0) gammaf = dsqrt(pi)/2.d0
- if (x == 2.5d0) gammaf = 1.5d0*dsqrt(pi)/2.d0
- if (x == 3.5d0) gammaf = 2.5d0*1.5d0*dsqrt(pi)/2.d0
- if (x == 3.d0 ) gammaf = 2.d0
- if (x == 4.d0 ) gammaf = 6.d0
- if (x == 5.d0 ) gammaf = 24.d0
- if (x == 6.d0 ) gammaf = 120.d0
-
- end function gammaf
-
-!
-!=====================================================================
-!
-
- subroutine jacg (xjac,np,alpha,beta)
-
-!=======================================================================
-!
-! computes np Gauss points, which are the zeros of the
-! Jacobi polynomial with parameters alpha and beta
-!
-! .alpha = beta = 0.0 -> Legendre points
-! .alpha = beta = -0.5 -> Chebyshev points
-!
-!=======================================================================
-
- implicit none
-
- integer np
- double precision alpha,beta
- double precision xjac(np)
-
- integer k,j,i,jmin,jm,n
- double precision xlast,dth,x,x1,x2,recsum,delx,xmin,swap
- double precision p,pd,pm1,pdm1,pm2,pdm2
-
- integer, parameter :: K_MAX_ITER = 10
- double precision, parameter :: zero = 0.d0, eps = 1.0d-12
-
- pm1 = zero
- pm2 = zero
- pdm1 = zero
- pdm2 = zero
-
- xlast = 0.d0
- n = np-1
- dth = 4.d0*datan(1.d0)/(2.d0*dble(n)+2.d0)
- p = 0.d0
- pd = 0.d0
- jmin = 0
- do j=1,np
- if(j == 1) then
- x = dcos((2.d0*(dble(j)-1.d0)+1.d0)*dth)
- else
- x1 = dcos((2.d0*(dble(j)-1.d0)+1.d0)*dth)
- x2 = xlast
- x = (x1+x2)/2.d0
- endif
- do k=1,K_MAX_ITER
- call jacobf (p,pd,pm1,pdm1,pm2,pdm2,np,alpha,beta,x)
- recsum = 0.d0
- jm = j-1
- do i=1,jm
- recsum = recsum+1.d0/(x-xjac(np-i+1))
- enddo
- delx = -p/(pd-recsum*p)
- x = x+delx
- if(abs(delx) < eps) goto 31
- enddo
- 31 continue
- xjac(np-j+1) = x
- xlast = x
- enddo
- do i=1,np
- xmin = 2.d0
- do j=i,np
- if(xjac(j) < xmin) then
- xmin = xjac(j)
- jmin = j
- endif
- enddo
- if(jmin /= i) then
- swap = xjac(i)
- xjac(i) = xjac(jmin)
- xjac(jmin) = swap
- endif
- enddo
-
- end subroutine jacg
-
-!
-!=====================================================================
-!
-
- subroutine jacobf (poly,pder,polym1,pderm1,polym2,pderm2,n,alp,bet,x)
-
-!=======================================================================
-!
-! Computes the Jacobi polynomial of degree n and its derivative at x
-!
-!=======================================================================
-
- implicit none
-
- double precision poly,pder,polym1,pderm1,polym2,pderm2,alp,bet,x
- integer n
-
- double precision apb,polyl,pderl,dk,a1,a2,b3,a3,a4,polyn,pdern,psave,pdsave
- integer k
-
- apb = alp+bet
- poly = 1.d0
- pder = 0.d0
- psave = 0.d0
- pdsave = 0.d0
-
- if (n == 0) return
-
- polyl = poly
- pderl = pder
- poly = (alp-bet+(apb+2.d0)*x)/2.d0
- pder = (apb+2.d0)/2.d0
- if (n == 1) return
-
- do k=2,n
- dk = dble(k)
- a1 = 2.d0*dk*(dk+apb)*(2.d0*dk+apb-2.d0)
- a2 = (2.d0*dk+apb-1.d0)*(alp**2-bet**2)
- b3 = (2.d0*dk+apb-2.d0)
- a3 = b3*(b3+1.d0)*(b3+2.d0)
- a4 = 2.d0*(dk+alp-1.d0)*(dk+bet-1.d0)*(2.d0*dk+apb)
- polyn = ((a2+a3*x)*poly-a4*polyl)/a1
- pdern = ((a2+a3*x)*pder-a4*pderl+a3*poly)/a1
- psave = polyl
- pdsave = pderl
- polyl = poly
- poly = polyn
- pderl = pder
- pder = pdern
- enddo
-
- polym1 = polyl
- pderm1 = pderl
- polym2 = psave
- pderm2 = pdsave
-
- end subroutine jacobf
-
-!
-!------------------------------------------------------------------------
-!
-
- double precision FUNCTION PNDLEG (Z,N)
-
-!------------------------------------------------------------------------
-!
-! Compute the derivative of the Nth order Legendre polynomial at Z.
-! Based on the recursion formula for the Legendre polynomials.
-!
-!------------------------------------------------------------------------
- implicit none
-
- double precision z
- integer n
-
- double precision P1,P2,P1D,P2D,P3D,FK,P3
- integer k
-
- P1 = 1.d0
- P2 = Z
- P1D = 0.d0
- P2D = 1.d0
- P3D = 1.d0
-
- do K = 1, N-1
- FK = dble(K)
- P3 = ((2.d0*FK+1.d0)*Z*P2 - FK*P1)/(FK+1.d0)
- P3D = ((2.d0*FK+1.d0)*P2 + (2.d0*FK+1.d0)*Z*P2D - FK*P1D) / (FK+1.d0)
- P1 = P2
- P2 = P3
- P1D = P2D
- P2D = P3D
- enddo
-
- PNDLEG = P3D
-
- end function pndleg
-
-!
-!------------------------------------------------------------------------
-!
-
- double precision FUNCTION PNLEG (Z,N)
-
-!------------------------------------------------------------------------
-!
-! Compute the value of the Nth order Legendre polynomial at Z.
-! Based on the recursion formula for the Legendre polynomials.
-!
-!------------------------------------------------------------------------
- implicit none
-
- double precision z
- integer n
-
- double precision P1,P2,P3,FK
- integer k
-
- P1 = 1.d0
- P2 = Z
- P3 = P2
-
- do K = 1, N-1
- FK = dble(K)
- P3 = ((2.d0*FK+1.d0)*Z*P2 - FK*P1)/(FK+1.d0)
- P1 = P2
- P2 = P3
- enddo
-
- PNLEG = P3
-
- end function pnleg
-
-!
-!------------------------------------------------------------------------
-!
-
- double precision function pnormj (n,alpha,beta)
-
- implicit none
-
- double precision alpha,beta
- integer n
-
- double precision one,two,dn,const,prod,dindx,frac
- double precision, external :: gammaf
- integer i
-
- one = 1.d0
- two = 2.d0
- dn = dble(n)
- const = alpha+beta+one
-
- if (n <= 1) then
- prod = gammaf(dn+alpha)*gammaf(dn+beta)
- prod = prod/(gammaf(dn)*gammaf(dn+alpha+beta))
- pnormj = prod * two**const/(two*dn+const)
- return
- endif
-
- prod = gammaf(alpha+one)*gammaf(beta+one)
- prod = prod/(two*(one+const)*gammaf(const+one))
- prod = prod*(one+alpha)*(two+alpha)
- prod = prod*(one+beta)*(two+beta)
-
- do i=3,n
- dindx = dble(i)
- frac = (dindx+alpha)*(dindx+beta)/(dindx*(dindx+alpha+beta))
- prod = prod*frac
- enddo
-
- pnormj = prod * two**const/(two*dn+const)
-
- end function pnormj
-
-!
-!------------------------------------------------------------------------
-!
-
- subroutine zwgjd(z,w,np,alpha,beta)
-
-!=======================================================================
-!
-! Z w g j d : Generate np Gauss-Jacobi points and weights
-! associated with Jacobi polynomial of degree n = np-1
-!
-! Note : Coefficients alpha and beta must be greater than -1.
-! ----
-!=======================================================================
-
- implicit none
-
- double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0
-
- integer np
- double precision z(np),w(np)
- double precision alpha,beta
-
- integer n,np1,np2,i
- double precision p,pd,pm1,pdm1,pm2,pdm2
- double precision apb,dnp1,dnp2,fac1,fac2,fac3,fnorm,rcoef
- double precision, external :: gammaf,pnormj
-
- pd = zero
- pm1 = zero
- pm2 = zero
- pdm1 = zero
- pdm2 = zero
-
- n = np-1
- apb = alpha+beta
- p = zero
- pdm1 = zero
-
- if (np <= 0) stop 'minimum number of Gauss points is 1'
-
- if ((alpha <= -one) .or. (beta <= -one)) stop 'alpha and beta must be greater than -1'
-
- if (np == 1) then
- z(1) = (beta-alpha)/(apb+two)
- w(1) = gammaf(alpha+one)*gammaf(beta+one)/gammaf(apb+two) * two**(apb+one)
- return
- endif
-
- call jacg(z,np,alpha,beta)
-
- np1 = n+1
- np2 = n+2
- dnp1 = dble(np1)
- dnp2 = dble(np2)
- fac1 = dnp1+alpha+beta+one
- fac2 = fac1+dnp1
- fac3 = fac2+one
- fnorm = pnormj(np1,alpha,beta)
- rcoef = (fnorm*fac2*fac3)/(two*fac1*dnp2)
- do i=1,np
- call jacobf(p,pd,pm1,pdm1,pm2,pdm2,np2,alpha,beta,z(i))
- w(i) = -rcoef/(p*pdm1)
- enddo
-
- end subroutine zwgjd
-
-!
-!------------------------------------------------------------------------
-!
-
- subroutine zwgljd(z,w,np,alpha,beta)
-
-!=======================================================================
-!
-! Z w g l j d : Generate np Gauss-Lobatto-Jacobi points and the
-! ----------- weights associated with Jacobi polynomials of degree
-! n = np-1.
-!
-! Note : alpha and beta coefficients must be greater than -1.
-! Legendre polynomials are special case of Jacobi polynomials
-! just by setting alpha and beta to 0.
-!
-!=======================================================================
-
- implicit none
-
- double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0
-
- integer np
- double precision alpha,beta
- double precision z(np), w(np)
-
- integer n,nm1,i
- double precision p,pd,pm1,pdm1,pm2,pdm2
- double precision alpg,betg
- double precision, external :: endw1,endw2
-
- p = zero
- pm1 = zero
- pm2 = zero
- pdm1 = zero
- pdm2 = zero
-
- n = np-1
- nm1 = n-1
- pd = zero
-
- if (np <= 1) stop 'minimum number of Gauss-Lobatto points is 2'
-
-! with spectral elements, use at least 3 points
- if (np <= 2) stop 'minimum number of Gauss-Lobatto points for the SEM is 3'
-
- if ((alpha <= -one) .or. (beta <= -one)) stop 'alpha and beta must be greater than -1'
-
- if (nm1 > 0) then
- alpg = alpha+one
- betg = beta+one
- call zwgjd(z(2),w(2),nm1,alpg,betg)
- endif
-
- z(1) = - one
- z(np) = one
-
- do i=2,np-1
- w(i) = w(i)/(one-z(i)**2)
- enddo
-
- call jacobf(p,pd,pm1,pdm1,pm2,pdm2,n,alpha,beta,z(1))
- w(1) = endw1(n,alpha,beta)/(two*pd)
- call jacobf(p,pd,pm1,pdm1,pm2,pdm2,n,alpha,beta,z(np))
- w(np) = endw2(n,alpha,beta)/(two*pd)
-
- end subroutine zwgljd
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!=======================================================================
+!
+! Library to compute the Gauss-Lobatto-Legendre points and weights
+! Based on Gauss-Lobatto routines from M.I.T.
+! Department of Mechanical Engineering
+!
+!=======================================================================
+
+ double precision function endw1(n,alpha,beta)
+
+ implicit none
+
+ integer n
+ double precision alpha,beta
+
+ double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0,three=3.d0,four=4.d0
+ double precision apb,f1,fint1,fint2,f2,di,abn,abnn,a1,a2,a3,f3
+ double precision, external :: gammaf
+ integer i
+
+ f3 = zero
+ apb = alpha+beta
+ if (n == 0) then
+ endw1 = zero
+ return
+ endif
+ f1 = gammaf(alpha+two)*gammaf(beta+one)/gammaf(apb+three)
+ f1 = f1*(apb+two)*two**(apb+two)/two
+ if (n == 1) then
+ endw1 = f1
+ return
+ endif
+ fint1 = gammaf(alpha+two)*gammaf(beta+one)/gammaf(apb+three)
+ fint1 = fint1*two**(apb+two)
+ fint2 = gammaf(alpha+two)*gammaf(beta+two)/gammaf(apb+four)
+ fint2 = fint2*two**(apb+three)
+ f2 = (-two*(beta+two)*fint1 + (apb+four)*fint2) * (apb+three)/four
+ if (n == 2) then
+ endw1 = f2
+ return
+ endif
+ do i=3,n
+ di = dble(i-1)
+ abn = alpha+beta+di
+ abnn = abn+di
+ a1 = -(two*(di+alpha)*(di+beta))/(abn*abnn*(abnn+one))
+ a2 = (two*(alpha-beta))/(abnn*(abnn+two))
+ a3 = (two*(abn+one))/((abnn+two)*(abnn+one))
+ f3 = -(a2*f2+a1*f1)/a3
+ f1 = f2
+ f2 = f3
+ enddo
+ endw1 = f3
+
+ end function endw1
+
+!
+!=======================================================================
+!
+
+ double precision function endw2(n,alpha,beta)
+
+ implicit none
+
+ integer n
+ double precision alpha,beta
+
+ double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0,three=3.d0,four=4.d0
+ double precision apb,f1,fint1,fint2,f2,di,abn,abnn,a1,a2,a3,f3
+ double precision, external :: gammaf
+ integer i
+
+ apb = alpha+beta
+ f3 = zero
+ if (n == 0) then
+ endw2 = zero
+ return
+ endif
+ f1 = gammaf(alpha+one)*gammaf(beta+two)/gammaf(apb+three)
+ f1 = f1*(apb+two)*two**(apb+two)/two
+ if (n == 1) then
+ endw2 = f1
+ return
+ endif
+ fint1 = gammaf(alpha+one)*gammaf(beta+two)/gammaf(apb+three)
+ fint1 = fint1*two**(apb+two)
+ fint2 = gammaf(alpha+two)*gammaf(beta+two)/gammaf(apb+four)
+ fint2 = fint2*two**(apb+three)
+ f2 = (two*(alpha+two)*fint1 - (apb+four)*fint2) * (apb+three)/four
+ if (n == 2) then
+ endw2 = f2
+ return
+ endif
+ do i=3,n
+ di = dble(i-1)
+ abn = alpha+beta+di
+ abnn = abn+di
+ a1 = -(two*(di+alpha)*(di+beta))/(abn*abnn*(abnn+one))
+ a2 = (two*(alpha-beta))/(abnn*(abnn+two))
+ a3 = (two*(abn+one))/((abnn+two)*(abnn+one))
+ f3 = -(a2*f2+a1*f1)/a3
+ f1 = f2
+ f2 = f3
+ enddo
+ endw2 = f3
+
+ end function endw2
+
+!
+!=======================================================================
+!
+
+ double precision function gammaf (x)
+
+ implicit none
+
+ double precision, parameter :: pi = 3.141592653589793d0
+
+ double precision x
+
+ double precision, parameter :: half=0.5d0,one=1.d0,two=2.d0
+
+ gammaf = one
+
+ if (x == -half) gammaf = -two*dsqrt(pi)
+ if (x == half) gammaf = dsqrt(pi)
+ if (x == one ) gammaf = one
+ if (x == two ) gammaf = one
+ if (x == 1.5d0) gammaf = dsqrt(pi)/2.d0
+ if (x == 2.5d0) gammaf = 1.5d0*dsqrt(pi)/2.d0
+ if (x == 3.5d0) gammaf = 2.5d0*1.5d0*dsqrt(pi)/2.d0
+ if (x == 3.d0 ) gammaf = 2.d0
+ if (x == 4.d0 ) gammaf = 6.d0
+ if (x == 5.d0 ) gammaf = 24.d0
+ if (x == 6.d0 ) gammaf = 120.d0
+
+ end function gammaf
+
+!
+!=====================================================================
+!
+
+ subroutine jacg (xjac,np,alpha,beta)
+
+!=======================================================================
+!
+! computes np Gauss points, which are the zeros of the
+! Jacobi polynomial with parameters alpha and beta
+!
+! .alpha = beta = 0.0 -> Legendre points
+! .alpha = beta = -0.5 -> Chebyshev points
+!
+!=======================================================================
+
+ implicit none
+
+ integer np
+ double precision alpha,beta
+ double precision xjac(np)
+
+ integer k,j,i,jmin,jm,n
+ double precision xlast,dth,x,x1,x2,recsum,delx,xmin,swap
+ double precision p,pd,pm1,pdm1,pm2,pdm2
+
+ integer, parameter :: K_MAX_ITER = 10
+ double precision, parameter :: zero = 0.d0, eps = 1.0d-12
+
+ pm1 = zero
+ pm2 = zero
+ pdm1 = zero
+ pdm2 = zero
+
+ xlast = 0.d0
+ n = np-1
+ dth = 4.d0*datan(1.d0)/(2.d0*dble(n)+2.d0)
+ p = 0.d0
+ pd = 0.d0
+ jmin = 0
+ do j=1,np
+ if(j == 1) then
+ x = dcos((2.d0*(dble(j)-1.d0)+1.d0)*dth)
+ else
+ x1 = dcos((2.d0*(dble(j)-1.d0)+1.d0)*dth)
+ x2 = xlast
+ x = (x1+x2)/2.d0
+ endif
+ do k=1,K_MAX_ITER
+ call jacobf (p,pd,pm1,pdm1,pm2,pdm2,np,alpha,beta,x)
+ recsum = 0.d0
+ jm = j-1
+ do i=1,jm
+ recsum = recsum+1.d0/(x-xjac(np-i+1))
+ enddo
+ delx = -p/(pd-recsum*p)
+ x = x+delx
+ if(abs(delx) < eps) goto 31
+ enddo
+ 31 continue
+ xjac(np-j+1) = x
+ xlast = x
+ enddo
+ do i=1,np
+ xmin = 2.d0
+ do j=i,np
+ if(xjac(j) < xmin) then
+ xmin = xjac(j)
+ jmin = j
+ endif
+ enddo
+ if(jmin /= i) then
+ swap = xjac(i)
+ xjac(i) = xjac(jmin)
+ xjac(jmin) = swap
+ endif
+ enddo
+
+ end subroutine jacg
+
+!
+!=====================================================================
+!
+
+ subroutine jacobf (poly,pder,polym1,pderm1,polym2,pderm2,n,alp,bet,x)
+
+!=======================================================================
+!
+! Computes the Jacobi polynomial of degree n and its derivative at x
+!
+!=======================================================================
+
+ implicit none
+
+ double precision poly,pder,polym1,pderm1,polym2,pderm2,alp,bet,x
+ integer n
+
+ double precision apb,polyl,pderl,dk,a1,a2,b3,a3,a4,polyn,pdern,psave,pdsave
+ integer k
+
+ apb = alp+bet
+ poly = 1.d0
+ pder = 0.d0
+ psave = 0.d0
+ pdsave = 0.d0
+
+ if (n == 0) return
+
+ polyl = poly
+ pderl = pder
+ poly = (alp-bet+(apb+2.d0)*x)/2.d0
+ pder = (apb+2.d0)/2.d0
+ if (n == 1) return
+
+ do k=2,n
+ dk = dble(k)
+ a1 = 2.d0*dk*(dk+apb)*(2.d0*dk+apb-2.d0)
+ a2 = (2.d0*dk+apb-1.d0)*(alp**2-bet**2)
+ b3 = (2.d0*dk+apb-2.d0)
+ a3 = b3*(b3+1.d0)*(b3+2.d0)
+ a4 = 2.d0*(dk+alp-1.d0)*(dk+bet-1.d0)*(2.d0*dk+apb)
+ polyn = ((a2+a3*x)*poly-a4*polyl)/a1
+ pdern = ((a2+a3*x)*pder-a4*pderl+a3*poly)/a1
+ psave = polyl
+ pdsave = pderl
+ polyl = poly
+ poly = polyn
+ pderl = pder
+ pder = pdern
+ enddo
+
+ polym1 = polyl
+ pderm1 = pderl
+ polym2 = psave
+ pderm2 = pdsave
+
+ end subroutine jacobf
+
+!
+!------------------------------------------------------------------------
+!
+
+ double precision FUNCTION PNDLEG (Z,N)
+
+!------------------------------------------------------------------------
+!
+! Compute the derivative of the Nth order Legendre polynomial at Z.
+! Based on the recursion formula for the Legendre polynomials.
+!
+!------------------------------------------------------------------------
+ implicit none
+
+ double precision z
+ integer n
+
+ double precision P1,P2,P1D,P2D,P3D,FK,P3
+ integer k
+
+ P1 = 1.d0
+ P2 = Z
+ P1D = 0.d0
+ P2D = 1.d0
+ P3D = 1.d0
+
+ do K = 1, N-1
+ FK = dble(K)
+ P3 = ((2.d0*FK+1.d0)*Z*P2 - FK*P1)/(FK+1.d0)
+ P3D = ((2.d0*FK+1.d0)*P2 + (2.d0*FK+1.d0)*Z*P2D - FK*P1D) / (FK+1.d0)
+ P1 = P2
+ P2 = P3
+ P1D = P2D
+ P2D = P3D
+ enddo
+
+ PNDLEG = P3D
+
+ end function pndleg
+
+!
+!------------------------------------------------------------------------
+!
+
+ double precision FUNCTION PNLEG (Z,N)
+
+!------------------------------------------------------------------------
+!
+! Compute the value of the Nth order Legendre polynomial at Z.
+! Based on the recursion formula for the Legendre polynomials.
+!
+!------------------------------------------------------------------------
+ implicit none
+
+ double precision z
+ integer n
+
+ double precision P1,P2,P3,FK
+ integer k
+
+ P1 = 1.d0
+ P2 = Z
+ P3 = P2
+
+ do K = 1, N-1
+ FK = dble(K)
+ P3 = ((2.d0*FK+1.d0)*Z*P2 - FK*P1)/(FK+1.d0)
+ P1 = P2
+ P2 = P3
+ enddo
+
+ PNLEG = P3
+
+ end function pnleg
+
+!
+!------------------------------------------------------------------------
+!
+
+ double precision function pnormj (n,alpha,beta)
+
+ implicit none
+
+ double precision alpha,beta
+ integer n
+
+ double precision one,two,dn,const,prod,dindx,frac
+ double precision, external :: gammaf
+ integer i
+
+ one = 1.d0
+ two = 2.d0
+ dn = dble(n)
+ const = alpha+beta+one
+
+ if (n <= 1) then
+ prod = gammaf(dn+alpha)*gammaf(dn+beta)
+ prod = prod/(gammaf(dn)*gammaf(dn+alpha+beta))
+ pnormj = prod * two**const/(two*dn+const)
+ return
+ endif
+
+ prod = gammaf(alpha+one)*gammaf(beta+one)
+ prod = prod/(two*(one+const)*gammaf(const+one))
+ prod = prod*(one+alpha)*(two+alpha)
+ prod = prod*(one+beta)*(two+beta)
+
+ do i=3,n
+ dindx = dble(i)
+ frac = (dindx+alpha)*(dindx+beta)/(dindx*(dindx+alpha+beta))
+ prod = prod*frac
+ enddo
+
+ pnormj = prod * two**const/(two*dn+const)
+
+ end function pnormj
+
+!
+!------------------------------------------------------------------------
+!
+
+ subroutine zwgjd(z,w,np,alpha,beta)
+
+!=======================================================================
+!
+! Z w g j d : Generate np Gauss-Jacobi points and weights
+! associated with Jacobi polynomial of degree n = np-1
+!
+! Note : Coefficients alpha and beta must be greater than -1.
+! ----
+!=======================================================================
+
+ implicit none
+
+ double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0
+
+ integer np
+ double precision z(np),w(np)
+ double precision alpha,beta
+
+ integer n,np1,np2,i
+ double precision p,pd,pm1,pdm1,pm2,pdm2
+ double precision apb,dnp1,dnp2,fac1,fac2,fac3,fnorm,rcoef
+ double precision, external :: gammaf,pnormj
+
+ pd = zero
+ pm1 = zero
+ pm2 = zero
+ pdm1 = zero
+ pdm2 = zero
+
+ n = np-1
+ apb = alpha+beta
+ p = zero
+ pdm1 = zero
+
+ if (np <= 0) stop 'minimum number of Gauss points is 1'
+
+ if ((alpha <= -one) .or. (beta <= -one)) stop 'alpha and beta must be greater than -1'
+
+ if (np == 1) then
+ z(1) = (beta-alpha)/(apb+two)
+ w(1) = gammaf(alpha+one)*gammaf(beta+one)/gammaf(apb+two) * two**(apb+one)
+ return
+ endif
+
+ call jacg(z,np,alpha,beta)
+
+ np1 = n+1
+ np2 = n+2
+ dnp1 = dble(np1)
+ dnp2 = dble(np2)
+ fac1 = dnp1+alpha+beta+one
+ fac2 = fac1+dnp1
+ fac3 = fac2+one
+ fnorm = pnormj(np1,alpha,beta)
+ rcoef = (fnorm*fac2*fac3)/(two*fac1*dnp2)
+ do i=1,np
+ call jacobf(p,pd,pm1,pdm1,pm2,pdm2,np2,alpha,beta,z(i))
+ w(i) = -rcoef/(p*pdm1)
+ enddo
+
+ end subroutine zwgjd
+
+!
+!------------------------------------------------------------------------
+!
+
+ subroutine zwgljd(z,w,np,alpha,beta)
+
+!=======================================================================
+!
+! Z w g l j d : Generate np Gauss-Lobatto-Jacobi points and the
+! ----------- weights associated with Jacobi polynomials of degree
+! n = np-1.
+!
+! Note : alpha and beta coefficients must be greater than -1.
+! Legendre polynomials are special case of Jacobi polynomials
+! just by setting alpha and beta to 0.
+!
+!=======================================================================
+
+ implicit none
+
+ double precision, parameter :: zero=0.d0,one=1.d0,two=2.d0
+
+ integer np
+ double precision alpha,beta
+ double precision z(np), w(np)
+
+ integer n,nm1,i
+ double precision p,pd,pm1,pdm1,pm2,pdm2
+ double precision alpg,betg
+ double precision, external :: endw1,endw2
+
+ p = zero
+ pm1 = zero
+ pm2 = zero
+ pdm1 = zero
+ pdm2 = zero
+
+ n = np-1
+ nm1 = n-1
+ pd = zero
+
+ if (np <= 1) stop 'minimum number of Gauss-Lobatto points is 2'
+
+! with spectral elements, use at least 3 points
+ if (np <= 2) stop 'minimum number of Gauss-Lobatto points for the SEM is 3'
+
+ if ((alpha <= -one) .or. (beta <= -one)) stop 'alpha and beta must be greater than -1'
+
+ if (nm1 > 0) then
+ alpg = alpha+one
+ betg = beta+one
+ call zwgjd(z(2),w(2),nm1,alpg,betg)
+ endif
+
+ z(1) = - one
+ z(np) = one
+
+ do i=2,np-1
+ w(i) = w(i)/(one-z(i)**2)
+ enddo
+
+ call jacobf(p,pd,pm1,pdm1,pm2,pdm2,n,alpha,beta,z(1))
+ w(1) = endw1(n,alpha,beta)/(two*pd)
+ call jacobf(p,pd,pm1,pdm1,pm2,pdm2,n,alpha,beta,z(np))
+ w(np) = endw2(n,alpha,beta)/(two*pd)
+
+ end subroutine zwgljd
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/hex_nodes.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/hex_nodes.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/hex_nodes.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,479 +1,479 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine usual_hex_nodes(iaddx,iaddy,iaddz)
-
- implicit none
-
- include "constants.h"
-
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! check that the parameter file is correct
- if(NGNOD /= 8) stop 'elements should have 8 control nodes'
-
-! corner nodes
- iaddx(1)=0
- iaddy(1)=0
- iaddz(1)=0
-
- iaddx(2)=2
- iaddy(2)=0
- iaddz(2)=0
-
- iaddx(3)=2
- iaddy(3)=2
- iaddz(3)=0
-
- iaddx(4)=0
- iaddy(4)=2
- iaddz(4)=0
-
- iaddx(5)=0
- iaddy(5)=0
- iaddz(5)=2
-
- iaddx(6)=2
- iaddy(6)=0
- iaddz(6)=2
-
- iaddx(7)=2
- iaddy(7)=2
- iaddz(7)=2
-
- iaddx(8)=0
- iaddy(8)=2
- iaddz(8)=2
-
- end subroutine usual_hex_nodes
-
- subroutine unusual_hex_nodes1(iaddx,iaddy,iaddz)
-
- implicit none
-
- include "constants.h"
-
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
- iaddx(1)=0
- iaddy(1)=0
- iaddz(1)=0
-
- iaddx(2)=4
- iaddy(2)=0
- iaddz(2)=0
-
- iaddx(3)=4
- iaddy(3)=4
- iaddz(3)=0
-
- iaddx(4)=0
- iaddy(4)=4
- iaddz(4)=0
-
- iaddx(5)=2
- iaddy(5)=0
- iaddz(5)=2
-
- iaddx(6)=4
- iaddy(6)=0
- iaddz(6)=2
-
- iaddx(7)=4
- iaddy(7)=4
- iaddz(7)=2
-
- iaddx(8)=2
- iaddy(8)=4
- iaddz(8)=2
-
- end subroutine unusual_hex_nodes1
-
- subroutine unusual_hex_nodes1p(iaddx,iaddy,iaddz)
-
- implicit none
-
- include "constants.h"
-
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
- iaddx(1)=0
- iaddy(1)=0
- iaddz(1)=0
-
- iaddx(2)=4
- iaddy(2)=0
- iaddz(2)=0
-
- iaddx(3)=4
- iaddy(3)=4
- iaddz(3)=0
-
- iaddx(4)=0
- iaddy(4)=4
- iaddz(4)=0
-
- iaddx(5)=0
- iaddy(5)=0
- iaddz(5)=2
-
- iaddx(6)=2
- iaddy(6)=0
- iaddz(6)=2
-
- iaddx(7)=2
- iaddy(7)=4
- iaddz(7)=2
-
- iaddx(8)=0
- iaddy(8)=4
- iaddz(8)=2
-
- end subroutine unusual_hex_nodes1p
-
- subroutine unusual_hex_nodes2(iaddx,iaddy,iaddz)
-
- implicit none
-
- include "constants.h"
-
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
- iaddx(1)=0
- iaddy(1)=0
- iaddz(1)=0
-
- iaddx(2)=2
- iaddy(2)=0
- iaddz(2)=2
-
- iaddx(3)=2
- iaddy(3)=4
- iaddz(3)=2
-
- iaddx(4)=0
- iaddy(4)=4
- iaddz(4)=0
-
- iaddx(5)=0
- iaddy(5)=0
- iaddz(5)=4
-
- iaddx(6)=2
- iaddy(6)=0
- iaddz(6)=4
-
- iaddx(7)=2
- iaddy(7)=4
- iaddz(7)=4
-
- iaddx(8)=0
- iaddy(8)=4
- iaddz(8)=4
-
- end subroutine unusual_hex_nodes2
-
- subroutine unusual_hex_nodes2p(iaddx,iaddy,iaddz)
-
- implicit none
-
- include "constants.h"
-
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
- iaddx(1)=0
- iaddy(1)=0
- iaddz(1)=0
-
- iaddx(2)=2
- iaddy(2)=0
- iaddz(2)=-2
-
- iaddx(3)=2
- iaddy(3)=4
- iaddz(3)=-2
-
- iaddx(4)=0
- iaddy(4)=4
- iaddz(4)=0
-
- iaddx(5)=0
- iaddy(5)=0
- iaddz(5)=2
-
- iaddx(6)=2
- iaddy(6)=0
- iaddz(6)=2
-
- iaddx(7)=2
- iaddy(7)=4
- iaddz(7)=2
-
- iaddx(8)=0
- iaddy(8)=4
- iaddz(8)=2
-
- end subroutine unusual_hex_nodes2p
-
- subroutine unusual_hex_nodes3(iaddx,iaddy,iaddz)
-
- implicit none
-
- include "constants.h"
-
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
- iaddx(1)=0
- iaddy(1)=0
- iaddz(1)=0
-
- iaddx(2)=2
- iaddy(2)=0
- iaddz(2)=0
-
- iaddx(3)=2
- iaddy(3)=4
- iaddz(3)=0
-
- iaddx(4)=0
- iaddy(4)=4
- iaddz(4)=0
-
- iaddx(5)=0
- iaddy(5)=0
- iaddz(5)=2
-
- iaddx(6)=2
- iaddy(6)=0
- iaddz(6)=2
-
- iaddx(7)=2
- iaddy(7)=4
- iaddz(7)=2
-
- iaddx(8)=0
- iaddy(8)=4
- iaddz(8)=2
-
- end subroutine unusual_hex_nodes3
-
- subroutine unusual_hex_nodes4(iaddx,iaddy,iaddz)
-
- implicit none
-
- include "constants.h"
-
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
- iaddx(1)=0
- iaddy(1)=0
- iaddz(1)=0
-
- iaddx(2)=2
- iaddy(2)=0
- iaddz(2)=0
-
- iaddx(3)=2
- iaddy(3)=4
- iaddz(3)=0
-
- iaddx(4)=0
- iaddy(4)=4
- iaddz(4)=0
-
- iaddx(5)=0
- iaddy(5)=0
- iaddz(5)=2
-
- iaddx(6)=2
- iaddy(6)=0
- iaddz(6)=2
-
- iaddx(7)=2
- iaddy(7)=2
- iaddz(7)=2
-
- iaddx(8)=0
- iaddy(8)=2
- iaddz(8)=2
-
- end subroutine unusual_hex_nodes4
-
- subroutine unusual_hex_nodes4p(iaddx,iaddy,iaddz)
-
- implicit none
-
- include "constants.h"
-
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
- iaddx(1)=0
- iaddy(1)=0
- iaddz(1)=0
-
- iaddx(2)=2
- iaddy(2)=0
- iaddz(2)=0
-
- iaddx(3)=2
- iaddy(3)=4
- iaddz(3)=0
-
- iaddx(4)=0
- iaddy(4)=4
- iaddz(4)=0
-
- iaddx(5)=0
- iaddy(5)=2
- iaddz(5)=2
-
- iaddx(6)=2
- iaddy(6)=2
- iaddz(6)=2
-
- iaddx(7)=2
- iaddy(7)=4
- iaddz(7)=2
-
- iaddx(8)=0
- iaddy(8)=4
- iaddz(8)=2
-
- end subroutine unusual_hex_nodes4p
-
- subroutine unusual_hex_nodes6(iaddx,iaddy,iaddz)
-
- implicit none
-
- include "constants.h"
-
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
- iaddx(1)=0
- iaddy(1)=0
- iaddz(1)=0
-
- iaddx(2)=2
- iaddy(2)=0
- iaddz(2)=0
-
- iaddx(3)=2
- iaddy(3)=2
- iaddz(3)=-2
-
- iaddx(4)=0
- iaddy(4)=2
- iaddz(4)=-2
-
- iaddx(5)=0
- iaddy(5)=0
- iaddz(5)=2
-
- iaddx(6)=2
- iaddy(6)=0
- iaddz(6)=2
-
- iaddx(7)=2
- iaddy(7)=2
- iaddz(7)=2
-
- iaddx(8)=0
- iaddy(8)=2
- iaddz(8)=2
-
- end subroutine unusual_hex_nodes6
-
- subroutine unusual_hex_nodes6p(iaddx,iaddy,iaddz)
-
- implicit none
-
- include "constants.h"
-
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! define the topology of the hexahedral elements
-
-! corner nodes
- iaddx(1)=0
- iaddy(1)=0
- iaddz(1)=0
-
- iaddx(2)=2
- iaddy(2)=0
- iaddz(2)=0
-
- iaddx(3)=2
- iaddy(3)=2
- iaddz(3)=2
-
- iaddx(4)=0
- iaddy(4)=2
- iaddz(4)=2
-
- iaddx(5)=0
- iaddy(5)=0
- iaddz(5)=4
-
- iaddx(6)=2
- iaddy(6)=0
- iaddz(6)=4
-
- iaddx(7)=2
- iaddy(7)=2
- iaddz(7)=4
-
- iaddx(8)=0
- iaddy(8)=2
- iaddz(8)=4
-
- end subroutine unusual_hex_nodes6p
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine usual_hex_nodes(iaddx,iaddy,iaddz)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! check that the parameter file is correct
+ if(NGNOD /= 8) stop 'elements should have 8 control nodes'
+
+! corner nodes
+ iaddx(1)=0
+ iaddy(1)=0
+ iaddz(1)=0
+
+ iaddx(2)=2
+ iaddy(2)=0
+ iaddz(2)=0
+
+ iaddx(3)=2
+ iaddy(3)=2
+ iaddz(3)=0
+
+ iaddx(4)=0
+ iaddy(4)=2
+ iaddz(4)=0
+
+ iaddx(5)=0
+ iaddy(5)=0
+ iaddz(5)=2
+
+ iaddx(6)=2
+ iaddy(6)=0
+ iaddz(6)=2
+
+ iaddx(7)=2
+ iaddy(7)=2
+ iaddz(7)=2
+
+ iaddx(8)=0
+ iaddy(8)=2
+ iaddz(8)=2
+
+ end subroutine usual_hex_nodes
+
+ subroutine unusual_hex_nodes1(iaddx,iaddy,iaddz)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+ iaddx(1)=0
+ iaddy(1)=0
+ iaddz(1)=0
+
+ iaddx(2)=4
+ iaddy(2)=0
+ iaddz(2)=0
+
+ iaddx(3)=4
+ iaddy(3)=4
+ iaddz(3)=0
+
+ iaddx(4)=0
+ iaddy(4)=4
+ iaddz(4)=0
+
+ iaddx(5)=2
+ iaddy(5)=0
+ iaddz(5)=2
+
+ iaddx(6)=4
+ iaddy(6)=0
+ iaddz(6)=2
+
+ iaddx(7)=4
+ iaddy(7)=4
+ iaddz(7)=2
+
+ iaddx(8)=2
+ iaddy(8)=4
+ iaddz(8)=2
+
+ end subroutine unusual_hex_nodes1
+
+ subroutine unusual_hex_nodes1p(iaddx,iaddy,iaddz)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+ iaddx(1)=0
+ iaddy(1)=0
+ iaddz(1)=0
+
+ iaddx(2)=4
+ iaddy(2)=0
+ iaddz(2)=0
+
+ iaddx(3)=4
+ iaddy(3)=4
+ iaddz(3)=0
+
+ iaddx(4)=0
+ iaddy(4)=4
+ iaddz(4)=0
+
+ iaddx(5)=0
+ iaddy(5)=0
+ iaddz(5)=2
+
+ iaddx(6)=2
+ iaddy(6)=0
+ iaddz(6)=2
+
+ iaddx(7)=2
+ iaddy(7)=4
+ iaddz(7)=2
+
+ iaddx(8)=0
+ iaddy(8)=4
+ iaddz(8)=2
+
+ end subroutine unusual_hex_nodes1p
+
+ subroutine unusual_hex_nodes2(iaddx,iaddy,iaddz)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+ iaddx(1)=0
+ iaddy(1)=0
+ iaddz(1)=0
+
+ iaddx(2)=2
+ iaddy(2)=0
+ iaddz(2)=2
+
+ iaddx(3)=2
+ iaddy(3)=4
+ iaddz(3)=2
+
+ iaddx(4)=0
+ iaddy(4)=4
+ iaddz(4)=0
+
+ iaddx(5)=0
+ iaddy(5)=0
+ iaddz(5)=4
+
+ iaddx(6)=2
+ iaddy(6)=0
+ iaddz(6)=4
+
+ iaddx(7)=2
+ iaddy(7)=4
+ iaddz(7)=4
+
+ iaddx(8)=0
+ iaddy(8)=4
+ iaddz(8)=4
+
+ end subroutine unusual_hex_nodes2
+
+ subroutine unusual_hex_nodes2p(iaddx,iaddy,iaddz)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+ iaddx(1)=0
+ iaddy(1)=0
+ iaddz(1)=0
+
+ iaddx(2)=2
+ iaddy(2)=0
+ iaddz(2)=-2
+
+ iaddx(3)=2
+ iaddy(3)=4
+ iaddz(3)=-2
+
+ iaddx(4)=0
+ iaddy(4)=4
+ iaddz(4)=0
+
+ iaddx(5)=0
+ iaddy(5)=0
+ iaddz(5)=2
+
+ iaddx(6)=2
+ iaddy(6)=0
+ iaddz(6)=2
+
+ iaddx(7)=2
+ iaddy(7)=4
+ iaddz(7)=2
+
+ iaddx(8)=0
+ iaddy(8)=4
+ iaddz(8)=2
+
+ end subroutine unusual_hex_nodes2p
+
+ subroutine unusual_hex_nodes3(iaddx,iaddy,iaddz)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+ iaddx(1)=0
+ iaddy(1)=0
+ iaddz(1)=0
+
+ iaddx(2)=2
+ iaddy(2)=0
+ iaddz(2)=0
+
+ iaddx(3)=2
+ iaddy(3)=4
+ iaddz(3)=0
+
+ iaddx(4)=0
+ iaddy(4)=4
+ iaddz(4)=0
+
+ iaddx(5)=0
+ iaddy(5)=0
+ iaddz(5)=2
+
+ iaddx(6)=2
+ iaddy(6)=0
+ iaddz(6)=2
+
+ iaddx(7)=2
+ iaddy(7)=4
+ iaddz(7)=2
+
+ iaddx(8)=0
+ iaddy(8)=4
+ iaddz(8)=2
+
+ end subroutine unusual_hex_nodes3
+
+ subroutine unusual_hex_nodes4(iaddx,iaddy,iaddz)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+ iaddx(1)=0
+ iaddy(1)=0
+ iaddz(1)=0
+
+ iaddx(2)=2
+ iaddy(2)=0
+ iaddz(2)=0
+
+ iaddx(3)=2
+ iaddy(3)=4
+ iaddz(3)=0
+
+ iaddx(4)=0
+ iaddy(4)=4
+ iaddz(4)=0
+
+ iaddx(5)=0
+ iaddy(5)=0
+ iaddz(5)=2
+
+ iaddx(6)=2
+ iaddy(6)=0
+ iaddz(6)=2
+
+ iaddx(7)=2
+ iaddy(7)=2
+ iaddz(7)=2
+
+ iaddx(8)=0
+ iaddy(8)=2
+ iaddz(8)=2
+
+ end subroutine unusual_hex_nodes4
+
+ subroutine unusual_hex_nodes4p(iaddx,iaddy,iaddz)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+ iaddx(1)=0
+ iaddy(1)=0
+ iaddz(1)=0
+
+ iaddx(2)=2
+ iaddy(2)=0
+ iaddz(2)=0
+
+ iaddx(3)=2
+ iaddy(3)=4
+ iaddz(3)=0
+
+ iaddx(4)=0
+ iaddy(4)=4
+ iaddz(4)=0
+
+ iaddx(5)=0
+ iaddy(5)=2
+ iaddz(5)=2
+
+ iaddx(6)=2
+ iaddy(6)=2
+ iaddz(6)=2
+
+ iaddx(7)=2
+ iaddy(7)=4
+ iaddz(7)=2
+
+ iaddx(8)=0
+ iaddy(8)=4
+ iaddz(8)=2
+
+ end subroutine unusual_hex_nodes4p
+
+ subroutine unusual_hex_nodes6(iaddx,iaddy,iaddz)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+ iaddx(1)=0
+ iaddy(1)=0
+ iaddz(1)=0
+
+ iaddx(2)=2
+ iaddy(2)=0
+ iaddz(2)=0
+
+ iaddx(3)=2
+ iaddy(3)=2
+ iaddz(3)=-2
+
+ iaddx(4)=0
+ iaddy(4)=2
+ iaddz(4)=-2
+
+ iaddx(5)=0
+ iaddy(5)=0
+ iaddz(5)=2
+
+ iaddx(6)=2
+ iaddy(6)=0
+ iaddz(6)=2
+
+ iaddx(7)=2
+ iaddy(7)=2
+ iaddz(7)=2
+
+ iaddx(8)=0
+ iaddy(8)=2
+ iaddz(8)=2
+
+ end subroutine unusual_hex_nodes6
+
+ subroutine unusual_hex_nodes6p(iaddx,iaddy,iaddz)
+
+ implicit none
+
+ include "constants.h"
+
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! define the topology of the hexahedral elements
+
+! corner nodes
+ iaddx(1)=0
+ iaddy(1)=0
+ iaddz(1)=0
+
+ iaddx(2)=2
+ iaddy(2)=0
+ iaddz(2)=0
+
+ iaddx(3)=2
+ iaddy(3)=2
+ iaddz(3)=2
+
+ iaddx(4)=0
+ iaddy(4)=2
+ iaddz(4)=2
+
+ iaddx(5)=0
+ iaddy(5)=0
+ iaddz(5)=4
+
+ iaddx(6)=2
+ iaddy(6)=0
+ iaddz(6)=4
+
+ iaddx(7)=2
+ iaddy(7)=2
+ iaddz(7)=4
+
+ iaddx(8)=0
+ iaddy(8)=2
+ iaddz(8)=4
+
+ end subroutine unusual_hex_nodes6p
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/lagrange_poly.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/lagrange_poly.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/lagrange_poly.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,110 +1,110 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine lagrange_any(xi,NGLL,xigll,h,hprime)
-
-! subroutine to compute the Lagrange interpolants based upon the GLL points
-! and their first derivatives at any point xi in [-1,1]
-
- implicit none
-
- integer, intent(in) :: NGLL
- double precision, intent(in) :: xi,xigll(NGLL)
- double precision, intent(out) :: h(NGLL),hprime(NGLL)
-
- integer dgr,i,j
- double precision prod1,prod2
-
- do dgr=1,NGLL
-
- prod1 = 1.0d0
- prod2 = 1.0d0
- do i=1,NGLL
- if(i /= dgr) then
- prod1 = prod1*(xi-xigll(i))
- prod2 = prod2*(xigll(dgr)-xigll(i))
- endif
- enddo
- h(dgr)=prod1/prod2
-
- hprime(dgr)=0.0d0
- do i=1,NGLL
- if(i /= dgr) then
- prod1=1.0d0
- do j=1,NGLL
- if(j /= dgr .and. j /= i) prod1 = prod1*(xi-xigll(j))
- enddo
- hprime(dgr) = hprime(dgr)+prod1
- endif
- enddo
- hprime(dgr) = hprime(dgr)/prod2
-
- enddo
-
- end subroutine lagrange_any
-
-!
-!=====================================================================
-!
-
-! subroutine to compute the derivative of the Lagrange interpolants
-! at the GLL points at any given GLL point
-
- double precision function lagrange_deriv_GLL(I,j,ZGLL,NZ)
-
-!------------------------------------------------------------------------
-!
-! Compute the value of the derivative of the I-th
-! Lagrange interpolant through the
-! NZ Gauss-Lobatto Legendre points ZGLL at point ZGLL(j)
-!
-!------------------------------------------------------------------------
-
- implicit none
-
- integer i,j,nz
- double precision zgll(0:nz-1)
-
- integer degpoly
-
- double precision, external :: pnleg,pndleg
-
- degpoly = nz - 1
- if (i == 0 .and. j == 0) then
- lagrange_deriv_GLL = - dble(degpoly)*(dble(degpoly)+1.d0) / 4.d0
- else if (i == degpoly .and. j == degpoly) then
- lagrange_deriv_GLL = dble(degpoly)*(dble(degpoly)+1.d0) / 4.d0
- else if (i == j) then
- lagrange_deriv_GLL = 0.d0
- else
- lagrange_deriv_GLL = pnleg(zgll(j),degpoly) / &
- (pnleg(zgll(i),degpoly)*(zgll(j)-zgll(i))) &
- + (1.d0-zgll(j)*zgll(j))*pndleg(zgll(j),degpoly) / (dble(degpoly)* &
- (dble(degpoly)+1.d0)*pnleg(zgll(i),degpoly)*(zgll(j)-zgll(i))*(zgll(j)-zgll(i)))
- endif
-
- end function lagrange_deriv_GLL
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine lagrange_any(xi,NGLL,xigll,h,hprime)
+
+! subroutine to compute the Lagrange interpolants based upon the GLL points
+! and their first derivatives at any point xi in [-1,1]
+
+ implicit none
+
+ integer, intent(in) :: NGLL
+ double precision, intent(in) :: xi,xigll(NGLL)
+ double precision, intent(out) :: h(NGLL),hprime(NGLL)
+
+ integer dgr,i,j
+ double precision prod1,prod2
+
+ do dgr=1,NGLL
+
+ prod1 = 1.0d0
+ prod2 = 1.0d0
+ do i=1,NGLL
+ if(i /= dgr) then
+ prod1 = prod1*(xi-xigll(i))
+ prod2 = prod2*(xigll(dgr)-xigll(i))
+ endif
+ enddo
+ h(dgr)=prod1/prod2
+
+ hprime(dgr)=0.0d0
+ do i=1,NGLL
+ if(i /= dgr) then
+ prod1=1.0d0
+ do j=1,NGLL
+ if(j /= dgr .and. j /= i) prod1 = prod1*(xi-xigll(j))
+ enddo
+ hprime(dgr) = hprime(dgr)+prod1
+ endif
+ enddo
+ hprime(dgr) = hprime(dgr)/prod2
+
+ enddo
+
+ end subroutine lagrange_any
+
+!
+!=====================================================================
+!
+
+! subroutine to compute the derivative of the Lagrange interpolants
+! at the GLL points at any given GLL point
+
+ double precision function lagrange_deriv_GLL(I,j,ZGLL,NZ)
+
+!------------------------------------------------------------------------
+!
+! Compute the value of the derivative of the I-th
+! Lagrange interpolant through the
+! NZ Gauss-Lobatto Legendre points ZGLL at point ZGLL(j)
+!
+!------------------------------------------------------------------------
+
+ implicit none
+
+ integer i,j,nz
+ double precision zgll(0:nz-1)
+
+ integer degpoly
+
+ double precision, external :: pnleg,pndleg
+
+ degpoly = nz - 1
+ if (i == 0 .and. j == 0) then
+ lagrange_deriv_GLL = - dble(degpoly)*(dble(degpoly)+1.d0) / 4.d0
+ else if (i == degpoly .and. j == degpoly) then
+ lagrange_deriv_GLL = dble(degpoly)*(dble(degpoly)+1.d0) / 4.d0
+ else if (i == j) then
+ lagrange_deriv_GLL = 0.d0
+ else
+ lagrange_deriv_GLL = pnleg(zgll(j),degpoly) / &
+ (pnleg(zgll(i),degpoly)*(zgll(j)-zgll(i))) &
+ + (1.d0-zgll(j)*zgll(j))*pndleg(zgll(j),degpoly) / (dble(degpoly)* &
+ (dble(degpoly)+1.d0)*pnleg(zgll(i),degpoly)*(zgll(j)-zgll(i))*(zgll(j)-zgll(i)))
+ endif
+
+ end function lagrange_deriv_GLL
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_topo_bathy_file.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_topo_bathy_file.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_topo_bathy_file.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,55 +1,55 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine read_topo_bathy_file(itopo_bathy,NX_TOPO,NY_TOPO,topo_file)
-!
-!---- read topography and bathymetry file once and for all
-!
- implicit none
-
- include "constants.h"
-
- integer NX_TOPO,NY_TOPO
-
-! use integer array to store topography values
- integer itopo_bathy(NX_TOPO,NY_TOPO)
-
- character(len=100) topo_file
-
- integer ix,iy
-
- itopo_bathy(:,:) = 0
-
- open(unit=13,file=topo_file,status='old',action='read')
- do iy=1,NY_TOPO
- do ix=1,NX_TOPO
- read(13,*) itopo_bathy(ix,iy)
- enddo
- enddo
- close(13)
-
- end subroutine read_topo_bathy_file
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine read_topo_bathy_file(itopo_bathy,NX_TOPO,NY_TOPO,topo_file)
+!
+!---- read topography and bathymetry file once and for all
+!
+ implicit none
+
+ include "constants.h"
+
+ integer NX_TOPO,NY_TOPO
+
+! use integer array to store topography values
+ integer itopo_bathy(NX_TOPO,NY_TOPO)
+
+ character(len=100) topo_file
+
+ integer ix,iy
+
+ itopo_bathy(:,:) = 0
+
+ open(unit=13,file=topo_file,status='old',action='read')
+ do iy=1,NY_TOPO
+ do ix=1,NX_TOPO
+ read(13,*) itopo_bathy(ix,iy)
+ enddo
+ enddo
+ close(13)
+
+ end subroutine read_topo_bathy_file
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_value_parameters.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_value_parameters.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_value_parameters.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,290 +1,290 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! read values from parameter file, ignoring white lines and comments
-
- subroutine read_value_integer(value_to_read, name)
-
- implicit none
-
- integer value_to_read
- character(len=*) name
- character(len=100) string_read
- integer ierr
- common /param_err_common/ ierr
-
- call param_read(string_read, len(string_read), name, len(name), ierr)
- if (ierr .ne. 0) return
- read(string_read,*) value_to_read
-
- end subroutine read_value_integer
-
-!--------------------
-
- subroutine read_value_double_precision(value_to_read, name)
-
- implicit none
-
- double precision value_to_read
- character(len=*) name
- character(len=100) string_read
- integer ierr
- common /param_err_common/ ierr
-
- call param_read(string_read, len(string_read), name, len(name), ierr)
- if (ierr .ne. 0) return
- read(string_read,*) value_to_read
-
- end subroutine read_value_double_precision
-
-!--------------------
-
- subroutine read_value_logical(value_to_read, name)
-
- implicit none
-
- logical value_to_read
- character(len=*) name
- character(len=100) string_read
- integer ierr
- common /param_err_common/ ierr
-
- call param_read(string_read, len(string_read), name, len(name), ierr)
- if (ierr .ne. 0) return
- read(string_read,*) value_to_read
-
- end subroutine read_value_logical
-
-!--------------------
-
- subroutine read_value_string(value_to_read, name)
-
- implicit none
-
- character(len=*) value_to_read
- character(len=*) name
- character(len=100) string_read
- integer ierr
- common /param_err_common/ ierr
-
- call param_read(string_read, len(string_read), name, len(name), ierr)
- if (ierr .ne. 0) return
- value_to_read = string_read
-
- end subroutine read_value_string
-
-!--------------------
-
- subroutine open_parameter_file()
-
- include 'constants.h'
- integer ierr
- common /param_err_common/ ierr
- character(len=50) filename
- filename = IN_DATA_FILES_PATH(1:len_trim(IN_DATA_FILES_PATH))//'Par_file'
-
- call param_open(filename, len(filename), ierr);
- if (ierr .ne. 0) return
-
- end subroutine open_parameter_file
-
-!--------------------
-
- subroutine close_parameter_file()
-
- call param_close();
-
- end subroutine close_parameter_file
-
-!--------------------
-
- integer function err_occurred()
-
- integer ierr
- common /param_err_common/ ierr
-
- err_occurred = ierr
-
- end function err_occurred
-
-!--------------------
-
-
-!
-! unused routines:
-!
-
-
-! subroutine read_value_integer(value_to_read, name)
-!
-! implicit none
-!
-! integer value_to_read
-! character(len=*) name
-! character(len=256) string_read
-!
-! call unused_string(name)
-!
-! call read_next_line(string_read)
-! read(string_read,*) value_to_read
-!
-! end subroutine read_value_integer
-!
-!!--------------------
-!
-! subroutine read_value_double_precision(value_to_read, name)
-!
-! implicit none
-!
-! double precision value_to_read
-! character(len=*) name
-! character(len=256) string_read
-!
-! call unused_string(name)
-!
-! call read_next_line(string_read)
-! read(string_read,*) value_to_read
-!
-! end subroutine read_value_double_precision
-!
-!!--------------------
-!
-! subroutine read_value_logical(value_to_read, name)
-!
-! implicit none
-!
-! logical value_to_read
-! character(len=*) name
-! character(len=256) string_read
-!
-! call unused_string(name)
-!
-! call read_next_line(string_read)
-! read(string_read,*) value_to_read
-!
-! end subroutine read_value_logical
-!
-!!--------------------
-!
-! subroutine read_value_string(value_to_read, name)
-!
-! implicit none
-!
-! character(len=*) value_to_read
-! character(len=*) name
-! character(len=256) string_read
-!
-! call unused_string(name)
-!
-! call read_next_line(string_read)
-! value_to_read = string_read
-!
-! end subroutine read_value_string
-!
-!!--------------------
-!
-! subroutine read_next_line(string_read)
-!
-! implicit none
-!
-! include "constants.h"
-!
-! character(len=256) string_read
-!
-! integer index_equal_sign,ios
-!
-! do
-! read(unit=IIN,fmt="(a256)",iostat=ios) string_read
-! if(ios /= 0) stop 'error while reading parameter file'
-!
-!! suppress leading white spaces, if any
-! string_read = adjustl(string_read)
-!
-!! suppress trailing carriage return (ASCII code 13) if any (e.g. if input text file coming from Windows/DOS)
-! if(index(string_read,achar(13)) > 0) string_read = string_read(1:index(string_read,achar(13))-1)
-!
-!! exit loop when we find the first line that is not a comment or a white line
-! if(len_trim(string_read) == 0) cycle
-! if(string_read(1:1) /= '#') exit
-!
-! enddo
-!
-!! suppress trailing white spaces, if any
-! string_read = string_read(1:len_trim(string_read))
-!
-!! suppress trailing comments, if any
-! if(index(string_read,'#') > 0) string_read = string_read(1:index(string_read,'#')-1)
-!
-!! suppress leading junk (up to the first equal sign, included)
-! index_equal_sign = index(string_read,'=')
-! if(index_equal_sign <= 1 .or. index_equal_sign == len_trim(string_read)) stop 'incorrect syntax detected in Par_file'
-! string_read = string_read(index_equal_sign + 1:len_trim(string_read))
-!
-!! suppress leading and trailing white spaces again, if any, after having suppressed the leading junk
-! string_read = adjustl(string_read)
-! string_read = string_read(1:len_trim(string_read))
-!
-! end subroutine read_next_line
-!
-!!--------------------
-!
-! subroutine open_parameter_file
-!
-! include "constants.h"
-!
-! open(unit=IIN,file='in_data_files/Par_file',status='old',action='read')
-!
-! end subroutine open_parameter_file
-!
-!!--------------------
-!
-! subroutine close_parameter_file
-!
-! include "constants.h"
-!
-! close(IIN)
-!
-! end subroutine close_parameter_file
-!
-!!--------------------
-!
-! integer function err_occurred()
-!
-! err_occurred = 0
-!
-! end function err_occurred
-!
-!!--------------------
-!
-!! dummy subroutine to avoid warnings about variable not used in other subroutines
-! subroutine unused_string(s)
-!
-! character(len=*) s
-!
-! if (len(s) == 1) continue
-!
-! end subroutine unused_string
-!
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! read values from parameter file, ignoring white lines and comments
+
+ subroutine read_value_integer(value_to_read, name)
+
+ implicit none
+
+ integer value_to_read
+ character(len=*) name
+ character(len=100) string_read
+ integer ierr
+ common /param_err_common/ ierr
+
+ call param_read(string_read, len(string_read), name, len(name), ierr)
+ if (ierr .ne. 0) return
+ read(string_read,*) value_to_read
+
+ end subroutine read_value_integer
+
+!--------------------
+
+ subroutine read_value_double_precision(value_to_read, name)
+
+ implicit none
+
+ double precision value_to_read
+ character(len=*) name
+ character(len=100) string_read
+ integer ierr
+ common /param_err_common/ ierr
+
+ call param_read(string_read, len(string_read), name, len(name), ierr)
+ if (ierr .ne. 0) return
+ read(string_read,*) value_to_read
+
+ end subroutine read_value_double_precision
+
+!--------------------
+
+ subroutine read_value_logical(value_to_read, name)
+
+ implicit none
+
+ logical value_to_read
+ character(len=*) name
+ character(len=100) string_read
+ integer ierr
+ common /param_err_common/ ierr
+
+ call param_read(string_read, len(string_read), name, len(name), ierr)
+ if (ierr .ne. 0) return
+ read(string_read,*) value_to_read
+
+ end subroutine read_value_logical
+
+!--------------------
+
+ subroutine read_value_string(value_to_read, name)
+
+ implicit none
+
+ character(len=*) value_to_read
+ character(len=*) name
+ character(len=100) string_read
+ integer ierr
+ common /param_err_common/ ierr
+
+ call param_read(string_read, len(string_read), name, len(name), ierr)
+ if (ierr .ne. 0) return
+ value_to_read = string_read
+
+ end subroutine read_value_string
+
+!--------------------
+
+ subroutine open_parameter_file()
+
+ include 'constants.h'
+ integer ierr
+ common /param_err_common/ ierr
+ character(len=50) filename
+ filename = IN_DATA_FILES_PATH(1:len_trim(IN_DATA_FILES_PATH))//'Par_file'
+
+ call param_open(filename, len(filename), ierr);
+ if (ierr .ne. 0) return
+
+ end subroutine open_parameter_file
+
+!--------------------
+
+ subroutine close_parameter_file()
+
+ call param_close();
+
+ end subroutine close_parameter_file
+
+!--------------------
+
+ integer function err_occurred()
+
+ integer ierr
+ common /param_err_common/ ierr
+
+ err_occurred = ierr
+
+ end function err_occurred
+
+!--------------------
+
+
+!
+! unused routines:
+!
+
+
+! subroutine read_value_integer(value_to_read, name)
+!
+! implicit none
+!
+! integer value_to_read
+! character(len=*) name
+! character(len=256) string_read
+!
+! call unused_string(name)
+!
+! call read_next_line(string_read)
+! read(string_read,*) value_to_read
+!
+! end subroutine read_value_integer
+!
+!!--------------------
+!
+! subroutine read_value_double_precision(value_to_read, name)
+!
+! implicit none
+!
+! double precision value_to_read
+! character(len=*) name
+! character(len=256) string_read
+!
+! call unused_string(name)
+!
+! call read_next_line(string_read)
+! read(string_read,*) value_to_read
+!
+! end subroutine read_value_double_precision
+!
+!!--------------------
+!
+! subroutine read_value_logical(value_to_read, name)
+!
+! implicit none
+!
+! logical value_to_read
+! character(len=*) name
+! character(len=256) string_read
+!
+! call unused_string(name)
+!
+! call read_next_line(string_read)
+! read(string_read,*) value_to_read
+!
+! end subroutine read_value_logical
+!
+!!--------------------
+!
+! subroutine read_value_string(value_to_read, name)
+!
+! implicit none
+!
+! character(len=*) value_to_read
+! character(len=*) name
+! character(len=256) string_read
+!
+! call unused_string(name)
+!
+! call read_next_line(string_read)
+! value_to_read = string_read
+!
+! end subroutine read_value_string
+!
+!!--------------------
+!
+! subroutine read_next_line(string_read)
+!
+! implicit none
+!
+! include "constants.h"
+!
+! character(len=256) string_read
+!
+! integer index_equal_sign,ios
+!
+! do
+! read(unit=IIN,fmt="(a256)",iostat=ios) string_read
+! if(ios /= 0) stop 'error while reading parameter file'
+!
+!! suppress leading white spaces, if any
+! string_read = adjustl(string_read)
+!
+!! suppress trailing carriage return (ASCII code 13) if any (e.g. if input text file coming from Windows/DOS)
+! if(index(string_read,achar(13)) > 0) string_read = string_read(1:index(string_read,achar(13))-1)
+!
+!! exit loop when we find the first line that is not a comment or a white line
+! if(len_trim(string_read) == 0) cycle
+! if(string_read(1:1) /= '#') exit
+!
+! enddo
+!
+!! suppress trailing white spaces, if any
+! string_read = string_read(1:len_trim(string_read))
+!
+!! suppress trailing comments, if any
+! if(index(string_read,'#') > 0) string_read = string_read(1:index(string_read,'#')-1)
+!
+!! suppress leading junk (up to the first equal sign, included)
+! index_equal_sign = index(string_read,'=')
+! if(index_equal_sign <= 1 .or. index_equal_sign == len_trim(string_read)) stop 'incorrect syntax detected in Par_file'
+! string_read = string_read(index_equal_sign + 1:len_trim(string_read))
+!
+!! suppress leading and trailing white spaces again, if any, after having suppressed the leading junk
+! string_read = adjustl(string_read)
+! string_read = string_read(1:len_trim(string_read))
+!
+! end subroutine read_next_line
+!
+!!--------------------
+!
+! subroutine open_parameter_file
+!
+! include "constants.h"
+!
+! open(unit=IIN,file='in_data_files/Par_file',status='old',action='read')
+!
+! end subroutine open_parameter_file
+!
+!!--------------------
+!
+! subroutine close_parameter_file
+!
+! include "constants.h"
+!
+! close(IIN)
+!
+! end subroutine close_parameter_file
+!
+!!--------------------
+!
+! integer function err_occurred()
+!
+! err_occurred = 0
+!
+! end function err_occurred
+!
+!!--------------------
+!
+!! dummy subroutine to avoid warnings about variable not used in other subroutines
+! subroutine unused_string(s)
+!
+! character(len=*) s
+!
+! if (len(s) == 1) continue
+!
+! end subroutine unused_string
+!
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/recompute_jacobian.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/recompute_jacobian.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/recompute_jacobian.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,158 +1,158 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! recompute 3D jacobian at a given point for a 8-node element
-
- subroutine recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
-
- implicit none
-
- include "constants.h"
-
- double precision x,y,z,xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
- double precision xi,eta,gamma,jacobian
-
-! coordinates of the control points
- double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
-
-! 3D shape functions and their derivatives at receiver
- double precision shape3D(NGNOD)
- double precision dershape3D(NDIM,NGNOD)
-
- double precision xxi,yxi,zxi
- double precision xeta,yeta,zeta
- double precision xgamma,ygamma,zgamma
- double precision ra1,ra2,rb1,rb2,rc1,rc2
-
- integer ia
-
-! for 8-node element
- double precision, parameter :: ONE_EIGHTH = 0.125d0
-
-! recompute jacobian for any (xi,eta,gamma) point, not necessarily a GLL point
-
-! check that the parameter file is correct
- if(NGNOD /= 8) stop 'elements should have 8 control nodes'
-
-! ***
-! *** create the 3D shape functions and the Jacobian for an 8-node element
-! ***
-
-!--- case of an 8-node 3D element (Dhatt-Touzot p. 115)
-
- ra1 = one + xi
- ra2 = one - xi
-
- rb1 = one + eta
- rb2 = one - eta
-
- rc1 = one + gamma
- rc2 = one - gamma
-
- shape3D(1) = ONE_EIGHTH*ra2*rb2*rc2
- shape3D(2) = ONE_EIGHTH*ra1*rb2*rc2
- shape3D(3) = ONE_EIGHTH*ra1*rb1*rc2
- shape3D(4) = ONE_EIGHTH*ra2*rb1*rc2
- shape3D(5) = ONE_EIGHTH*ra2*rb2*rc1
- shape3D(6) = ONE_EIGHTH*ra1*rb2*rc1
- shape3D(7) = ONE_EIGHTH*ra1*rb1*rc1
- shape3D(8) = ONE_EIGHTH*ra2*rb1*rc1
-
- dershape3D(1,1) = - ONE_EIGHTH*rb2*rc2
- dershape3D(1,2) = ONE_EIGHTH*rb2*rc2
- dershape3D(1,3) = ONE_EIGHTH*rb1*rc2
- dershape3D(1,4) = - ONE_EIGHTH*rb1*rc2
- dershape3D(1,5) = - ONE_EIGHTH*rb2*rc1
- dershape3D(1,6) = ONE_EIGHTH*rb2*rc1
- dershape3D(1,7) = ONE_EIGHTH*rb1*rc1
- dershape3D(1,8) = - ONE_EIGHTH*rb1*rc1
-
- dershape3D(2,1) = - ONE_EIGHTH*ra2*rc2
- dershape3D(2,2) = - ONE_EIGHTH*ra1*rc2
- dershape3D(2,3) = ONE_EIGHTH*ra1*rc2
- dershape3D(2,4) = ONE_EIGHTH*ra2*rc2
- dershape3D(2,5) = - ONE_EIGHTH*ra2*rc1
- dershape3D(2,6) = - ONE_EIGHTH*ra1*rc1
- dershape3D(2,7) = ONE_EIGHTH*ra1*rc1
- dershape3D(2,8) = ONE_EIGHTH*ra2*rc1
-
- dershape3D(3,1) = - ONE_EIGHTH*ra2*rb2
- dershape3D(3,2) = - ONE_EIGHTH*ra1*rb2
- dershape3D(3,3) = - ONE_EIGHTH*ra1*rb1
- dershape3D(3,4) = - ONE_EIGHTH*ra2*rb1
- dershape3D(3,5) = ONE_EIGHTH*ra2*rb2
- dershape3D(3,6) = ONE_EIGHTH*ra1*rb2
- dershape3D(3,7) = ONE_EIGHTH*ra1*rb1
- dershape3D(3,8) = ONE_EIGHTH*ra2*rb1
-
-! compute coordinates and jacobian matrix
- x=ZERO
- y=ZERO
- z=ZERO
- xxi=ZERO
- xeta=ZERO
- xgamma=ZERO
- yxi=ZERO
- yeta=ZERO
- ygamma=ZERO
- zxi=ZERO
- zeta=ZERO
- zgamma=ZERO
-
- do ia=1,NGNOD
- x=x+shape3D(ia)*xelm(ia)
- y=y+shape3D(ia)*yelm(ia)
- z=z+shape3D(ia)*zelm(ia)
-
- xxi=xxi+dershape3D(1,ia)*xelm(ia)
- xeta=xeta+dershape3D(2,ia)*xelm(ia)
- xgamma=xgamma+dershape3D(3,ia)*xelm(ia)
- yxi=yxi+dershape3D(1,ia)*yelm(ia)
- yeta=yeta+dershape3D(2,ia)*yelm(ia)
- ygamma=ygamma+dershape3D(3,ia)*yelm(ia)
- zxi=zxi+dershape3D(1,ia)*zelm(ia)
- zeta=zeta+dershape3D(2,ia)*zelm(ia)
- zgamma=zgamma+dershape3D(3,ia)*zelm(ia)
- enddo
-
- jacobian = xxi*(yeta*zgamma-ygamma*zeta) - xeta*(yxi*zgamma-ygamma*zxi) + xgamma*(yxi*zeta-yeta*zxi)
-
- if(jacobian <= ZERO) stop '3D Jacobian undefined'
-
-! invert the relation (Fletcher p. 50 vol. 2)
- xix=(yeta*zgamma-ygamma*zeta)/jacobian
- xiy=(xgamma*zeta-xeta*zgamma)/jacobian
- xiz=(xeta*ygamma-xgamma*yeta)/jacobian
- etax=(ygamma*zxi-yxi*zgamma)/jacobian
- etay=(xxi*zgamma-xgamma*zxi)/jacobian
- etaz=(xgamma*yxi-xxi*ygamma)/jacobian
- gammax=(yxi*zeta-yeta*zxi)/jacobian
- gammay=(xeta*zxi-xxi*zeta)/jacobian
- gammaz=(xxi*yeta-xeta*yxi)/jacobian
-
- end subroutine recompute_jacobian
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! recompute 3D jacobian at a given point for a 8-node element
+
+ subroutine recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+
+ implicit none
+
+ include "constants.h"
+
+ double precision x,y,z,xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+ double precision xi,eta,gamma,jacobian
+
+! coordinates of the control points
+ double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
+
+! 3D shape functions and their derivatives at receiver
+ double precision shape3D(NGNOD)
+ double precision dershape3D(NDIM,NGNOD)
+
+ double precision xxi,yxi,zxi
+ double precision xeta,yeta,zeta
+ double precision xgamma,ygamma,zgamma
+ double precision ra1,ra2,rb1,rb2,rc1,rc2
+
+ integer ia
+
+! for 8-node element
+ double precision, parameter :: ONE_EIGHTH = 0.125d0
+
+! recompute jacobian for any (xi,eta,gamma) point, not necessarily a GLL point
+
+! check that the parameter file is correct
+ if(NGNOD /= 8) stop 'elements should have 8 control nodes'
+
+! ***
+! *** create the 3D shape functions and the Jacobian for an 8-node element
+! ***
+
+!--- case of an 8-node 3D element (Dhatt-Touzot p. 115)
+
+ ra1 = one + xi
+ ra2 = one - xi
+
+ rb1 = one + eta
+ rb2 = one - eta
+
+ rc1 = one + gamma
+ rc2 = one - gamma
+
+ shape3D(1) = ONE_EIGHTH*ra2*rb2*rc2
+ shape3D(2) = ONE_EIGHTH*ra1*rb2*rc2
+ shape3D(3) = ONE_EIGHTH*ra1*rb1*rc2
+ shape3D(4) = ONE_EIGHTH*ra2*rb1*rc2
+ shape3D(5) = ONE_EIGHTH*ra2*rb2*rc1
+ shape3D(6) = ONE_EIGHTH*ra1*rb2*rc1
+ shape3D(7) = ONE_EIGHTH*ra1*rb1*rc1
+ shape3D(8) = ONE_EIGHTH*ra2*rb1*rc1
+
+ dershape3D(1,1) = - ONE_EIGHTH*rb2*rc2
+ dershape3D(1,2) = ONE_EIGHTH*rb2*rc2
+ dershape3D(1,3) = ONE_EIGHTH*rb1*rc2
+ dershape3D(1,4) = - ONE_EIGHTH*rb1*rc2
+ dershape3D(1,5) = - ONE_EIGHTH*rb2*rc1
+ dershape3D(1,6) = ONE_EIGHTH*rb2*rc1
+ dershape3D(1,7) = ONE_EIGHTH*rb1*rc1
+ dershape3D(1,8) = - ONE_EIGHTH*rb1*rc1
+
+ dershape3D(2,1) = - ONE_EIGHTH*ra2*rc2
+ dershape3D(2,2) = - ONE_EIGHTH*ra1*rc2
+ dershape3D(2,3) = ONE_EIGHTH*ra1*rc2
+ dershape3D(2,4) = ONE_EIGHTH*ra2*rc2
+ dershape3D(2,5) = - ONE_EIGHTH*ra2*rc1
+ dershape3D(2,6) = - ONE_EIGHTH*ra1*rc1
+ dershape3D(2,7) = ONE_EIGHTH*ra1*rc1
+ dershape3D(2,8) = ONE_EIGHTH*ra2*rc1
+
+ dershape3D(3,1) = - ONE_EIGHTH*ra2*rb2
+ dershape3D(3,2) = - ONE_EIGHTH*ra1*rb2
+ dershape3D(3,3) = - ONE_EIGHTH*ra1*rb1
+ dershape3D(3,4) = - ONE_EIGHTH*ra2*rb1
+ dershape3D(3,5) = ONE_EIGHTH*ra2*rb2
+ dershape3D(3,6) = ONE_EIGHTH*ra1*rb2
+ dershape3D(3,7) = ONE_EIGHTH*ra1*rb1
+ dershape3D(3,8) = ONE_EIGHTH*ra2*rb1
+
+! compute coordinates and jacobian matrix
+ x=ZERO
+ y=ZERO
+ z=ZERO
+ xxi=ZERO
+ xeta=ZERO
+ xgamma=ZERO
+ yxi=ZERO
+ yeta=ZERO
+ ygamma=ZERO
+ zxi=ZERO
+ zeta=ZERO
+ zgamma=ZERO
+
+ do ia=1,NGNOD
+ x=x+shape3D(ia)*xelm(ia)
+ y=y+shape3D(ia)*yelm(ia)
+ z=z+shape3D(ia)*zelm(ia)
+
+ xxi=xxi+dershape3D(1,ia)*xelm(ia)
+ xeta=xeta+dershape3D(2,ia)*xelm(ia)
+ xgamma=xgamma+dershape3D(3,ia)*xelm(ia)
+ yxi=yxi+dershape3D(1,ia)*yelm(ia)
+ yeta=yeta+dershape3D(2,ia)*yelm(ia)
+ ygamma=ygamma+dershape3D(3,ia)*yelm(ia)
+ zxi=zxi+dershape3D(1,ia)*zelm(ia)
+ zeta=zeta+dershape3D(2,ia)*zelm(ia)
+ zgamma=zgamma+dershape3D(3,ia)*zelm(ia)
+ enddo
+
+ jacobian = xxi*(yeta*zgamma-ygamma*zeta) - xeta*(yxi*zgamma-ygamma*zxi) + xgamma*(yxi*zeta-yeta*zxi)
+
+ if(jacobian <= ZERO) stop '3D Jacobian undefined'
+
+! invert the relation (Fletcher p. 50 vol. 2)
+ xix=(yeta*zgamma-ygamma*zeta)/jacobian
+ xiy=(xgamma*zeta-xeta*zgamma)/jacobian
+ xiz=(xeta*ygamma-xgamma*yeta)/jacobian
+ etax=(ygamma*zxi-yxi*zgamma)/jacobian
+ etay=(xxi*zgamma-xgamma*zxi)/jacobian
+ etaz=(xgamma*yxi-xxi*ygamma)/jacobian
+ gammax=(yxi*zeta-yeta*zxi)/jacobian
+ gammay=(xeta*zxi-xxi*zeta)/jacobian
+ gammaz=(xxi*yeta-xeta*yxi)/jacobian
+
+ end subroutine recompute_jacobian
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_header_file.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_header_file.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_header_file.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,145 +1,145 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! save header file OUTPUT_FILES/values_from_mesher.h
-
- subroutine save_header_file(NSPEC_AB,NGLOB_AB,NPROC, &
- ATTENUATION,ANISOTROPY,NSTEP,DT, &
- SIMULATION_TYPE,static_memory_size,nfaces_surface_glob_ext_mesh)
-
- implicit none
-
- include "constants.h"
-
- integer NSPEC_AB,NGLOB_AB,NPROC,NSTEP,SIMULATION_TYPE
-
- logical ATTENUATION,ANISOTROPY
-
- double precision DT
-
- double precision :: static_memory_size
-
- character(len=256) HEADER_FILE
-
- integer :: nfaces_surface_glob_ext_mesh
-
-! copy number of elements and points in an include file for the solver
- call get_value_string(HEADER_FILE, 'solver.HEADER_FILE', &
- OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/values_from_mesher.h')
-
- open(unit=IOUT,file=HEADER_FILE,status='unknown')
- write(IOUT,*)
- write(IOUT,*) '!'
- write(IOUT,*) '! purely informative use'
- write(IOUT,*) '!'
- write(IOUT,*) '! mesh statistics:'
- write(IOUT,*) '! ---------------'
- write(IOUT,*) '!'
- write(IOUT,*) '! note: '
- write(IOUT,*) '! the values are only approximate and differ for different processes'
- write(IOUT,*) '! because the CUBIT + SCOTCH mesh has'
- write(IOUT,*) '! a different number of mesh elements and points in each slice'
- write(IOUT,*) '!'
- write(IOUT,*) '! number of processors = ',NPROC
- write(IOUT,*) '!'
- write(IOUT,*) '! number of ES nodes = ',real(NPROC)/8.
- write(IOUT,*) '! percentage of total 640 ES nodes = ',100.*(real(NPROC)/8.)/640.,' %'
- write(IOUT,*) '! total memory available on these ES nodes (Gb) = ',16.*real(NPROC)/8.
- write(IOUT,*) '!'
- write(IOUT,*) '! min vector length = ',NGLLSQUARE
- write(IOUT,*) '! min critical vector length = ',NGLLSQUARE_NDIM
- write(IOUT,*) '!'
- write(IOUT,*) '! master process: total points per AB slice = ',NGLOB_AB
- write(IOUT,*) '! total elements per AB slice = (will be read in external file)'
- write(IOUT,*) '! total points per AB slice = (will be read in external file)'
- write(IOUT,*) '!'
- write(IOUT,*) '! total for full mesh:'
- write(IOUT,*) '! -------------------'
- write(IOUT,*) '!'
- write(IOUT,*) '!'
- write(IOUT,*) '! number of time steps = ',NSTEP
- write(IOUT,*) '!'
- write(IOUT,*) '! time step = ',DT
- write(IOUT,*) '!'
- write(IOUT,*) '! attenuation uses:'
- if(ATTENUATION) then
- write(IOUT,*) '! NSPEC_ATTENUATION = ', NSPEC_AB
- else
- write(IOUT,*) '! NSPEC_ATTENUATION = ', 1
- endif
- write(IOUT,*) '! '
- write(IOUT,*) '! anisotropy uses:'
- if(ANISOTROPY) then
- write(IOUT,*) '! NSPEC_ANISO = ',NSPEC_AB
- else
- write(IOUT,*) '! NSPEC_ANISO = ', 1
- endif
- write(IOUT,*) '! '
- write(IOUT,*) '! adjoint uses:'
- if (SIMULATION_TYPE == 3) then
- write(IOUT,*) '! NSPEC_ADJOINT = ', NSPEC_AB
- write(IOUT,*) '! NGLOB_ADJOINT = ', NGLOB_AB
- else
- write(IOUT,*) '! NSPEC_ADJOINT = ', 1
- write(IOUT,*) '! NGLOB_ADJOINT = ', 1
- endif
- write(IOUT,*) '! '
- write(IOUT,*) '! approximate least memory needed by the solver:'
- write(IOUT,*) '! ----------------------------------------------'
- write(IOUT,*) '!'
- write(IOUT,*) '! size of static arrays for the biggest slice = ',static_memory_size/1048576.d0,' MB'
- write(IOUT,*) '! = ',static_memory_size/1073741824.d0,' GB'
- write(IOUT,*) '!'
- write(IOUT,*) '! (should be below to 80% of 1.5 GB = 1.2 GB on pangu'
- write(IOUT,*) '! at Caltech, and below and typically equal to 85% of 2 GB = 1.7 GB'
- write(IOUT,*) '! on Marenostrum in Barcelona)'
- write(IOUT,*) '! (if significantly more, the job will not run by lack of memory)'
- write(IOUT,*) '! (if significantly less, you waste a significant amount of memory)'
- write(IOUT,*) '!'
- write(IOUT,*)
- close(IOUT)
-
-! copy number of surface elements in an include file for the movies
- if( nfaces_surface_glob_ext_mesh > 0 ) then
-
- call get_value_string(HEADER_FILE, 'solver.HEADER_FILE', &
- OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/surface_from_mesher.h')
-
- open(unit=IOUT,file=HEADER_FILE,status='unknown')
- write(IOUT,*) '!'
- write(IOUT,*) '! this is the parameter file for static compilation for movie creation'
- write(IOUT,*) '!'
- write(IOUT,*) '! number of elements containing surface faces '
- write(IOUT,*) '! ---------------'
- write(IOUT,*)
- write(IOUT,*) 'integer,parameter :: NSPEC_SURFACE_EXT_MESH = ',nfaces_surface_glob_ext_mesh
- write(IOUT,*)
- close(IOUT)
-
- endif
-
- end subroutine save_header_file
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! save header file OUTPUT_FILES/values_from_mesher.h
+
+ subroutine save_header_file(NSPEC_AB,NGLOB_AB,NPROC, &
+ ATTENUATION,ANISOTROPY,NSTEP,DT, &
+ SIMULATION_TYPE,static_memory_size,nfaces_surface_glob_ext_mesh)
+
+ implicit none
+
+ include "constants.h"
+
+ integer NSPEC_AB,NGLOB_AB,NPROC,NSTEP,SIMULATION_TYPE
+
+ logical ATTENUATION,ANISOTROPY
+
+ double precision DT
+
+ double precision :: static_memory_size
+
+ character(len=256) HEADER_FILE
+
+ integer :: nfaces_surface_glob_ext_mesh
+
+! copy number of elements and points in an include file for the solver
+ call get_value_string(HEADER_FILE, 'solver.HEADER_FILE', &
+ OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/values_from_mesher.h')
+
+ open(unit=IOUT,file=HEADER_FILE,status='unknown')
+ write(IOUT,*)
+ write(IOUT,*) '!'
+ write(IOUT,*) '! purely informative use'
+ write(IOUT,*) '!'
+ write(IOUT,*) '! mesh statistics:'
+ write(IOUT,*) '! ---------------'
+ write(IOUT,*) '!'
+ write(IOUT,*) '! note: '
+ write(IOUT,*) '! the values are only approximate and differ for different processes'
+ write(IOUT,*) '! because the CUBIT + SCOTCH mesh has'
+ write(IOUT,*) '! a different number of mesh elements and points in each slice'
+ write(IOUT,*) '!'
+ write(IOUT,*) '! number of processors = ',NPROC
+ write(IOUT,*) '!'
+ write(IOUT,*) '! number of ES nodes = ',real(NPROC)/8.
+ write(IOUT,*) '! percentage of total 640 ES nodes = ',100.*(real(NPROC)/8.)/640.,' %'
+ write(IOUT,*) '! total memory available on these ES nodes (Gb) = ',16.*real(NPROC)/8.
+ write(IOUT,*) '!'
+ write(IOUT,*) '! min vector length = ',NGLLSQUARE
+ write(IOUT,*) '! min critical vector length = ',NGLLSQUARE_NDIM
+ write(IOUT,*) '!'
+ write(IOUT,*) '! master process: total points per AB slice = ',NGLOB_AB
+ write(IOUT,*) '! total elements per AB slice = (will be read in external file)'
+ write(IOUT,*) '! total points per AB slice = (will be read in external file)'
+ write(IOUT,*) '!'
+ write(IOUT,*) '! total for full mesh:'
+ write(IOUT,*) '! -------------------'
+ write(IOUT,*) '!'
+ write(IOUT,*) '!'
+ write(IOUT,*) '! number of time steps = ',NSTEP
+ write(IOUT,*) '!'
+ write(IOUT,*) '! time step = ',DT
+ write(IOUT,*) '!'
+ write(IOUT,*) '! attenuation uses:'
+ if(ATTENUATION) then
+ write(IOUT,*) '! NSPEC_ATTENUATION = ', NSPEC_AB
+ else
+ write(IOUT,*) '! NSPEC_ATTENUATION = ', 1
+ endif
+ write(IOUT,*) '! '
+ write(IOUT,*) '! anisotropy uses:'
+ if(ANISOTROPY) then
+ write(IOUT,*) '! NSPEC_ANISO = ',NSPEC_AB
+ else
+ write(IOUT,*) '! NSPEC_ANISO = ', 1
+ endif
+ write(IOUT,*) '! '
+ write(IOUT,*) '! adjoint uses:'
+ if (SIMULATION_TYPE == 3) then
+ write(IOUT,*) '! NSPEC_ADJOINT = ', NSPEC_AB
+ write(IOUT,*) '! NGLOB_ADJOINT = ', NGLOB_AB
+ else
+ write(IOUT,*) '! NSPEC_ADJOINT = ', 1
+ write(IOUT,*) '! NGLOB_ADJOINT = ', 1
+ endif
+ write(IOUT,*) '! '
+ write(IOUT,*) '! approximate least memory needed by the solver:'
+ write(IOUT,*) '! ----------------------------------------------'
+ write(IOUT,*) '!'
+ write(IOUT,*) '! size of static arrays for the biggest slice = ',static_memory_size/1048576.d0,' MB'
+ write(IOUT,*) '! = ',static_memory_size/1073741824.d0,' GB'
+ write(IOUT,*) '!'
+ write(IOUT,*) '! (should be below to 80% of 1.5 GB = 1.2 GB on pangu'
+ write(IOUT,*) '! at Caltech, and below and typically equal to 85% of 2 GB = 1.7 GB'
+ write(IOUT,*) '! on Marenostrum in Barcelona)'
+ write(IOUT,*) '! (if significantly more, the job will not run by lack of memory)'
+ write(IOUT,*) '! (if significantly less, you waste a significant amount of memory)'
+ write(IOUT,*) '!'
+ write(IOUT,*)
+ close(IOUT)
+
+! copy number of surface elements in an include file for the movies
+ if( nfaces_surface_glob_ext_mesh > 0 ) then
+
+ call get_value_string(HEADER_FILE, 'solver.HEADER_FILE', &
+ OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH))//'/surface_from_mesher.h')
+
+ open(unit=IOUT,file=HEADER_FILE,status='unknown')
+ write(IOUT,*) '!'
+ write(IOUT,*) '! this is the parameter file for static compilation for movie creation'
+ write(IOUT,*) '!'
+ write(IOUT,*) '! number of elements containing surface faces '
+ write(IOUT,*) '! ---------------'
+ write(IOUT,*)
+ write(IOUT,*) 'integer,parameter :: NSPEC_SURFACE_EXT_MESH = ',nfaces_surface_glob_ext_mesh
+ write(IOUT,*)
+ close(IOUT)
+
+ endif
+
+ end subroutine save_header_file
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/smooth_vol_data.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/smooth_vol_data.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/smooth_vol_data.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -200,7 +200,7 @@
print*," element size : ",element_size
print*," smoothing sigma_h , sigma_v: ",sigma_h,sigma_v
! scalelength: approximately S ~ sigma * sqrt(8.0) for a gaussian smoothing
- print*," smoothing scalelengths horizontal, vertical : ",sigma_h*sqrt(8.0),sigma_v*sqrt(8.0)
+ print*," smoothing scalelengths horizontal, vertical : ",sigma_h*sqrt(8.0),sigma_v*sqrt(8.0)
print*," in dir : ",trim(indir)
print*," out dir: ",trim(outdir)
endif
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/utm_geo.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/utm_geo.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/utm_geo.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,259 +1,259 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!=====================================================================
-!
-! UTM (Universal Transverse Mercator) projection from the USGS
-!
-!=====================================================================
-
- subroutine utm_geo(rlon,rlat,rx,ry,UTM_PROJECTION_ZONE,iway,SUPPRESS_UTM_PROJECTION)
-
-! convert geodetic longitude and latitude to UTM, and back
-! use iway = ILONGLAT2UTM for long/lat to UTM, IUTM2LONGLAT for UTM to lat/long
-! a list of UTM zones of the world is available at www.dmap.co.uk/utmworld.htm
-
- implicit none
-
- include "constants.h"
-
-!
-!-----CAMx v2.03
-!
-! UTM_GEO performs UTM to geodetic (long/lat) translation, and back.
-!
-! This is a Fortran version of the BASIC program "Transverse Mercator
-! Conversion", Copyright 1986, Norman J. Berls (Stefan Musarra, 2/94)
-! Based on algorithm taken from "Map Projections Used by the USGS"
-! by John P. Snyder, Geological Survey Bulletin 1532, USDI.
-!
-! Input/Output arguments:
-!
-! rlon Longitude (deg, negative for West)
-! rlat Latitude (deg)
-! rx UTM easting (m)
-! ry UTM northing (m)
-! UTM_PROJECTION_ZONE UTM zone
-! iway Conversion type
-! ILONGLAT2UTM = geodetic to UTM
-! IUTM2LONGLAT = UTM to geodetic
-!
-
- integer UTM_PROJECTION_ZONE,iway
- double precision rx,ry,rlon,rlat
- logical SUPPRESS_UTM_PROJECTION
-
- double precision, parameter :: degrad=PI/180.d0, raddeg=180.d0/PI
- double precision, parameter :: semimaj=6378206.4d0, semimin=6356583.8d0
- double precision, parameter :: scfa=0.9996d0
-
-! some extracts about UTM:
-!
-! There are 60 longitudinal projection zones numbered 1 to 60 starting at 180°W.
-! Each of these zones is 6 degrees wide, apart from a few exceptions around Norway and Svalbard.
-! There are 20 latitudinal zones spanning the latitudes 80°S to 84°N and denoted
-! by the letters C to X, ommitting the letter O.
-! Each of these is 8 degrees south-north, apart from zone X which is 12 degrees south-north.
-!
-! To change the UTM zone and the hemisphere in which the
-! calculations are carried out, need to change the fortran code and recompile. The UTM zone is described
-! actually by the central meridian of that zone, i.e. the longitude at the midpoint of the zone, 3 degrees
-! from either zone boundary.
-! To change hemisphere need to change the "north" variable:
-! - north=0 for northern hemisphere and
-! - north=10000000 (10000km) for southern hemisphere. values must be in metres i.e. north=10000000.
-!
-! Note that the UTM grids are actually Mercators which
-! employ the standard UTM scale factor 0.9996 and set the
-! Easting Origin to 500,000;
-! the Northing origin in the southern
-! hemisphere is kept at 0 rather than set to 10,000,000
-! and this gives a uniform scale across the equator if the
-! normal convention of selecting the Base Latitude (origin)
-! at the equator (0 deg.) is followed. Northings are
-! positive in the northern hemisphere and negative in the
-! southern hemisphere.
- double precision, parameter :: north=0.d0
- double precision, parameter :: east=500000.d0
-
- double precision e2,e4,e6,ep2,xx,yy,dlat,dlon,zone,cm,cmr,delam
- double precision f1,f2,f3,f4,rm,rn,t,c,a,e1,u,rlat1,dlat1,c1,t1,rn1,r1,d
- double precision rx_save,ry_save,rlon_save,rlat_save
-
- ! checks if conversion to utm has to be done
- if(SUPPRESS_UTM_PROJECTION) then
- if (iway == ILONGLAT2UTM) then
- rx = rlon
- ry = rlat
- else
- rlon = rx
- rlat = ry
- endif
- return
- endif
-
-! save original parameters
- rlon_save = rlon
- rlat_save = rlat
- rx_save = rx
- ry_save = ry
-
- xx = 0.d0
- yy = 0.d0
- dlat = 0.d0
- dlon = 0.d0
-
-! define parameters of reference ellipsoid
- e2=1.0-(semimin/semimaj)**2.0
- e4=e2*e2
- e6=e2*e4
- ep2=e2/(1.-e2)
-
- if (iway == IUTM2LONGLAT) then
- xx = rx
- yy = ry
- else
- dlon = rlon
- dlat = rlat
- endif
-!
-!----- Set Zone parameters
-!
- zone = dble(UTM_PROJECTION_ZONE)
- ! sets central meridian for this zone
- cm = zone*6.0 - 183.0
- cmr = cm*degrad
-!
-!---- Lat/Lon to UTM conversion
-!
- if (iway == ILONGLAT2UTM) then
-
- rlon = degrad*dlon
- rlat = degrad*dlat
-
- delam = dlon - cm
- if (delam < -180.) delam = delam + 360.
- if (delam > 180.) delam = delam - 360.
- delam = delam*degrad
-
- f1 = (1. - e2/4. - 3.*e4/64. - 5.*e6/256)*rlat
- f2 = 3.*e2/8. + 3.*e4/32. + 45.*e6/1024.
- f2 = f2*sin(2.*rlat)
- f3 = 15.*e4/256.*45.*e6/1024.
- f3 = f3*sin(4.*rlat)
- f4 = 35.*e6/3072.
- f4 = f4*sin(6.*rlat)
- rm = semimaj*(f1 - f2 + f3 - f4)
- if (dlat == 90. .or. dlat == -90.) then
- xx = 0.
- yy = scfa*rm
- else
- rn = semimaj/sqrt(1. - e2*sin(rlat)**2)
- t = tan(rlat)**2
- c = ep2*cos(rlat)**2
- a = cos(rlat)*delam
-
- f1 = (1. - t + c)*a**3/6.
- f2 = 5. - 18.*t + t**2 + 72.*c - 58.*ep2
- f2 = f2*a**5/120.
- xx = scfa*rn*(a + f1 + f2)
- f1 = a**2/2.
- f2 = 5. - t + 9.*c + 4.*c**2
- f2 = f2*a**4/24.
- f3 = 61. - 58.*t + t**2 + 600.*c - 330.*ep2
- f3 = f3*a**6/720.
- yy = scfa*(rm + rn*tan(rlat)*(f1 + f2 + f3))
- endif
- xx = xx + east
- yy = yy + north
-
-!
-!---- UTM to Lat/Lon conversion
-!
- else
-
- xx = xx - east
- yy = yy - north
- e1 = sqrt(1. - e2)
- e1 = (1. - e1)/(1. + e1)
- rm = yy/scfa
- u = 1. - e2/4. - 3.*e4/64. - 5.*e6/256.
- u = rm/(semimaj*u)
-
- f1 = 3.*e1/2. - 27.*e1**3./32.
- f1 = f1*sin(2.*u)
- f2 = 21.*e1**2/16. - 55.*e1**4/32.
- f2 = f2*sin(4.*u)
- f3 = 151.*e1**3./96.
- f3 = f3*sin(6.*u)
- rlat1 = u + f1 + f2 + f3
- dlat1 = rlat1*raddeg
- if (dlat1 >= 90. .or. dlat1 <= -90.) then
- dlat1 = dmin1(dlat1,dble(90.) )
- dlat1 = dmax1(dlat1,dble(-90.) )
- dlon = cm
- else
- c1 = ep2*cos(rlat1)**2.
- t1 = tan(rlat1)**2.
- f1 = 1. - e2*sin(rlat1)**2.
- rn1 = semimaj/sqrt(f1)
- r1 = semimaj*(1. - e2)/sqrt(f1**3)
- d = xx/(rn1*scfa)
-
- f1 = rn1*tan(rlat1)/r1
- f2 = d**2/2.
- f3 = 5.*3.*t1 + 10.*c1 - 4.*c1**2 - 9.*ep2
- f3 = f3*d**2*d**2/24.
- f4 = 61. + 90.*t1 + 298.*c1 + 45.*t1**2. - 252.*ep2 - 3.*c1**2
- f4 = f4*(d**2)**3./720.
- rlat = rlat1 - f1*(f2 - f3 + f4)
- dlat = rlat*raddeg
-
- f1 = 1. + 2.*t1 + c1
- f1 = f1*d**2*d/6.
- f2 = 5. - 2.*c1 + 28.*t1 - 3.*c1**2 + 8.*ep2 + 24.*t1**2.
- f2 = f2*(d**2)**2*d/120.
- rlon = cmr + (d - f1 + f2)/cos(rlat1)
- dlon = rlon*raddeg
- if (dlon < -180.) dlon = dlon + 360.
- if (dlon > 180.) dlon = dlon - 360.
- endif
- endif
-
- if (iway == IUTM2LONGLAT) then
- rlon = dlon
- rlat = dlat
- rx = rx_save
- ry = ry_save
- else
- rx = xx
- ry = yy
- rlon = rlon_save
- rlat = rlat_save
- endif
-
- end subroutine utm_geo
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!=====================================================================
+!
+! UTM (Universal Transverse Mercator) projection from the USGS
+!
+!=====================================================================
+
+ subroutine utm_geo(rlon,rlat,rx,ry,UTM_PROJECTION_ZONE,iway,SUPPRESS_UTM_PROJECTION)
+
+! convert geodetic longitude and latitude to UTM, and back
+! use iway = ILONGLAT2UTM for long/lat to UTM, IUTM2LONGLAT for UTM to lat/long
+! a list of UTM zones of the world is available at www.dmap.co.uk/utmworld.htm
+
+ implicit none
+
+ include "constants.h"
+
+!
+!-----CAMx v2.03
+!
+! UTM_GEO performs UTM to geodetic (long/lat) translation, and back.
+!
+! This is a Fortran version of the BASIC program "Transverse Mercator
+! Conversion", Copyright 1986, Norman J. Berls (Stefan Musarra, 2/94)
+! Based on algorithm taken from "Map Projections Used by the USGS"
+! by John P. Snyder, Geological Survey Bulletin 1532, USDI.
+!
+! Input/Output arguments:
+!
+! rlon Longitude (deg, negative for West)
+! rlat Latitude (deg)
+! rx UTM easting (m)
+! ry UTM northing (m)
+! UTM_PROJECTION_ZONE UTM zone
+! iway Conversion type
+! ILONGLAT2UTM = geodetic to UTM
+! IUTM2LONGLAT = UTM to geodetic
+!
+
+ integer UTM_PROJECTION_ZONE,iway
+ double precision rx,ry,rlon,rlat
+ logical SUPPRESS_UTM_PROJECTION
+
+ double precision, parameter :: degrad=PI/180.d0, raddeg=180.d0/PI
+ double precision, parameter :: semimaj=6378206.4d0, semimin=6356583.8d0
+ double precision, parameter :: scfa=0.9996d0
+
+! some extracts about UTM:
+!
+! There are 60 longitudinal projection zones numbered 1 to 60 starting at 180°W.
+! Each of these zones is 6 degrees wide, apart from a few exceptions around Norway and Svalbard.
+! There are 20 latitudinal zones spanning the latitudes 80°S to 84°N and denoted
+! by the letters C to X, ommitting the letter O.
+! Each of these is 8 degrees south-north, apart from zone X which is 12 degrees south-north.
+!
+! To change the UTM zone and the hemisphere in which the
+! calculations are carried out, need to change the fortran code and recompile. The UTM zone is described
+! actually by the central meridian of that zone, i.e. the longitude at the midpoint of the zone, 3 degrees
+! from either zone boundary.
+! To change hemisphere need to change the "north" variable:
+! - north=0 for northern hemisphere and
+! - north=10000000 (10000km) for southern hemisphere. values must be in metres i.e. north=10000000.
+!
+! Note that the UTM grids are actually Mercators which
+! employ the standard UTM scale factor 0.9996 and set the
+! Easting Origin to 500,000;
+! the Northing origin in the southern
+! hemisphere is kept at 0 rather than set to 10,000,000
+! and this gives a uniform scale across the equator if the
+! normal convention of selecting the Base Latitude (origin)
+! at the equator (0 deg.) is followed. Northings are
+! positive in the northern hemisphere and negative in the
+! southern hemisphere.
+ double precision, parameter :: north=0.d0
+ double precision, parameter :: east=500000.d0
+
+ double precision e2,e4,e6,ep2,xx,yy,dlat,dlon,zone,cm,cmr,delam
+ double precision f1,f2,f3,f4,rm,rn,t,c,a,e1,u,rlat1,dlat1,c1,t1,rn1,r1,d
+ double precision rx_save,ry_save,rlon_save,rlat_save
+
+ ! checks if conversion to utm has to be done
+ if(SUPPRESS_UTM_PROJECTION) then
+ if (iway == ILONGLAT2UTM) then
+ rx = rlon
+ ry = rlat
+ else
+ rlon = rx
+ rlat = ry
+ endif
+ return
+ endif
+
+! save original parameters
+ rlon_save = rlon
+ rlat_save = rlat
+ rx_save = rx
+ ry_save = ry
+
+ xx = 0.d0
+ yy = 0.d0
+ dlat = 0.d0
+ dlon = 0.d0
+
+! define parameters of reference ellipsoid
+ e2=1.0-(semimin/semimaj)**2.0
+ e4=e2*e2
+ e6=e2*e4
+ ep2=e2/(1.-e2)
+
+ if (iway == IUTM2LONGLAT) then
+ xx = rx
+ yy = ry
+ else
+ dlon = rlon
+ dlat = rlat
+ endif
+!
+!----- Set Zone parameters
+!
+ zone = dble(UTM_PROJECTION_ZONE)
+ ! sets central meridian for this zone
+ cm = zone*6.0 - 183.0
+ cmr = cm*degrad
+!
+!---- Lat/Lon to UTM conversion
+!
+ if (iway == ILONGLAT2UTM) then
+
+ rlon = degrad*dlon
+ rlat = degrad*dlat
+
+ delam = dlon - cm
+ if (delam < -180.) delam = delam + 360.
+ if (delam > 180.) delam = delam - 360.
+ delam = delam*degrad
+
+ f1 = (1. - e2/4. - 3.*e4/64. - 5.*e6/256)*rlat
+ f2 = 3.*e2/8. + 3.*e4/32. + 45.*e6/1024.
+ f2 = f2*sin(2.*rlat)
+ f3 = 15.*e4/256.*45.*e6/1024.
+ f3 = f3*sin(4.*rlat)
+ f4 = 35.*e6/3072.
+ f4 = f4*sin(6.*rlat)
+ rm = semimaj*(f1 - f2 + f3 - f4)
+ if (dlat == 90. .or. dlat == -90.) then
+ xx = 0.
+ yy = scfa*rm
+ else
+ rn = semimaj/sqrt(1. - e2*sin(rlat)**2)
+ t = tan(rlat)**2
+ c = ep2*cos(rlat)**2
+ a = cos(rlat)*delam
+
+ f1 = (1. - t + c)*a**3/6.
+ f2 = 5. - 18.*t + t**2 + 72.*c - 58.*ep2
+ f2 = f2*a**5/120.
+ xx = scfa*rn*(a + f1 + f2)
+ f1 = a**2/2.
+ f2 = 5. - t + 9.*c + 4.*c**2
+ f2 = f2*a**4/24.
+ f3 = 61. - 58.*t + t**2 + 600.*c - 330.*ep2
+ f3 = f3*a**6/720.
+ yy = scfa*(rm + rn*tan(rlat)*(f1 + f2 + f3))
+ endif
+ xx = xx + east
+ yy = yy + north
+
+!
+!---- UTM to Lat/Lon conversion
+!
+ else
+
+ xx = xx - east
+ yy = yy - north
+ e1 = sqrt(1. - e2)
+ e1 = (1. - e1)/(1. + e1)
+ rm = yy/scfa
+ u = 1. - e2/4. - 3.*e4/64. - 5.*e6/256.
+ u = rm/(semimaj*u)
+
+ f1 = 3.*e1/2. - 27.*e1**3./32.
+ f1 = f1*sin(2.*u)
+ f2 = 21.*e1**2/16. - 55.*e1**4/32.
+ f2 = f2*sin(4.*u)
+ f3 = 151.*e1**3./96.
+ f3 = f3*sin(6.*u)
+ rlat1 = u + f1 + f2 + f3
+ dlat1 = rlat1*raddeg
+ if (dlat1 >= 90. .or. dlat1 <= -90.) then
+ dlat1 = dmin1(dlat1,dble(90.) )
+ dlat1 = dmax1(dlat1,dble(-90.) )
+ dlon = cm
+ else
+ c1 = ep2*cos(rlat1)**2.
+ t1 = tan(rlat1)**2.
+ f1 = 1. - e2*sin(rlat1)**2.
+ rn1 = semimaj/sqrt(f1)
+ r1 = semimaj*(1. - e2)/sqrt(f1**3)
+ d = xx/(rn1*scfa)
+
+ f1 = rn1*tan(rlat1)/r1
+ f2 = d**2/2.
+ f3 = 5.*3.*t1 + 10.*c1 - 4.*c1**2 - 9.*ep2
+ f3 = f3*d**2*d**2/24.
+ f4 = 61. + 90.*t1 + 298.*c1 + 45.*t1**2. - 252.*ep2 - 3.*c1**2
+ f4 = f4*(d**2)**3./720.
+ rlat = rlat1 - f1*(f2 - f3 + f4)
+ dlat = rlat*raddeg
+
+ f1 = 1. + 2.*t1 + c1
+ f1 = f1*d**2*d/6.
+ f2 = 5. - 2.*c1 + 28.*t1 - 3.*c1**2 + 8.*ep2 + 24.*t1**2.
+ f2 = f2*(d**2)**2*d/120.
+ rlon = cmr + (d - f1 + f2)/cos(rlat1)
+ dlon = rlon*raddeg
+ if (dlon < -180.) dlon = dlon + 360.
+ if (dlon > 180.) dlon = dlon - 360.
+ endif
+ endif
+
+ if (iway == IUTM2LONGLAT) then
+ rlon = dlon
+ rlat = dlat
+ rx = rx_save
+ ry = ry_save
+ else
+ rx = xx
+ ry = yy
+ rlon = rlon_save
+ rlat = rlat_save
+ endif
+
+ end subroutine utm_geo
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -128,17 +128,17 @@
endif
end subroutine assemble_MPI_vector_ext_mesh
-
+
!
!-------------------------------------------------------------------------------------------------
!
-
+
subroutine assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,array_val, &
- buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,my_neighbours_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh &
- )
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
! sends data
@@ -195,17 +195,17 @@
endif
end subroutine assemble_MPI_vector_ext_mesh_s
-
+
!
!-------------------------------------------------------------------------------------------------
!
- subroutine assemble_MPI_vector_ext_mesh_send_cuda(NPROC, &
- buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh, &
- my_neighbours_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+ subroutine assemble_MPI_vector_send_cuda(NPROC, &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
! sends data
! note: array to assemble already filled into buffer_send_vector_ext_mesh array
@@ -226,9 +226,10 @@
integer iinterface
- ! here we have to assemble all the contributions between partitions using MPI
+ ! note: preparation of the contribution between partitions using MPI
+ ! already done in transfer_boun_accel routine
- ! assemble only if more than one partition
+ ! send only if more than one partition
if(NPROC > 1) then
! send messages
@@ -249,7 +250,7 @@
endif
- end subroutine assemble_MPI_vector_ext_mesh_send_cuda
+ end subroutine assemble_MPI_vector_send_cuda
!
!-------------------------------------------------------------------------------------------------
@@ -315,11 +316,12 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine assemble_MPI_vector_ext_mesh_write_cuda(NPROC,NGLOB_AB,array_val, Mesh_pointer, &
- buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh,&
- FORWARD_OR_ADJOINT )
+ subroutine assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,array_val, Mesh_pointer, &
+ buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh, &
+ FORWARD_OR_ADJOINT )
! waits for data to receive and assembles
@@ -344,7 +346,7 @@
integer iinterface ! ipoin
integer FORWARD_OR_ADJOINT
-
+
! here we have to assemble all the contributions between partitions using MPI
! assemble only if more than one partition
@@ -356,10 +358,11 @@
enddo
! adding contributions of neighbours
- call transfer_and_assemble_accel_to_device(Mesh_pointer, array_val, buffer_recv_vector_ext_mesh, &
- num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh,&
- ibool_interfaces_ext_mesh,FORWARD_OR_ADJOINT)
-
+ call transfer_asmbl_accel_to_device(Mesh_pointer, array_val, buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,&
+ ibool_interfaces_ext_mesh,FORWARD_OR_ADJOINT)
+
! This step is done via previous function transfer_and_assemble...
! do iinterface = 1, num_interfaces_ext_mesh
! do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
@@ -367,7 +370,7 @@
! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
! enddo
! enddo
-
+
! wait for communications completion (send)
do iinterface = 1, num_interfaces_ext_mesh
call wait_req(request_send_vector_ext_mesh(iinterface))
@@ -375,13 +378,13 @@
endif
- end subroutine assemble_MPI_vector_ext_mesh_write_cuda
+ end subroutine assemble_MPI_vector_write_cuda
!
!-------------------------------------------------------------------------------------------------
!
- subroutine assemble_MPI_scalar_ext_mesh_send_cuda(NPROC, &
+ subroutine assemble_MPI_scalar_send_cuda(NPROC, &
buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh, &
@@ -410,8 +413,8 @@
! sends only if more than one partition
if(NPROC > 1) then
- ! note: partition border copy into the buffer has already been done
- ! by routine transfer_boundary_potential_from_device()
+ ! note: partition border copy into the buffer has already been done
+ ! by routine transfer_boun_pot_from_device()
! send messages
do iinterface = 1, num_interfaces_ext_mesh
@@ -434,13 +437,13 @@
endif
- end subroutine assemble_MPI_scalar_ext_mesh_send_cuda
+ end subroutine assemble_MPI_scalar_send_cuda
!
!-------------------------------------------------------------------------------------------------
!
- subroutine assemble_MPI_scalar_ext_mesh_write_cuda(NPROC,NGLOB_AB,array_val, &
+ subroutine assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,array_val, &
Mesh_pointer, &
buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
max_nibool_interfaces_ext_mesh, &
@@ -457,7 +460,7 @@
integer :: NPROC
integer :: NGLOB_AB
integer(kind=8) :: Mesh_pointer
-
+
integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
! array to assemble
@@ -484,7 +487,7 @@
enddo
! adding contributions of neighbours
- call transfer_and_assemble_potential_to_device(Mesh_pointer, array_val, buffer_recv_scalar_ext_mesh, &
+ call transfer_asmbl_pot_to_device(Mesh_pointer, array_val, buffer_recv_scalar_ext_mesh, &
num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh,&
ibool_interfaces_ext_mesh,FORWARD_OR_ADJOINT)
@@ -504,6 +507,6 @@
endif
- end subroutine assemble_MPI_scalar_ext_mesh_write_cuda
+ end subroutine assemble_MPI_scalar_write_cuda
-
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/comp_source_time_function.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/comp_source_time_function.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/comp_source_time_function.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,95 +1,95 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- double precision function comp_source_time_function(t,hdur)
-
- implicit none
-
- include "constants.h"
-
- double precision t,hdur
-
- double precision, external :: netlib_specfun_erf
-
- ! quasi Heaviside, small Gaussian moment-rate tensor with hdur
- comp_source_time_function = 0.5d0*(1.0d0 + netlib_specfun_erf(t/hdur))
-
- end function comp_source_time_function
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- double precision function comp_source_time_function_gauss(t,hdur)
-
- implicit none
-
- include "constants.h"
-
- double precision :: t,hdur
- double precision :: hdur_decay
- double precision,parameter :: SOURCE_DECAY_STRONG = 2.0d0/SOURCE_DECAY_MIMIC_TRIANGLE
-
- ! note: hdur given is hdur_gaussian = hdur/SOURCE_DECAY_MIMIC_TRIANGLE
- ! and SOURCE_DECAY_MIMIC_TRIANGLE ~ 1.68
- hdur_decay = hdur
-
- ! this here uses a stronger gaussian decay rate (empirical value) to avoid non-zero onset times;
- ! however, it should mimik a triangle source time function...
- !hdur_decay = hdur / SOURCE_DECAY_STRONG
-
- ! note: a nonzero time to start the simulation with would lead to more high-frequency noise
- ! due to the (spatial) discretization of the point source on the mesh
-
- ! gaussian
- comp_source_time_function_gauss = exp(-(t/hdur_decay)**2)/(sqrt(PI)*hdur_decay)
-
- end function comp_source_time_function_gauss
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- double precision function comp_source_time_function_rickr(t,f0)
-
- implicit none
-
- include "constants.h"
-
- double precision t,f0
-
- ! ricker
- comp_source_time_function_rickr = (1.d0 - 2.d0*PI*PI*f0*f0*t*t ) &
- * exp( -PI*PI*f0*f0*t*t )
-
- !!! another source time function they have called 'ricker' in some old papers,
- !!! e.g., 'Finite-Frequency Kernels Based on Adjoint Methods' by Liu & Tromp, BSSA (2006)
- !!! in order to benchmark those simulations, the following formula is needed.
- ! comp_source_time_function_rickr = -2.d0*PI*PI*f0*f0*f0*t * exp(-PI*PI*f0*f0*t*t)
-
- end function comp_source_time_function_rickr
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ double precision function comp_source_time_function(t,hdur)
+
+ implicit none
+
+ include "constants.h"
+
+ double precision t,hdur
+
+ double precision, external :: netlib_specfun_erf
+
+ ! quasi Heaviside, small Gaussian moment-rate tensor with hdur
+ comp_source_time_function = 0.5d0*(1.0d0 + netlib_specfun_erf(t/hdur))
+
+ end function comp_source_time_function
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ double precision function comp_source_time_function_gauss(t,hdur)
+
+ implicit none
+
+ include "constants.h"
+
+ double precision :: t,hdur
+ double precision :: hdur_decay
+ double precision,parameter :: SOURCE_DECAY_STRONG = 2.0d0/SOURCE_DECAY_MIMIC_TRIANGLE
+
+ ! note: hdur given is hdur_gaussian = hdur/SOURCE_DECAY_MIMIC_TRIANGLE
+ ! and SOURCE_DECAY_MIMIC_TRIANGLE ~ 1.68
+ hdur_decay = hdur
+
+ ! this here uses a stronger gaussian decay rate (empirical value) to avoid non-zero onset times;
+ ! however, it should mimik a triangle source time function...
+ !hdur_decay = hdur / SOURCE_DECAY_STRONG
+
+ ! note: a nonzero time to start the simulation with would lead to more high-frequency noise
+ ! due to the (spatial) discretization of the point source on the mesh
+
+ ! gaussian
+ comp_source_time_function_gauss = exp(-(t/hdur_decay)**2)/(sqrt(PI)*hdur_decay)
+
+ end function comp_source_time_function_gauss
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ double precision function comp_source_time_function_rickr(t,f0)
+
+ implicit none
+
+ include "constants.h"
+
+ double precision t,f0
+
+ ! ricker
+ comp_source_time_function_rickr = (1.d0 - 2.d0*PI*PI*f0*f0*t*t ) &
+ * exp( -PI*PI*f0*f0*t*t )
+
+ !!! another source time function they have called 'ricker' in some old papers,
+ !!! e.g., 'Finite-Frequency Kernels Based on Adjoint Methods' by Liu & Tromp, BSSA (2006)
+ !!! in order to benchmark those simulations, the following formula is needed.
+ ! comp_source_time_function_rickr = -2.d0*PI*PI*f0*f0*f0*t * exp(-PI*PI*f0*f0*t*t)
+
+ end function comp_source_time_function_rickr
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -40,7 +40,8 @@
use specfem_par,only: PRINT_SOURCE_TIME_FUNCTION,stf_used_total, &
xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,&
- station_name,network_name,adj_source_file,nrec_local,number_receiver_global
+ station_name,network_name,adj_source_file,nrec_local,number_receiver_global, &
+ nsources_local
implicit none
include "constants.h"
@@ -75,7 +76,7 @@
!adjoint simulations
integer:: SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT
logical:: GPU_MODE
- integer(kind=8) :: Mesh_pointer
+ integer(kind=8) :: Mesh_pointer
integer:: nrec
integer,dimension(nrec) :: islice_selected_rec,ispec_selected_rec
integer:: nadj_rec_local
@@ -92,7 +93,7 @@
integer :: isource,iglob,ispec,i,j,k,ier
integer :: irec_local,irec
double precision, dimension(NSOURCES) :: stf_pre_compute
-
+
! adjoint sources in SU format
integer :: it_start,it_end
real(kind=CUSTOM_REAL) :: adj_temp(NSTEP)
@@ -112,7 +113,7 @@
endif
! forward simulations
- if (SIMULATION_TYPE == 1) then
+ if (SIMULATION_TYPE == 1 .and. nsources_local > 0) then
!way 2
if(GPU_MODE) then
@@ -121,21 +122,21 @@
if(USE_FORCE_POINT_SOURCE) then
! precomputes source time function factor
stf_pre_compute(isource) = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr( &
- dble(it-1)*DT-t0-tshift_cmt(isource),hdur(isource))
+ dble(it-1)*DT-t0-tshift_cmt(isource),hdur(isource))
else
stf_pre_compute(isource) = comp_source_time_function_gauss( &
- dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
endif
enddo
- stf_used_total = stf_used_total + sum(stf_pre_compute(:))
+ stf_used_total = stf_used_total + sum(stf_pre_compute(:))
! only implements SIMTYPE=1 and NOISE_TOM=0
! write(*,*) "fortran dt = ", dt
! change dt -> DT
- call compute_add_sources_acoustic_cuda(Mesh_pointer, phase_is_inner, &
+ call compute_add_sources_ac_cuda(Mesh_pointer, phase_is_inner, &
NSOURCES, SIMULATION_TYPE, &
USE_FORCE_POINT_SOURCE, stf_pre_compute, myrank)
endif
-
+
else ! .NOT. GPU_MODE
! adds acoustic sources
@@ -225,7 +226,7 @@
endif ! myrank
enddo ! NSOURCES
- endif ! GPU_MODE
+ endif ! GPU_MODE
endif
! NOTE: adjoint sources and backward wavefield timing:
@@ -331,7 +332,7 @@
enddo
close(IIN_SU1)
endif !if (.not. SU_FORMAT)
-
+
deallocate(adj_sourcearray)
endif ! if(ibool_read_adj_arrays)
@@ -339,11 +340,11 @@
if( it < NSTEP ) then
! receivers act as sources
if( GPU_MODE) then
- call add_sources_acoustic_sim_type_2_or_3_cuda(Mesh_pointer, adj_sourcearrays, &
+ call add_sources_ac_sim_2_or_3_cuda(Mesh_pointer, adj_sourcearrays, &
size(adj_sourcearrays), phase_is_inner, myrank, nrec, &
NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC),&
islice_selected_rec, nadj_rec_local, NTSTEP_BETWEEN_READ_ADJSRC)
- else
+ else
irec_local = 0
do irec = 1,nrec
! add the source (only if this proc carries the source)
@@ -353,14 +354,14 @@
! adds source array
ispec = ispec_selected_rec(irec)
if( ispec_is_acoustic(ispec) ) then
-
+
! checks if element is in phase_is_inner run
if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
do k = 1,NGLLZ
do j = 1,NGLLY
do i = 1,NGLLX
iglob = ibool(i,j,k,ispec)
- ! beware, for acoustic medium, a pressure source would be taking the negative
+ ! beware, for acoustic medium, a pressure source would be taking the negative
! and divide by Kappa of the fluid;
! this would have to be done when constructing the adjoint source.
!
@@ -387,31 +388,31 @@
! thus indexing is NSTEP - it , instead of NSTEP - it - 1
! adjoint simulations
- if (SIMULATION_TYPE == 3) then
+ if (SIMULATION_TYPE == 3 .and. nsources_local > 0) then
! on GPU
if(GPU_MODE) then
- if( NSOURCES > 0 ) then
+ if( NSOURCES > 0 ) then
do isource = 1,NSOURCES
if(USE_FORCE_POINT_SOURCE) then
! precomputes source time function factors
stf_pre_compute(isource) = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr( &
- dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur(isource))
+ dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur(isource))
else
stf_pre_compute(isource) = comp_source_time_function_gauss( &
- dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- endif
+ dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ endif
enddo
stf_used_total = stf_used_total + sum(stf_pre_compute(:))
-
+
! only implements SIMTYPE=3
- call compute_add_sources_acoustic_sim3_cuda(Mesh_pointer, phase_is_inner, &
+ call compute_add_sources_ac_s3_cuda(Mesh_pointer, phase_is_inner, &
NSOURCES, SIMULATION_TYPE, &
USE_FORCE_POINT_SOURCE, stf_pre_compute, myrank)
endif
else ! .NOT. GPU_MODE
-
+
! adds acoustic sources
do isource = 1,NSOURCES
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -40,13 +40,17 @@
xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,&
station_name,network_name,adj_source_file, &
LOCAL_PATH,wgllwgll_xy, &
- num_free_surface_faces,free_surface_ispec,free_surface_ijk,free_surface_jacobian2Dw, &
+ num_free_surface_faces,free_surface_ispec, &
+ free_surface_ijk,free_surface_jacobian2Dw, &
noise_sourcearray,irec_master_noise, &
- normal_x_noise,normal_y_noise,normal_z_noise,mask_noise,noise_surface_movie, &
- nrec_local,number_receiver_global
+ normal_x_noise,normal_y_noise,normal_z_noise, &
+ mask_noise,noise_surface_movie, &
+ nrec_local,number_receiver_global, &
+ nsources_local
use specfem_par_movie,only: &
- store_val_ux_external_mesh,store_val_uy_external_mesh,store_val_uz_external_mesh
+ store_val_ux_external_mesh,store_val_uy_external_mesh, &
+ store_val_uz_external_mesh
implicit none
@@ -95,7 +99,7 @@
real(kind=CUSTOM_REAL),dimension(:,:,:,:,:),allocatable:: adj_sourcearray
real(kind=CUSTOM_REAL) stf_used,stf_used_total_all,time_source
! for GPU_MODE
- real(kind=SIZE_DOUBLE), dimension(NSOURCES) :: stf_pre_compute
+ double precision, dimension(NSOURCES) :: stf_pre_compute
integer :: isource,iglob,i,j,k,ispec
integer :: irec_local,irec, ier
@@ -109,7 +113,7 @@
integer(kind=4) :: i4head(nheader/4) ! 4-byte-integer
real(kind=4) :: r4head(nheader/4) ! 4-byte-real
equivalence (i2head,i4head,r4head) ! share the same 240-byte-memory
- double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY),hgammar(NGLLZ), hpgammar(NGLLZ)
+ double precision :: hxir(NGLLX),hpxir(NGLLX),hetar(NGLLY),hpetar(NGLLY),hgammar(NGLLZ),hpgammar(NGLLZ)
! plotting source time function
if(PRINT_SOURCE_TIME_FUNCTION .and. .not. phase_is_inner ) then
@@ -118,89 +122,93 @@
endif
! forward simulations
- if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0) then
+ if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then
- if(GPU_MODE) then
+ if(GPU_MODE) then
+ do isource = 1,NSOURCES
+ stf_pre_compute(isource) = comp_source_time_function( &
+ dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ enddo
+ ! only implements SIMTYPE=1 and NOISE_TOM=0
+ ! write(*,*) "fortran dt = ", dt
+ ! change dt -> DT
+ call compute_add_sources_el_cuda(Mesh_pointer, &
+ !NSPEC_AB, NGLOB_AB,
+ phase_is_inner,NSOURCES, &
+ !it, DT, t0, &
+ !SIMULATION_TYPE, NSTEP, NOISE_TOMOGRAPHY,&
+ !USE_FORCE_POINT_SOURCE, &
+ stf_pre_compute, myrank)
- do isource = 1,NSOURCES
- stf_pre_compute(isource) = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian)
- enddo
- ! only implements SIMTYPE=1 and NOISE_TOM=0
- ! write(*,*) "fortran dt = ", dt
- ! change dt -> DT
- call compute_add_sources_elastic_cuda(Mesh_pointer, NSPEC_AB, NGLOB_AB, phase_is_inner,&
- NSOURCES, it, DT, t0, SIMULATION_TYPE, NSTEP, NOISE_TOMOGRAPHY,&
- USE_FORCE_POINT_SOURCE, stf_pre_compute, myrank)
+ else ! .NOT. GPU_MODE
- else ! .NOT. GPU_MODE
+ do isource = 1,NSOURCES
- do isource = 1,NSOURCES
+ ! add the source (only if this proc carries the source)
+ if(myrank == islice_selected_source(isource)) then
- ! add the source (only if this proc carries the source)
- if(myrank == islice_selected_source(isource)) then
+ ispec = ispec_selected_source(isource)
- ispec = ispec_selected_source(isource)
+ if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
- if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+ if( ispec_is_elastic(ispec) ) then
- if( ispec_is_elastic(ispec) ) then
+ if(USE_FORCE_POINT_SOURCE) then
- if(USE_FORCE_POINT_SOURCE) then
+ ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+ iglob = ibool(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)), &
+ ispec_selected_source(isource))
- ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
- iglob = ibool(nint(xi_source(isource)), &
- nint(eta_source(isource)), &
- nint(gamma_source(isource)), &
- ispec_selected_source(isource))
+ f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
- f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
+ !if (it == 1 .and. myrank == 0) then
+ ! write(IMAIN,*) 'using a source of dominant frequency ',f0
+ ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+ ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+ !endif
- !if (it == 1 .and. myrank == 0) then
- ! write(IMAIN,*) 'using a source of dominant frequency ',f0
- ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
- ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
- !endif
+ ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
+ stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
- ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
- stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
+ ! we use a force in a single direction along one of the components:
+ ! x/y/z or E/N/Z-direction would correspond to 1/2/3 = COMPONENT_FORCE_SOURCE
+ ! e.g. nu_source(:,3) here would be a source normal to the surface (z-direction).
+ accel(:,iglob) = accel(:,iglob) &
+ + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
- ! we use a force in a single direction along one of the components:
- ! x/y/z or E/N/Z-direction would correspond to 1/2/3 = COMPONENT_FORCE_SOURCE
- ! e.g. nu_source(:,3) here would be a source normal to the surface (z-direction).
- accel(:,iglob) = accel(:,iglob) &
- + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
+ else
- else
+ stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ stf_used = sngl(stf)
+ else
+ stf_used = stf
+ endif
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- stf_used = sngl(stf)
- else
- stf_used = stf
- endif
+ ! add source array
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+ accel(:,iglob) = accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
+ enddo
+ enddo
+ enddo
- ! add source array
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
- accel(:,iglob) = accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
- enddo
- enddo
- enddo
+ endif ! USE_FORCE_POINT_SOURCE
- endif ! USE_FORCE_POINT_SOURCE
+ stf_used_total = stf_used_total + stf_used
- stf_used_total = stf_used_total + stf_used
+ endif ! ispec_is_elastic
+ endif ! ispec_is_inner
+ endif ! myrank
- endif ! ispec_is_elastic
- endif ! ispec_is_inner
- endif ! myrank
-
- enddo ! NSOURCES
- endif ! GPU_MODE
+ enddo ! NSOURCES
+ endif ! GPU_MODE
endif ! forward
! NOTE: adjoint sources and backward wavefield timing:
@@ -350,10 +358,10 @@
enddo ! nrec
else ! GPU_MODE == .true.
call add_sources_sim_type_2_or_3(Mesh_pointer, adj_sourcearrays, &
- size(adj_sourcearrays), ispec_is_inner,&
- phase_is_inner, ispec_selected_rec,ibool,myrank, nrec, &
- NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC),&
- islice_selected_rec, nadj_rec_local, NTSTEP_BETWEEN_READ_ADJSRC)
+ size(adj_sourcearrays), ispec_is_inner,&
+ phase_is_inner, ispec_selected_rec,ibool,myrank, nrec, &
+ NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC),&
+ islice_selected_rec, nadj_rec_local, NTSTEP_BETWEEN_READ_ADJSRC)
endif ! GPU_MODE
endif ! it
@@ -364,84 +372,87 @@
! thus indexing is NSTEP - it , instead of NSTEP - it - 1
! adjoint simulations
- if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0) then
+ if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then
- if(GPU_MODE) then
- do isource = 1,NSOURCES
- stf_pre_compute(isource) = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- enddo
- call add_sourcearrays_adjoint_cuda(Mesh_pointer, USE_FORCE_POINT_SOURCE,&
- stf_pre_compute, NSOURCES,phase_is_inner,myrank)
- else ! .NOT. GPU_MODE
+ if(GPU_MODE) then
+ do isource = 1,NSOURCES
+ stf_pre_compute(isource) = comp_source_time_function( &
+ dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ enddo
- ! backward source reconstruction
- do isource = 1,NSOURCES
+ call compute_add_sources_el_s3_cuda(Mesh_pointer, USE_FORCE_POINT_SOURCE,&
+ stf_pre_compute, NSOURCES,phase_is_inner,myrank)
- ! add the source (only if this proc carries the source)
- if(myrank == islice_selected_source(isource)) then
+ else ! .NOT. GPU_MODE
- ispec = ispec_selected_source(isource)
+ ! backward source reconstruction
+ do isource = 1,NSOURCES
- if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+ ! add the source (only if this proc carries the source)
+ if(myrank == islice_selected_source(isource)) then
- if( ispec_is_elastic(ispec) ) then
+ ispec = ispec_selected_source(isource)
- if(USE_FORCE_POINT_SOURCE) then
+ if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
- ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
- iglob = ibool(nint(xi_source(isource)), &
- nint(eta_source(isource)), &
- nint(gamma_source(isource)), &
- ispec_selected_source(isource))
+ if( ispec_is_elastic(ispec) ) then
- f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
+ if(USE_FORCE_POINT_SOURCE) then
- !if (it == 1 .and. myrank == 0) then
- ! write(IMAIN,*) 'using a source of dominant frequency ',f0
- ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
- ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
- !endif
+ ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+ iglob = ibool(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)), &
+ ispec_selected_source(isource))
- ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
- stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),f0)
+ f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
- ! e.g. we use nu_source(:,3) here if we want a source normal to the surface.
- ! note: time step is now at NSTEP-it
- b_accel(:,iglob) = b_accel(:,iglob) &
- + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
+ !if (it == 1 .and. myrank == 0) then
+ ! write(IMAIN,*) 'using a source of dominant frequency ',f0
+ ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+ ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+ !endif
- else
+ ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
+ stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),f0)
- ! see note above: time step corresponds now to NSTEP-it
- ! (also compare to it-1 for forward simulation)
- stf = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ ! e.g. we use nu_source(:,3) here if we want a source normal to the surface.
+ ! note: time step is now at NSTEP-it
+ b_accel(:,iglob) = b_accel(:,iglob) &
+ + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- stf_used = sngl(stf)
- else
- stf_used = stf
- endif
+ else
- ! add source array
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec_selected_source(isource))
- b_accel(:,iglob) = b_accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
- enddo
- enddo
- enddo
- endif ! USE_FORCE_POINT_SOURCE
+ ! see note above: time step corresponds now to NSTEP-it
+ ! (also compare to it-1 for forward simulation)
+ stf = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- stf_used_total = stf_used_total + stf_used
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ stf_used = sngl(stf)
+ else
+ stf_used = stf
+ endif
- endif ! elastic
- endif ! phase_inner
- endif ! myrank
+ ! add source array
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec_selected_source(isource))
+ b_accel(:,iglob) = b_accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
+ enddo
+ enddo
+ enddo
+ endif ! USE_FORCE_POINT_SOURCE
- enddo ! NSOURCES
- endif ! GPU_MODE
+ stf_used_total = stf_used_total + stf_used
+
+ endif ! elastic
+ endif ! phase_inner
+ endif ! myrank
+
+ enddo ! NSOURCES
+ endif ! GPU_MODE
endif ! adjoint
! master prints out source time function to file
@@ -468,7 +479,7 @@
it,irec_master_noise, &
NSPEC_AB,NGLOB_AB)
else ! GPU_MODE == .true.
- call add_source_master_rec_noise_cuda(Mesh_pointer, myrank, it, irec_master_noise, islice_selected_rec)
+ call add_source_master_rec_noise_cu(Mesh_pointer, myrank, it, irec_master_noise, islice_selected_rec)
endif
elseif ( NOISE_TOMOGRAPHY == 2 ) then
! second step of noise tomography, i.e., read the surface movie saved at every timestep
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -81,20 +81,20 @@
num_free_surface_faces,ispec_is_acoustic)
else
! on GPU
- call acoustic_enforce_free_surface_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
+ call acoustic_enforce_free_surf_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
endif
if(PML) then
! enforces free surface on PML elements
- ! note:
- ! PML routines are not implemented as CUDA kernels, we just transfer the fields
+ ! note:
+ ! PML routines are not implemented as CUDA kernels, we just transfer the fields
! from the GPU to the CPU and vice versa
- ! transfers potentials to the CPU
- if(GPU_MODE) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
-
+ ! transfers potentials to the CPU
+ if(GPU_MODE) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
call PML_acoustic_enforce_free_srfc(NSPEC_AB,NGLOB_AB, &
potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
ibool,free_surface_ijk,free_surface_ispec, &
@@ -106,8 +106,8 @@
chi3_dot_dot,chi4_dot_dot)
! transfers potentials back to GPU
- if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
endif
! distinguishes two runs: for points on MPI interfaces, and points within the partitions
@@ -147,15 +147,16 @@
else
! on GPU
! includes code for SIMULATION_TYPE==3
- call compute_forces_acoustic_cuda(Mesh_pointer, iphase, nspec_outer_acoustic, nspec_inner_acoustic, &
- SIMULATION_TYPE)
+ call compute_forces_acoustic_cuda(Mesh_pointer, iphase, &
+ nspec_outer_acoustic, nspec_inner_acoustic, &
+ SIMULATION_TYPE)
endif
if(PML) then
! transfers potentials to CPU
- if(GPU_MODE) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ if(GPU_MODE) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
call compute_forces_acoustic_PML(NSPEC_AB,NGLOB_AB, &
ibool,ispec_is_inner,phase_is_inner, &
@@ -178,8 +179,8 @@
chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
! transfers potentials back to GPU
- if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
endif ! PML
@@ -197,9 +198,9 @@
chi1_dot,chi2_t,chi2_t_dot,chi3_dot,chi4_dot,&
chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
! transfers potentials back to GPU
- if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
-
+ if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
else
! Stacey boundary conditions
call compute_stacey_acoustic(NSPEC_AB,NGLOB_AB, &
@@ -215,7 +216,7 @@
endif
! elastic coupling
- if(ELASTIC_SIMULATION ) then
+ if(ELASTIC_SIMULATION ) then
if( .NOT. GPU_MODE ) then
call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
ibool,displ,potential_dot_dot_acoustic, &
@@ -236,9 +237,9 @@
else
! on GPU
if( num_coupling_ac_el_faces > 0 ) &
- call compute_coupling_acoustic_el_cuda(Mesh_pointer,phase_is_inner, &
+ call compute_coupling_ac_el_cuda(Mesh_pointer,phase_is_inner, &
num_coupling_ac_el_faces,SIMULATION_TYPE)
-
+
endif
endif
@@ -264,7 +265,7 @@
! assemble all the contributions between slices using MPI
if( phase_is_inner .eqv. .false. ) then
! sends potential_dot_dot_acoustic values to corresponding MPI interface neighbors (non-blocking)
- if(.NOT. GPU_MODE) then
+ if(.NOT. GPU_MODE) then
call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
@@ -273,7 +274,7 @@
request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
else
! on GPU
- call transfer_boundary_potential_from_device(NGLOB_AB, Mesh_pointer, &
+ call transfer_boun_pot_from_device(NGLOB_AB, Mesh_pointer, &
potential_dot_dot_acoustic, &
buffer_send_scalar_ext_mesh, &
num_interfaces_ext_mesh, &
@@ -281,14 +282,14 @@
nibool_interfaces_ext_mesh, &
ibool_interfaces_ext_mesh, &
1) ! <-- 1 == fwd accel
- call assemble_MPI_scalar_ext_mesh_send_cuda(NPROC, &
+ call assemble_MPI_scalar_send_cuda(NPROC, &
buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,&
my_neighbours_ext_mesh, &
- request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
endif
-
+
! adjoint simulations
if( SIMULATION_TYPE == 3 ) then
if(.NOT. GPU_MODE) then
@@ -300,7 +301,7 @@
b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
else
! on GPU
- call transfer_boundary_potential_from_device(NGLOB_AB, Mesh_pointer, &
+ call transfer_boun_pot_from_device(NGLOB_AB, Mesh_pointer, &
b_potential_dot_dot_acoustic, &
b_buffer_send_scalar_ext_mesh,&
num_interfaces_ext_mesh, &
@@ -308,21 +309,21 @@
nibool_interfaces_ext_mesh, &
ibool_interfaces_ext_mesh, &
3) ! <-- 3 == adjoint b_accel
-
- call assemble_MPI_scalar_ext_mesh_send_cuda(NPROC, &
+
+ call assemble_MPI_scalar_send_cuda(NPROC, &
b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,&
my_neighbours_ext_mesh, &
b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
-
+
endif
endif
-
+
else
-
+
! waits for send/receive requests to be completed and assembles values
- if(.NOT. GPU_MODE) then
+ if(.NOT. GPU_MODE) then
call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
max_nibool_interfaces_ext_mesh, &
@@ -330,18 +331,18 @@
request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
else
! on GPU
- call assemble_MPI_scalar_ext_mesh_write_cuda(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
+ call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
Mesh_pointer,&
buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
- 1)
+ 1)
endif
-
+
! adjoint simulations
if( SIMULATION_TYPE == 3 ) then
- if(.NOT. GPU_MODE) then
+ if(.NOT. GPU_MODE) then
call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
max_nibool_interfaces_ext_mesh, &
@@ -349,14 +350,14 @@
b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
else
! on GPU
- call assemble_MPI_scalar_ext_mesh_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, &
+ call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, &
Mesh_pointer, &
b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
b_request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
- 3)
- endif
+ 3)
+ endif
endif
endif !phase_is_inner
@@ -376,9 +377,9 @@
if(PML) then
- ! note: no need to transfer fields between CPU and GPU;
+ ! note: no need to transfer fields between CPU and GPU;
! PML arrays are all handled on the CPU
-
+
! divides local contributions with mass term
call PML_acoustic_mass_update(NSPEC_AB,NGLOB_AB,&
ispec_is_acoustic,rmass_acoustic,ibool,&
@@ -410,7 +411,7 @@
! corrector:
! updates the chi_dot term which requires chi_dot_dot(t+delta)
if( .NOT. GPU_MODE ) then
- ! corrector
+ ! corrector
potential_dot_acoustic(:) = potential_dot_acoustic(:) + deltatover2*potential_dot_dot_acoustic(:)
! adjoint simulations
@@ -418,14 +419,14 @@
b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) + b_deltatover2*b_potential_dot_dot_acoustic(:)
else
! on GPU
- call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,SIMULATION_TYPE,b_deltatover2)
+ call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,SIMULATION_TYPE,b_deltatover2)
endif
! updates potential_dot_acoustic and potential_dot_dot_acoustic inside PML region for plotting seismograms/movies
- if(PML) then
+ if(PML) then
! transfers potentials to CPU
- if(GPU_MODE) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ if(GPU_MODE) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
call PML_acoustic_update_potentials(NGLOB_AB,NSPEC_AB, &
ibool,ispec_is_acoustic, &
@@ -439,9 +440,9 @@
chi1_dot,chi2_t_dot,chi3_dot,chi4_dot,&
chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
- ! transfers potentials to GPU
- if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ ! transfers potentials to GPU
+ if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
endif
@@ -461,15 +462,15 @@
num_free_surface_faces,ispec_is_acoustic)
else
! on GPU
- call acoustic_enforce_free_surface_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
+ call acoustic_enforce_free_surf_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
endif
if(PML) then
! enforces free surface on PML elements
- if( GPU_MODE ) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
-
+ if( GPU_MODE ) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
call PML_acoustic_enforce_free_srfc(NSPEC_AB,NGLOB_AB, &
potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
ibool,free_surface_ijk,free_surface_ispec, &
@@ -481,9 +482,9 @@
chi1_dot_dot,chi2_t_dot_dot,&
chi3_dot_dot,chi4_dot_dot)
- if( GPU_MODE ) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
- endif
+ if( GPU_MODE ) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ endif
end subroutine compute_forces_acoustic
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -50,172 +50,19 @@
! elastic term
- if(USE_DEVILLE_PRODUCTS) then
- if (NGLLX == 5) then
- if(.NOT. GPU_MODE) then
- call compute_forces_elastic_Dev_5points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
+ if( .NOT. GPU_MODE ) then
+ if(USE_DEVILLE_PRODUCTS) then
+ ! uses Deville (2002) optimizations
+ call compute_forces_elastic_Dev_sim1(iphase)
- else ! GPU_MODE==.true.
- ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
- call compute_forces_elastic_cuda(Mesh_pointer, iphase, nspec_outer_elastic, &
- nspec_inner_elastic,COMPUTE_AND_STORE_STRAIN,SIMULATION_TYPE,ATTENUATION)
- endif
- else if (NGLLX == 6) then
- call compute_forces_elastic_Dev_6points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
+ ! adjoint simulations: backward/reconstructed wavefield
+ if( SIMULATION_TYPE == 3 ) &
+ call compute_forces_elastic_Dev_sim3(iphase)
- else if (NGLLX == 7) then
- call compute_forces_elastic_Dev_7points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ else
+ ! no optimizations used
+ call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- else if (NGLLX == 8) then
- call compute_forces_elastic_Dev_8points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- else if (NGLLX == 9) then
- call compute_forces_elastic_Dev_9points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- else
- call compute_forces_elastic_Dev_10points(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
- endif
- else
- call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_yy,hprime_zz, &
hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
@@ -239,177 +86,12 @@
ispec2D_moho_top,ispec2D_moho_bot, &
num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
phase_ispec_inner_elastic )
- endif
-
- ! adjoint simulations: backward/reconstructed wavefield
- ! GPU_MODE for SIM_TYPE==3 contained above in compute_forces_elastic_cuda
- if( SIMULATION_TYPE == 3 ) then
- if(USE_DEVILLE_PRODUCTS) then
- if (NGLLX == 5) then
- call compute_forces_elastic_Dev_5points(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
- else if (NGLLX == 6) then
- call compute_forces_elastic_Dev_6points(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
- else if (NGLLX == 7) then
- call compute_forces_elastic_Dev_7points(iphase, NSPEC_AB,NGLOB_AB, &
+ ! adjoint simulations: backward/reconstructed wavefield
+ if( SIMULATION_TYPE == 3 ) &
+ call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,&
b_displ,b_accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- else if (NGLLX == 8) then
- call compute_forces_elastic_Dev_8points(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- else if (NGLLX == 9) then
- call compute_forces_elastic_Dev_9points(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- else
- call compute_forces_elastic_Dev_10points(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
- endif
- else
- call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,&
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_yy,hprime_zz, &
hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
@@ -435,9 +117,17 @@
phase_ispec_inner_elastic )
endif
- endif
+ else
+ ! on GPU
+ ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
+ call compute_forces_elastic_cuda(Mesh_pointer, iphase, &
+ nspec_outer_elastic, &
+ nspec_inner_elastic, &
+ SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,ATTENUATION)
+ endif ! GPU_MODE
+
! adds elastic absorbing boundary term to acceleration (Stacey conditions)
if(ABSORBING_CONDITIONS) &
call compute_stacey_elastic(NSPEC_AB,NGLOB_AB,accel, &
@@ -452,8 +142,8 @@
GPU_MODE,Mesh_pointer)
! acoustic coupling
- if( ACOUSTIC_SIMULATION ) then
- if( .NOT. GPU_MODE ) then
+ if( ACOUSTIC_SIMULATION ) then
+ if( .NOT. GPU_MODE ) then
call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
ibool,accel,potential_dot_dot_acoustic, &
num_coupling_ac_el_faces, &
@@ -474,9 +164,9 @@
else
! on GPU
if( num_coupling_ac_el_faces > 0 ) &
- call compute_coupling_elastic_ac_cuda(Mesh_pointer,phase_is_inner, &
+ call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, &
num_coupling_ac_el_faces,SIMULATION_TYPE)
-
+
endif
endif
@@ -495,8 +185,9 @@
ispec_is_elastic,SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
nrec,islice_selected_rec,ispec_selected_rec, &
nadj_rec_local,adj_sourcearrays,b_accel, &
- NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY,GPU_MODE, Mesh_pointer )
-
+ NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, &
+ GPU_MODE, Mesh_pointer )
+
! assemble all the contributions between slices using MPI
if( phase_is_inner .eqv. .false. ) then
! sends accel values to corresponding MPI interface neighbors
@@ -508,18 +199,18 @@
my_neighbours_ext_mesh, &
request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
else ! GPU_MODE==1
- call transfer_boundary_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, accel,&
- buffer_send_vector_ext_mesh,&
- num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
- nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,1) ! <-- 1 == fwd accel
- call assemble_MPI_vector_ext_mesh_send_cuda(NPROC, &
+ call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, accel,&
+ buffer_send_vector_ext_mesh,&
+ num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
+ nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,1) ! <-- 1 == fwd accel
+ call assemble_MPI_vector_send_cuda(NPROC, &
buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,&
my_neighbours_ext_mesh, &
request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
endif ! GPU_MODE
-
+
! adjoint simulations
if( SIMULATION_TYPE == 3 ) then
if(.NOT. GPU_MODE) then
@@ -530,17 +221,17 @@
my_neighbours_ext_mesh, &
b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
else ! GPU_MODE == 1
- call transfer_boundary_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
- b_buffer_send_vector_ext_mesh,&
- num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
- nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
- call assemble_MPI_vector_ext_mesh_send_cuda(NPROC, &
+ call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
+ b_buffer_send_vector_ext_mesh,&
+ num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
+ nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
+ call assemble_MPI_vector_send_cuda(NPROC, &
b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,&
my_neighbours_ext_mesh, &
b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
-
+
endif ! GPU
endif !adjoint
@@ -548,31 +239,31 @@
! waits for send/receive requests to be completed and assembles values
if(.NOT. GPU_MODE) then
call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_AB,accel, &
- buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
- max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+ buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
else ! GPU_MODE == 1
- call assemble_MPI_vector_ext_mesh_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
- buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
- max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
+ call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
+ buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
endif
! adjoint simulations
if( SIMULATION_TYPE == 3 ) then
if(.NOT. GPU_MODE) then
call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_accel, &
- b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
- max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
+ b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
else ! GPU_MODE == 1
- call assemble_MPI_vector_ext_mesh_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
- b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
- max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
+ call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
+ b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
endif
endif !adjoint
@@ -585,7 +276,7 @@
!! DK DK May 2009: For adjoint runs below (SIMULATION_TYPE == 3) they should be used as well.
enddo
-
+
! multiplies with inverse of mass matrix (note: rmass has been inverted already)
if(.NOT. GPU_MODE) then
accel(1,:) = accel(1,:)*rmass(:)
@@ -597,7 +288,7 @@
b_accel(2,:) = b_accel(2,:)*rmass(:)
b_accel(3,:) = b_accel(3,:)*rmass(:)
endif !adjoint
- else ! GPU_MODE == 1
+ else ! GPU_MODE == 1
call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS)
endif
@@ -636,8 +327,8 @@
veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
! adjoint simulations
if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
- else ! GPU_MODE == 1
- if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2)
+ else ! GPU_MODE == 1
+ if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2)
endif
@@ -746,3 +437,385 @@
end subroutine elastic_ocean_load
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! distributes routines according to chosen NGLLX in constants.h
+
+!daniel
+! note:
+! i put it here rather than in compute_forces_elastic_Dev.f90 because compiler complains that:
+! " The storage extent of the dummy argument exceeds that of the actual argument. "
+
+subroutine compute_forces_elastic_Dev_sim1(iphase)
+
+! forward simulations
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+
+ implicit none
+
+ integer,intent(in) :: iphase
+
+ select case(NGLLX)
+
+ case (5)
+ call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (6)
+ call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (7)
+ call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (8)
+ call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (9)
+ call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (10)
+ call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case default
+
+ stop 'error no Deville routine available for chosen NGLLX'
+
+ end select
+
+end subroutine compute_forces_elastic_Dev_sim1
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+subroutine compute_forces_elastic_Dev_sim3(iphase)
+
+! uses backward/reconstructed displacement and acceleration arrays
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+
+ implicit none
+
+ integer,intent(in) :: iphase
+
+ select case(NGLLX)
+
+ case (5)
+ call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (6)
+ call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (7)
+ call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (8)
+ call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (9)
+ call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (10)
+ call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case default
+
+ stop 'error no Deville routine available for chosen NGLLX'
+
+ end select
+
+
+end subroutine compute_forces_elastic_Dev_sim3
+
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -25,7 +25,8 @@
!=====================================================================
-subroutine compute_forces_elastic_Dev_5points( iphase ,NSPEC_AB,NGLOB_AB, &
+
+subroutine compute_forces_elastic_Dev_5p( iphase ,NSPEC_AB,NGLOB_AB, &
displ,accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_xxT, &
@@ -72,7 +73,8 @@
kappastore,mustore,jacobian
! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(NGLLX,5) :: hprime_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(5,NGLLX) :: hprime_xxT,hprimewgll_xx
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -117,15 +119,15 @@
integer :: ispec2D_moho_top, ispec2D_moho_bot
! local parameters
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+ real(kind=CUSTOM_REAL), dimension(5,5,5) :: dummyx_loc,dummyy_loc,dummyz_loc, &
newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+ real(kind=CUSTOM_REAL), dimension(5,5,5) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
! manually inline the calls to the Deville et al. (2002) routines
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_5points,E2_m1_m2_5points,E3_m1_m2_5points
+ real(kind=CUSTOM_REAL), dimension(5,25) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
+ real(kind=CUSTOM_REAL), dimension(5,25) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
+ real(kind=CUSTOM_REAL), dimension(5,25) :: E1_m1_m2_5points,E2_m1_m2_5points,E3_m1_m2_5points
equivalence(dummyx_loc,B1_m1_m2_5points)
equivalence(dummyy_loc,B2_m1_m2_5points)
@@ -137,11 +139,11 @@
equivalence(newtempy1,E2_m1_m2_5points)
equivalence(newtempz1,E3_m1_m2_5points)
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+ real(kind=CUSTOM_REAL), dimension(25,5) :: &
A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(25,5) :: &
C1_mxm_m2_m1_5points,C2_mxm_m2_m1_5points,C3_mxm_m2_m1_5points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(25,5) :: &
E1_mxm_m2_m1_5points,E2_mxm_m2_m1_5points,E3_mxm_m2_m1_5points
equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
@@ -649,35 +651,13 @@
enddo ! spectral element loop
-end subroutine compute_forces_elastic_Dev_5points
-!=====================================================================
+end subroutine compute_forces_elastic_Dev_5p
+
!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
!=====================================================================
+!
-
-subroutine compute_forces_elastic_Dev_6points( iphase ,NSPEC_AB,NGLOB_AB, &
+subroutine compute_forces_elastic_Dev_6p( iphase ,NSPEC_AB,NGLOB_AB, &
displ,accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_xxT, &
@@ -724,7 +704,8 @@
kappastore,mustore,jacobian
! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(NGLLX,6) :: hprime_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(6,NGLLX) :: hprime_xxT,hprimewgll_xx
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -769,15 +750,15 @@
integer :: ispec2D_moho_top, ispec2D_moho_bot
! local parameters
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+ real(kind=CUSTOM_REAL), dimension(6,6,6) :: dummyx_loc,dummyy_loc,dummyz_loc, &
newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+ real(kind=CUSTOM_REAL), dimension(6,6,6) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
! manually inline the calls to the Deville et al. (2002) routines
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_6points,B2_m1_m2_6points,B3_m1_m2_6points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_6points,C2_m1_m2_6points,C3_m1_m2_6points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_6points,E2_m1_m2_6points,E3_m1_m2_6points
+ real(kind=CUSTOM_REAL), dimension(6,36) :: B1_m1_m2_6points,B2_m1_m2_6points,B3_m1_m2_6points
+ real(kind=CUSTOM_REAL), dimension(6,36) :: C1_m1_m2_6points,C2_m1_m2_6points,C3_m1_m2_6points
+ real(kind=CUSTOM_REAL), dimension(6,36) :: E1_m1_m2_6points,E2_m1_m2_6points,E3_m1_m2_6points
equivalence(dummyx_loc,B1_m1_m2_6points)
equivalence(dummyy_loc,B2_m1_m2_6points)
@@ -789,11 +770,11 @@
equivalence(newtempy1,E2_m1_m2_6points)
equivalence(newtempz1,E3_m1_m2_6points)
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+ real(kind=CUSTOM_REAL), dimension(36,6) :: &
A1_mxm_m2_m1_6points,A2_mxm_m2_m1_6points,A3_mxm_m2_m1_6points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(36,6) :: &
C1_mxm_m2_m1_6points,C2_mxm_m2_m1_6points,C3_mxm_m2_m1_6points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(36,6) :: &
E1_mxm_m2_m1_6points,E2_mxm_m2_m1_6points,E3_mxm_m2_m1_6points
equivalence(dummyx_loc,A1_mxm_m2_m1_6points)
@@ -881,19 +862,19 @@
hprime_xx(i,3)*B1_m1_m2_6points(3,j) + &
hprime_xx(i,4)*B1_m1_m2_6points(4,j) + &
hprime_xx(i,5)*B1_m1_m2_6points(5,j) + &
- hprime_xx(i,6)*B1_m1_m2_6points(6,j)
+ hprime_xx(i,6)*B1_m1_m2_6points(6,j)
C2_m1_m2_6points(i,j) = hprime_xx(i,1)*B2_m1_m2_6points(1,j) + &
hprime_xx(i,2)*B2_m1_m2_6points(2,j) + &
hprime_xx(i,3)*B2_m1_m2_6points(3,j) + &
hprime_xx(i,4)*B2_m1_m2_6points(4,j) + &
hprime_xx(i,5)*B2_m1_m2_6points(5,j) + &
- hprime_xx(i,6)*B2_m1_m2_6points(6,j)
+ hprime_xx(i,6)*B2_m1_m2_6points(6,j)
C3_m1_m2_6points(i,j) = hprime_xx(i,1)*B3_m1_m2_6points(1,j) + &
hprime_xx(i,2)*B3_m1_m2_6points(2,j) + &
hprime_xx(i,3)*B3_m1_m2_6points(3,j) + &
hprime_xx(i,4)*B3_m1_m2_6points(4,j) + &
hprime_xx(i,5)*B3_m1_m2_6points(5,j) + &
- hprime_xx(i,6)*B3_m1_m2_6points(6,j)
+ hprime_xx(i,6)*B3_m1_m2_6points(6,j)
enddo
enddo
@@ -908,19 +889,19 @@
dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
- dummyx_loc(i,6,k)*hprime_xxT(6,j)
+ dummyx_loc(i,6,k)*hprime_xxT(6,j)
tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
- dummyy_loc(i,6,k)*hprime_xxT(6,j)
+ dummyy_loc(i,6,k)*hprime_xxT(6,j)
tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
- dummyz_loc(i,6,k)*hprime_xxT(6,j)
+ dummyz_loc(i,6,k)*hprime_xxT(6,j)
enddo
enddo
enddo
@@ -933,19 +914,19 @@
A1_mxm_m2_m1_6points(i,3)*hprime_xxT(3,j) + &
A1_mxm_m2_m1_6points(i,4)*hprime_xxT(4,j) + &
A1_mxm_m2_m1_6points(i,5)*hprime_xxT(5,j) + &
- A1_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
+ A1_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
C2_mxm_m2_m1_6points(i,j) = A2_mxm_m2_m1_6points(i,1)*hprime_xxT(1,j) + &
A2_mxm_m2_m1_6points(i,2)*hprime_xxT(2,j) + &
A2_mxm_m2_m1_6points(i,3)*hprime_xxT(3,j) + &
A2_mxm_m2_m1_6points(i,4)*hprime_xxT(4,j) + &
A2_mxm_m2_m1_6points(i,5)*hprime_xxT(5,j) + &
- A2_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
+ A2_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
C3_mxm_m2_m1_6points(i,j) = A3_mxm_m2_m1_6points(i,1)*hprime_xxT(1,j) + &
A3_mxm_m2_m1_6points(i,2)*hprime_xxT(2,j) + &
A3_mxm_m2_m1_6points(i,3)*hprime_xxT(3,j) + &
A3_mxm_m2_m1_6points(i,4)*hprime_xxT(4,j) + &
A3_mxm_m2_m1_6points(i,5)*hprime_xxT(5,j) + &
- A3_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
+ A3_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
enddo
enddo
@@ -1176,19 +1157,19 @@
hprimewgll_xxT(i,3)*C1_m1_m2_6points(3,j) + &
hprimewgll_xxT(i,4)*C1_m1_m2_6points(4,j) + &
hprimewgll_xxT(i,5)*C1_m1_m2_6points(5,j) + &
- hprimewgll_xxT(i,6)*C1_m1_m2_6points(6,j)
+ hprimewgll_xxT(i,6)*C1_m1_m2_6points(6,j)
E2_m1_m2_6points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_6points(1,j) + &
hprimewgll_xxT(i,2)*C2_m1_m2_6points(2,j) + &
hprimewgll_xxT(i,3)*C2_m1_m2_6points(3,j) + &
hprimewgll_xxT(i,4)*C2_m1_m2_6points(4,j) + &
hprimewgll_xxT(i,5)*C2_m1_m2_6points(5,j) + &
- hprimewgll_xxT(i,6)*C2_m1_m2_6points(6,j)
+ hprimewgll_xxT(i,6)*C2_m1_m2_6points(6,j)
E3_m1_m2_6points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_6points(1,j) + &
hprimewgll_xxT(i,2)*C3_m1_m2_6points(2,j) + &
hprimewgll_xxT(i,3)*C3_m1_m2_6points(3,j) + &
hprimewgll_xxT(i,4)*C3_m1_m2_6points(4,j) + &
hprimewgll_xxT(i,5)*C3_m1_m2_6points(5,j) + &
- hprimewgll_xxT(i,6)*C3_m1_m2_6points(6,j)
+ hprimewgll_xxT(i,6)*C3_m1_m2_6points(6,j)
enddo
enddo
@@ -1203,19 +1184,19 @@
tempx2(i,3,k)*hprimewgll_xx(3,j) + &
tempx2(i,4,k)*hprimewgll_xx(4,j) + &
tempx2(i,5,k)*hprimewgll_xx(5,j) + &
- tempx2(i,6,k)*hprimewgll_xx(6,j)
+ tempx2(i,6,k)*hprimewgll_xx(6,j)
newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
tempy2(i,2,k)*hprimewgll_xx(2,j) + &
tempy2(i,3,k)*hprimewgll_xx(3,j) + &
tempy2(i,4,k)*hprimewgll_xx(4,j) + &
tempy2(i,5,k)*hprimewgll_xx(5,j) + &
- tempy2(i,6,k)*hprimewgll_xx(6,j)
+ tempy2(i,6,k)*hprimewgll_xx(6,j)
newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
tempz2(i,2,k)*hprimewgll_xx(2,j) + &
tempz2(i,3,k)*hprimewgll_xx(3,j) + &
tempz2(i,4,k)*hprimewgll_xx(4,j) + &
tempz2(i,5,k)*hprimewgll_xx(5,j) + &
- tempz2(i,6,k)*hprimewgll_xx(6,j)
+ tempz2(i,6,k)*hprimewgll_xx(6,j)
enddo
enddo
enddo
@@ -1228,19 +1209,19 @@
C1_mxm_m2_m1_6points(i,3)*hprimewgll_xx(3,j) + &
C1_mxm_m2_m1_6points(i,4)*hprimewgll_xx(4,j) + &
C1_mxm_m2_m1_6points(i,5)*hprimewgll_xx(5,j) + &
- C1_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
+ C1_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
E2_mxm_m2_m1_6points(i,j) = C2_mxm_m2_m1_6points(i,1)*hprimewgll_xx(1,j) + &
C2_mxm_m2_m1_6points(i,2)*hprimewgll_xx(2,j) + &
C2_mxm_m2_m1_6points(i,3)*hprimewgll_xx(3,j) + &
C2_mxm_m2_m1_6points(i,4)*hprimewgll_xx(4,j) + &
C2_mxm_m2_m1_6points(i,5)*hprimewgll_xx(5,j) + &
- C2_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
+ C2_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
E3_mxm_m2_m1_6points(i,j) = C3_mxm_m2_m1_6points(i,1)*hprimewgll_xx(1,j) + &
C3_mxm_m2_m1_6points(i,2)*hprimewgll_xx(2,j) + &
C3_mxm_m2_m1_6points(i,3)*hprimewgll_xx(3,j) + &
C3_mxm_m2_m1_6points(i,4)*hprimewgll_xx(4,j) + &
C3_mxm_m2_m1_6points(i,5)*hprimewgll_xx(5,j) + &
- C3_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
+ C3_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
enddo
enddo
@@ -1319,35 +1300,13 @@
enddo ! spectral element loop
-end subroutine compute_forces_elastic_Dev_6points
-!=====================================================================
+end subroutine compute_forces_elastic_Dev_6p
+
!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
!=====================================================================
+!
-
-subroutine compute_forces_elastic_Dev_7points( iphase ,NSPEC_AB,NGLOB_AB, &
+subroutine compute_forces_elastic_Dev_7p( iphase ,NSPEC_AB,NGLOB_AB, &
displ,accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_xxT, &
@@ -1394,7 +1353,8 @@
kappastore,mustore,jacobian
! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(NGLLX,7) :: hprime_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(7,NGLLX) :: hprime_xxT,hprimewgll_xx
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -1439,15 +1399,15 @@
integer :: ispec2D_moho_top, ispec2D_moho_bot
! local parameters
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+ real(kind=CUSTOM_REAL), dimension(7,7,7) :: dummyx_loc,dummyy_loc,dummyz_loc, &
newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+ real(kind=CUSTOM_REAL), dimension(7,7,7) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
! manually inline the calls to the Deville et al. (2002) routines
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_7points,B2_m1_m2_7points,B3_m1_m2_7points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_7points,C2_m1_m2_7points,C3_m1_m2_7points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_7points,E2_m1_m2_7points,E3_m1_m2_7points
+ real(kind=CUSTOM_REAL), dimension(7,49) :: B1_m1_m2_7points,B2_m1_m2_7points,B3_m1_m2_7points
+ real(kind=CUSTOM_REAL), dimension(7,49) :: C1_m1_m2_7points,C2_m1_m2_7points,C3_m1_m2_7points
+ real(kind=CUSTOM_REAL), dimension(7,49) :: E1_m1_m2_7points,E2_m1_m2_7points,E3_m1_m2_7points
equivalence(dummyx_loc,B1_m1_m2_7points)
equivalence(dummyy_loc,B2_m1_m2_7points)
@@ -1459,11 +1419,11 @@
equivalence(newtempy1,E2_m1_m2_7points)
equivalence(newtempz1,E3_m1_m2_7points)
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+ real(kind=CUSTOM_REAL), dimension(49,7) :: &
A1_mxm_m2_m1_7points,A2_mxm_m2_m1_7points,A3_mxm_m2_m1_7points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(49,7) :: &
C1_mxm_m2_m1_7points,C2_mxm_m2_m1_7points,C3_mxm_m2_m1_7points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(49,7) :: &
E1_mxm_m2_m1_7points,E2_mxm_m2_m1_7points,E3_mxm_m2_m1_7points
equivalence(dummyx_loc,A1_mxm_m2_m1_7points)
@@ -1552,21 +1512,21 @@
hprime_xx(i,4)*B1_m1_m2_7points(4,j) + &
hprime_xx(i,5)*B1_m1_m2_7points(5,j) + &
hprime_xx(i,6)*B1_m1_m2_7points(6,j) + &
- hprime_xx(i,7)*B1_m1_m2_7points(7,j)
+ hprime_xx(i,7)*B1_m1_m2_7points(7,j)
C2_m1_m2_7points(i,j) = hprime_xx(i,1)*B2_m1_m2_7points(1,j) + &
hprime_xx(i,2)*B2_m1_m2_7points(2,j) + &
hprime_xx(i,3)*B2_m1_m2_7points(3,j) + &
hprime_xx(i,4)*B2_m1_m2_7points(4,j) + &
hprime_xx(i,5)*B2_m1_m2_7points(5,j) + &
hprime_xx(i,6)*B2_m1_m2_7points(6,j) + &
- hprime_xx(i,7)*B2_m1_m2_7points(7,j)
+ hprime_xx(i,7)*B2_m1_m2_7points(7,j)
C3_m1_m2_7points(i,j) = hprime_xx(i,1)*B3_m1_m2_7points(1,j) + &
hprime_xx(i,2)*B3_m1_m2_7points(2,j) + &
hprime_xx(i,3)*B3_m1_m2_7points(3,j) + &
hprime_xx(i,4)*B3_m1_m2_7points(4,j) + &
hprime_xx(i,5)*B3_m1_m2_7points(5,j) + &
hprime_xx(i,6)*B3_m1_m2_7points(6,j) + &
- hprime_xx(i,7)*B3_m1_m2_7points(7,j)
+ hprime_xx(i,7)*B3_m1_m2_7points(7,j)
enddo
enddo
@@ -1582,21 +1542,21 @@
dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
dummyx_loc(i,6,k)*hprime_xxT(6,j) + &
- dummyx_loc(i,7,k)*hprime_xxT(7,j)
+ dummyx_loc(i,7,k)*hprime_xxT(7,j)
tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
dummyy_loc(i,6,k)*hprime_xxT(6,j) + &
- dummyy_loc(i,7,k)*hprime_xxT(7,j)
+ dummyy_loc(i,7,k)*hprime_xxT(7,j)
tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
dummyz_loc(i,6,k)*hprime_xxT(6,j) + &
- dummyz_loc(i,7,k)*hprime_xxT(7,j)
+ dummyz_loc(i,7,k)*hprime_xxT(7,j)
enddo
enddo
enddo
@@ -1610,21 +1570,21 @@
A1_mxm_m2_m1_7points(i,4)*hprime_xxT(4,j) + &
A1_mxm_m2_m1_7points(i,5)*hprime_xxT(5,j) + &
A1_mxm_m2_m1_7points(i,6)*hprime_xxT(6,j) + &
- A1_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
+ A1_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
C2_mxm_m2_m1_7points(i,j) = A2_mxm_m2_m1_7points(i,1)*hprime_xxT(1,j) + &
A2_mxm_m2_m1_7points(i,2)*hprime_xxT(2,j) + &
A2_mxm_m2_m1_7points(i,3)*hprime_xxT(3,j) + &
A2_mxm_m2_m1_7points(i,4)*hprime_xxT(4,j) + &
A2_mxm_m2_m1_7points(i,5)*hprime_xxT(5,j) + &
A2_mxm_m2_m1_7points(i,6)*hprime_xxT(6,j) + &
- A2_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
+ A2_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
C3_mxm_m2_m1_7points(i,j) = A3_mxm_m2_m1_7points(i,1)*hprime_xxT(1,j) + &
A3_mxm_m2_m1_7points(i,2)*hprime_xxT(2,j) + &
A3_mxm_m2_m1_7points(i,3)*hprime_xxT(3,j) + &
A3_mxm_m2_m1_7points(i,4)*hprime_xxT(4,j) + &
A3_mxm_m2_m1_7points(i,5)*hprime_xxT(5,j) + &
A3_mxm_m2_m1_7points(i,6)*hprime_xxT(6,j) + &
- A3_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
+ A3_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
enddo
enddo
@@ -1856,21 +1816,21 @@
hprimewgll_xxT(i,4)*C1_m1_m2_7points(4,j) + &
hprimewgll_xxT(i,5)*C1_m1_m2_7points(5,j) + &
hprimewgll_xxT(i,6)*C1_m1_m2_7points(6,j) + &
- hprimewgll_xxT(i,7)*C1_m1_m2_7points(7,j)
+ hprimewgll_xxT(i,7)*C1_m1_m2_7points(7,j)
E2_m1_m2_7points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_7points(1,j) + &
hprimewgll_xxT(i,2)*C2_m1_m2_7points(2,j) + &
hprimewgll_xxT(i,3)*C2_m1_m2_7points(3,j) + &
hprimewgll_xxT(i,4)*C2_m1_m2_7points(4,j) + &
hprimewgll_xxT(i,5)*C2_m1_m2_7points(5,j) + &
hprimewgll_xxT(i,6)*C2_m1_m2_7points(6,j) + &
- hprimewgll_xxT(i,7)*C2_m1_m2_7points(7,j)
+ hprimewgll_xxT(i,7)*C2_m1_m2_7points(7,j)
E3_m1_m2_7points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_7points(1,j) + &
hprimewgll_xxT(i,2)*C3_m1_m2_7points(2,j) + &
hprimewgll_xxT(i,3)*C3_m1_m2_7points(3,j) + &
hprimewgll_xxT(i,4)*C3_m1_m2_7points(4,j) + &
hprimewgll_xxT(i,5)*C3_m1_m2_7points(5,j) + &
hprimewgll_xxT(i,6)*C3_m1_m2_7points(6,j) + &
- hprimewgll_xxT(i,7)*C3_m1_m2_7points(7,j)
+ hprimewgll_xxT(i,7)*C3_m1_m2_7points(7,j)
enddo
enddo
@@ -1886,21 +1846,21 @@
tempx2(i,4,k)*hprimewgll_xx(4,j) + &
tempx2(i,5,k)*hprimewgll_xx(5,j) + &
tempx2(i,6,k)*hprimewgll_xx(6,j) + &
- tempx2(i,7,k)*hprimewgll_xx(7,j)
+ tempx2(i,7,k)*hprimewgll_xx(7,j)
newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
tempy2(i,2,k)*hprimewgll_xx(2,j) + &
tempy2(i,3,k)*hprimewgll_xx(3,j) + &
tempy2(i,4,k)*hprimewgll_xx(4,j) + &
tempy2(i,5,k)*hprimewgll_xx(5,j) + &
tempy2(i,6,k)*hprimewgll_xx(6,j) + &
- tempy2(i,7,k)*hprimewgll_xx(7,j)
+ tempy2(i,7,k)*hprimewgll_xx(7,j)
newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
tempz2(i,2,k)*hprimewgll_xx(2,j) + &
tempz2(i,3,k)*hprimewgll_xx(3,j) + &
tempz2(i,4,k)*hprimewgll_xx(4,j) + &
tempz2(i,5,k)*hprimewgll_xx(5,j) + &
tempz2(i,6,k)*hprimewgll_xx(6,j) + &
- tempz2(i,7,k)*hprimewgll_xx(7,j)
+ tempz2(i,7,k)*hprimewgll_xx(7,j)
enddo
enddo
enddo
@@ -1914,21 +1874,21 @@
C1_mxm_m2_m1_7points(i,4)*hprimewgll_xx(4,j) + &
C1_mxm_m2_m1_7points(i,5)*hprimewgll_xx(5,j) + &
C1_mxm_m2_m1_7points(i,6)*hprimewgll_xx(6,j) + &
- C1_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
+ C1_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
E2_mxm_m2_m1_7points(i,j) = C2_mxm_m2_m1_7points(i,1)*hprimewgll_xx(1,j) + &
C2_mxm_m2_m1_7points(i,2)*hprimewgll_xx(2,j) + &
C2_mxm_m2_m1_7points(i,3)*hprimewgll_xx(3,j) + &
C2_mxm_m2_m1_7points(i,4)*hprimewgll_xx(4,j) + &
C2_mxm_m2_m1_7points(i,5)*hprimewgll_xx(5,j) + &
C2_mxm_m2_m1_7points(i,6)*hprimewgll_xx(6,j) + &
- C2_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
+ C2_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
E3_mxm_m2_m1_7points(i,j) = C3_mxm_m2_m1_7points(i,1)*hprimewgll_xx(1,j) + &
C3_mxm_m2_m1_7points(i,2)*hprimewgll_xx(2,j) + &
C3_mxm_m2_m1_7points(i,3)*hprimewgll_xx(3,j) + &
C3_mxm_m2_m1_7points(i,4)*hprimewgll_xx(4,j) + &
C3_mxm_m2_m1_7points(i,5)*hprimewgll_xx(5,j) + &
C3_mxm_m2_m1_7points(i,6)*hprimewgll_xx(6,j) + &
- C3_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
+ C3_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
enddo
enddo
@@ -2007,35 +1967,13 @@
enddo ! spectral element loop
-end subroutine compute_forces_elastic_Dev_7points
-!=====================================================================
+end subroutine compute_forces_elastic_Dev_7p
+
!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
!=====================================================================
+!
-
-subroutine compute_forces_elastic_Dev_8points( iphase ,NSPEC_AB,NGLOB_AB, &
+subroutine compute_forces_elastic_Dev_8p( iphase ,NSPEC_AB,NGLOB_AB, &
displ,accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_xxT, &
@@ -2082,7 +2020,8 @@
kappastore,mustore,jacobian
! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(NGLLX,8) :: hprime_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(8,NGLLX) :: hprime_xxT,hprimewgll_xx
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -2127,15 +2066,15 @@
integer :: ispec2D_moho_top, ispec2D_moho_bot
! local parameters
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+ real(kind=CUSTOM_REAL), dimension(8,8,8) :: dummyx_loc,dummyy_loc,dummyz_loc, &
newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+ real(kind=CUSTOM_REAL), dimension(8,8,8) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
! manually inline the calls to the Deville et al. (2002) routines
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_8points,B2_m1_m2_8points,B3_m1_m2_8points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_8points,C2_m1_m2_8points,C3_m1_m2_8points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_8points,E2_m1_m2_8points,E3_m1_m2_8points
+ real(kind=CUSTOM_REAL), dimension(8,64) :: B1_m1_m2_8points,B2_m1_m2_8points,B3_m1_m2_8points
+ real(kind=CUSTOM_REAL), dimension(8,64) :: C1_m1_m2_8points,C2_m1_m2_8points,C3_m1_m2_8points
+ real(kind=CUSTOM_REAL), dimension(8,64) :: E1_m1_m2_8points,E2_m1_m2_8points,E3_m1_m2_8points
equivalence(dummyx_loc,B1_m1_m2_8points)
equivalence(dummyy_loc,B2_m1_m2_8points)
@@ -2147,11 +2086,11 @@
equivalence(newtempy1,E2_m1_m2_8points)
equivalence(newtempz1,E3_m1_m2_8points)
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+ real(kind=CUSTOM_REAL), dimension(64,8) :: &
A1_mxm_m2_m1_8points,A2_mxm_m2_m1_8points,A3_mxm_m2_m1_8points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(64,8) :: &
C1_mxm_m2_m1_8points,C2_mxm_m2_m1_8points,C3_mxm_m2_m1_8points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(64,8) :: &
E1_mxm_m2_m1_8points,E2_mxm_m2_m1_8points,E3_mxm_m2_m1_8points
equivalence(dummyx_loc,A1_mxm_m2_m1_8points)
@@ -2241,7 +2180,7 @@
hprime_xx(i,5)*B1_m1_m2_8points(5,j) + &
hprime_xx(i,6)*B1_m1_m2_8points(6,j) + &
hprime_xx(i,7)*B1_m1_m2_8points(7,j) + &
- hprime_xx(i,8)*B1_m1_m2_8points(8,j)
+ hprime_xx(i,8)*B1_m1_m2_8points(8,j)
C2_m1_m2_8points(i,j) = hprime_xx(i,1)*B2_m1_m2_8points(1,j) + &
hprime_xx(i,2)*B2_m1_m2_8points(2,j) + &
hprime_xx(i,3)*B2_m1_m2_8points(3,j) + &
@@ -2249,7 +2188,7 @@
hprime_xx(i,5)*B2_m1_m2_8points(5,j) + &
hprime_xx(i,6)*B2_m1_m2_8points(6,j) + &
hprime_xx(i,7)*B2_m1_m2_8points(7,j) + &
- hprime_xx(i,8)*B2_m1_m2_8points(8,j)
+ hprime_xx(i,8)*B2_m1_m2_8points(8,j)
C3_m1_m2_8points(i,j) = hprime_xx(i,1)*B3_m1_m2_8points(1,j) + &
hprime_xx(i,2)*B3_m1_m2_8points(2,j) + &
hprime_xx(i,3)*B3_m1_m2_8points(3,j) + &
@@ -2257,7 +2196,7 @@
hprime_xx(i,5)*B3_m1_m2_8points(5,j) + &
hprime_xx(i,6)*B3_m1_m2_8points(6,j) + &
hprime_xx(i,7)*B3_m1_m2_8points(7,j) + &
- hprime_xx(i,8)*B3_m1_m2_8points(8,j)
+ hprime_xx(i,8)*B3_m1_m2_8points(8,j)
enddo
enddo
@@ -2274,7 +2213,7 @@
dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
dummyx_loc(i,6,k)*hprime_xxT(6,j) + &
dummyx_loc(i,7,k)*hprime_xxT(7,j) + &
- dummyx_loc(i,8,k)*hprime_xxT(8,j)
+ dummyx_loc(i,8,k)*hprime_xxT(8,j)
tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
@@ -2282,7 +2221,7 @@
dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
dummyy_loc(i,6,k)*hprime_xxT(6,j) + &
dummyy_loc(i,7,k)*hprime_xxT(7,j) + &
- dummyy_loc(i,8,k)*hprime_xxT(8,j)
+ dummyy_loc(i,8,k)*hprime_xxT(8,j)
tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
@@ -2290,7 +2229,7 @@
dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
dummyz_loc(i,6,k)*hprime_xxT(6,j) + &
dummyz_loc(i,7,k)*hprime_xxT(7,j) + &
- dummyz_loc(i,8,k)*hprime_xxT(8,j)
+ dummyz_loc(i,8,k)*hprime_xxT(8,j)
enddo
enddo
enddo
@@ -2305,7 +2244,7 @@
A1_mxm_m2_m1_8points(i,5)*hprime_xxT(5,j) + &
A1_mxm_m2_m1_8points(i,6)*hprime_xxT(6,j) + &
A1_mxm_m2_m1_8points(i,7)*hprime_xxT(7,j) + &
- A1_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
+ A1_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
C2_mxm_m2_m1_8points(i,j) = A2_mxm_m2_m1_8points(i,1)*hprime_xxT(1,j) + &
A2_mxm_m2_m1_8points(i,2)*hprime_xxT(2,j) + &
A2_mxm_m2_m1_8points(i,3)*hprime_xxT(3,j) + &
@@ -2313,7 +2252,7 @@
A2_mxm_m2_m1_8points(i,5)*hprime_xxT(5,j) + &
A2_mxm_m2_m1_8points(i,6)*hprime_xxT(6,j) + &
A2_mxm_m2_m1_8points(i,7)*hprime_xxT(7,j) + &
- A2_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
+ A2_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
C3_mxm_m2_m1_8points(i,j) = A3_mxm_m2_m1_8points(i,1)*hprime_xxT(1,j) + &
A3_mxm_m2_m1_8points(i,2)*hprime_xxT(2,j) + &
A3_mxm_m2_m1_8points(i,3)*hprime_xxT(3,j) + &
@@ -2321,7 +2260,7 @@
A3_mxm_m2_m1_8points(i,5)*hprime_xxT(5,j) + &
A3_mxm_m2_m1_8points(i,6)*hprime_xxT(6,j) + &
A3_mxm_m2_m1_8points(i,7)*hprime_xxT(7,j) + &
- A3_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
+ A3_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
enddo
enddo
@@ -2554,7 +2493,7 @@
hprimewgll_xxT(i,5)*C1_m1_m2_8points(5,j) + &
hprimewgll_xxT(i,6)*C1_m1_m2_8points(6,j) + &
hprimewgll_xxT(i,7)*C1_m1_m2_8points(7,j) + &
- hprimewgll_xxT(i,8)*C1_m1_m2_8points(8,j)
+ hprimewgll_xxT(i,8)*C1_m1_m2_8points(8,j)
E2_m1_m2_8points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_8points(1,j) + &
hprimewgll_xxT(i,2)*C2_m1_m2_8points(2,j) + &
hprimewgll_xxT(i,3)*C2_m1_m2_8points(3,j) + &
@@ -2562,7 +2501,7 @@
hprimewgll_xxT(i,5)*C2_m1_m2_8points(5,j) + &
hprimewgll_xxT(i,6)*C2_m1_m2_8points(6,j) + &
hprimewgll_xxT(i,7)*C2_m1_m2_8points(7,j) + &
- hprimewgll_xxT(i,8)*C2_m1_m2_8points(8,j)
+ hprimewgll_xxT(i,8)*C2_m1_m2_8points(8,j)
E3_m1_m2_8points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_8points(1,j) + &
hprimewgll_xxT(i,2)*C3_m1_m2_8points(2,j) + &
hprimewgll_xxT(i,3)*C3_m1_m2_8points(3,j) + &
@@ -2570,7 +2509,7 @@
hprimewgll_xxT(i,5)*C3_m1_m2_8points(5,j) + &
hprimewgll_xxT(i,6)*C3_m1_m2_8points(6,j) + &
hprimewgll_xxT(i,7)*C3_m1_m2_8points(7,j) + &
- hprimewgll_xxT(i,8)*C3_m1_m2_8points(8,j)
+ hprimewgll_xxT(i,8)*C3_m1_m2_8points(8,j)
enddo
enddo
@@ -2587,7 +2526,7 @@
tempx2(i,5,k)*hprimewgll_xx(5,j) + &
tempx2(i,6,k)*hprimewgll_xx(6,j) + &
tempx2(i,7,k)*hprimewgll_xx(7,j) + &
- tempx2(i,8,k)*hprimewgll_xx(8,j)
+ tempx2(i,8,k)*hprimewgll_xx(8,j)
newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
tempy2(i,2,k)*hprimewgll_xx(2,j) + &
tempy2(i,3,k)*hprimewgll_xx(3,j) + &
@@ -2595,7 +2534,7 @@
tempy2(i,5,k)*hprimewgll_xx(5,j) + &
tempy2(i,6,k)*hprimewgll_xx(6,j) + &
tempy2(i,7,k)*hprimewgll_xx(7,j) + &
- tempy2(i,8,k)*hprimewgll_xx(8,j)
+ tempy2(i,8,k)*hprimewgll_xx(8,j)
newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
tempz2(i,2,k)*hprimewgll_xx(2,j) + &
tempz2(i,3,k)*hprimewgll_xx(3,j) + &
@@ -2603,7 +2542,7 @@
tempz2(i,5,k)*hprimewgll_xx(5,j) + &
tempz2(i,6,k)*hprimewgll_xx(6,j) + &
tempz2(i,7,k)*hprimewgll_xx(7,j) + &
- tempz2(i,8,k)*hprimewgll_xx(8,j)
+ tempz2(i,8,k)*hprimewgll_xx(8,j)
enddo
enddo
enddo
@@ -2618,7 +2557,7 @@
C1_mxm_m2_m1_8points(i,5)*hprimewgll_xx(5,j) + &
C1_mxm_m2_m1_8points(i,6)*hprimewgll_xx(6,j) + &
C1_mxm_m2_m1_8points(i,7)*hprimewgll_xx(7,j) + &
- C1_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
+ C1_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
E2_mxm_m2_m1_8points(i,j) = C2_mxm_m2_m1_8points(i,1)*hprimewgll_xx(1,j) + &
C2_mxm_m2_m1_8points(i,2)*hprimewgll_xx(2,j) + &
C2_mxm_m2_m1_8points(i,3)*hprimewgll_xx(3,j) + &
@@ -2626,7 +2565,7 @@
C2_mxm_m2_m1_8points(i,5)*hprimewgll_xx(5,j) + &
C2_mxm_m2_m1_8points(i,6)*hprimewgll_xx(6,j) + &
C2_mxm_m2_m1_8points(i,7)*hprimewgll_xx(7,j) + &
- C2_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
+ C2_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
E3_mxm_m2_m1_8points(i,j) = C3_mxm_m2_m1_8points(i,1)*hprimewgll_xx(1,j) + &
C3_mxm_m2_m1_8points(i,2)*hprimewgll_xx(2,j) + &
C3_mxm_m2_m1_8points(i,3)*hprimewgll_xx(3,j) + &
@@ -2634,7 +2573,7 @@
C3_mxm_m2_m1_8points(i,5)*hprimewgll_xx(5,j) + &
C3_mxm_m2_m1_8points(i,6)*hprimewgll_xx(6,j) + &
C3_mxm_m2_m1_8points(i,7)*hprimewgll_xx(7,j) + &
- C3_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
+ C3_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
enddo
enddo
@@ -2713,35 +2652,13 @@
enddo ! spectral element loop
-end subroutine compute_forces_elastic_Dev_8points
-!=====================================================================
+end subroutine compute_forces_elastic_Dev_8p
+
!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
!=====================================================================
+!
-
-subroutine compute_forces_elastic_Dev_9points( iphase ,NSPEC_AB,NGLOB_AB, &
+subroutine compute_forces_elastic_Dev_9p( iphase ,NSPEC_AB,NGLOB_AB, &
displ,accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_xxT, &
@@ -2788,7 +2705,8 @@
kappastore,mustore,jacobian
! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(NGLLX,9) :: hprime_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(9,NGLLX) :: hprime_xxT,hprimewgll_xx
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -2833,15 +2751,15 @@
integer :: ispec2D_moho_top, ispec2D_moho_bot
! local parameters
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+ real(kind=CUSTOM_REAL), dimension(9,9,9) :: dummyx_loc,dummyy_loc,dummyz_loc, &
newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+ real(kind=CUSTOM_REAL), dimension(9,9,9) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
! manually inline the calls to the Deville et al. (2002) routines
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_9points,B2_m1_m2_9points,B3_m1_m2_9points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_9points,C2_m1_m2_9points,C3_m1_m2_9points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_9points,E2_m1_m2_9points,E3_m1_m2_9points
+ real(kind=CUSTOM_REAL), dimension(9,81) :: B1_m1_m2_9points,B2_m1_m2_9points,B3_m1_m2_9points
+ real(kind=CUSTOM_REAL), dimension(9,81) :: C1_m1_m2_9points,C2_m1_m2_9points,C3_m1_m2_9points
+ real(kind=CUSTOM_REAL), dimension(9,81) :: E1_m1_m2_9points,E2_m1_m2_9points,E3_m1_m2_9points
equivalence(dummyx_loc,B1_m1_m2_9points)
equivalence(dummyy_loc,B2_m1_m2_9points)
@@ -2853,11 +2771,11 @@
equivalence(newtempy1,E2_m1_m2_9points)
equivalence(newtempz1,E3_m1_m2_9points)
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+ real(kind=CUSTOM_REAL), dimension(81,9) :: &
A1_mxm_m2_m1_9points,A2_mxm_m2_m1_9points,A3_mxm_m2_m1_9points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(81,9) :: &
C1_mxm_m2_m1_9points,C2_mxm_m2_m1_9points,C3_mxm_m2_m1_9points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(81,9) :: &
E1_mxm_m2_m1_9points,E2_mxm_m2_m1_9points,E3_mxm_m2_m1_9points
equivalence(dummyx_loc,A1_mxm_m2_m1_9points)
@@ -2948,7 +2866,7 @@
hprime_xx(i,6)*B1_m1_m2_9points(6,j) + &
hprime_xx(i,7)*B1_m1_m2_9points(7,j) + &
hprime_xx(i,8)*B1_m1_m2_9points(8,j) + &
- hprime_xx(i,9)*B1_m1_m2_9points(9,j)
+ hprime_xx(i,9)*B1_m1_m2_9points(9,j)
C2_m1_m2_9points(i,j) = hprime_xx(i,1)*B2_m1_m2_9points(1,j) + &
hprime_xx(i,2)*B2_m1_m2_9points(2,j) + &
hprime_xx(i,3)*B2_m1_m2_9points(3,j) + &
@@ -2957,7 +2875,7 @@
hprime_xx(i,6)*B2_m1_m2_9points(6,j) + &
hprime_xx(i,7)*B2_m1_m2_9points(7,j) + &
hprime_xx(i,8)*B2_m1_m2_9points(8,j) + &
- hprime_xx(i,9)*B2_m1_m2_9points(9,j)
+ hprime_xx(i,9)*B2_m1_m2_9points(9,j)
C3_m1_m2_9points(i,j) = hprime_xx(i,1)*B3_m1_m2_9points(1,j) + &
hprime_xx(i,2)*B3_m1_m2_9points(2,j) + &
hprime_xx(i,3)*B3_m1_m2_9points(3,j) + &
@@ -2966,7 +2884,7 @@
hprime_xx(i,6)*B3_m1_m2_9points(6,j) + &
hprime_xx(i,7)*B3_m1_m2_9points(7,j) + &
hprime_xx(i,8)*B3_m1_m2_9points(8,j) + &
- hprime_xx(i,9)*B3_m1_m2_9points(9,j)
+ hprime_xx(i,9)*B3_m1_m2_9points(9,j)
enddo
enddo
@@ -2984,7 +2902,7 @@
dummyx_loc(i,6,k)*hprime_xxT(6,j) + &
dummyx_loc(i,7,k)*hprime_xxT(7,j) + &
dummyx_loc(i,8,k)*hprime_xxT(8,j) + &
- dummyx_loc(i,9,k)*hprime_xxT(9,j)
+ dummyx_loc(i,9,k)*hprime_xxT(9,j)
tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
@@ -2993,7 +2911,7 @@
dummyy_loc(i,6,k)*hprime_xxT(6,j) + &
dummyy_loc(i,7,k)*hprime_xxT(7,j) + &
dummyy_loc(i,8,k)*hprime_xxT(8,j) + &
- dummyy_loc(i,9,k)*hprime_xxT(9,j)
+ dummyy_loc(i,9,k)*hprime_xxT(9,j)
tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
@@ -3002,7 +2920,7 @@
dummyz_loc(i,6,k)*hprime_xxT(6,j) + &
dummyz_loc(i,7,k)*hprime_xxT(7,j) + &
dummyz_loc(i,8,k)*hprime_xxT(8,j) + &
- dummyz_loc(i,9,k)*hprime_xxT(9,j)
+ dummyz_loc(i,9,k)*hprime_xxT(9,j)
enddo
enddo
enddo
@@ -3018,7 +2936,7 @@
A1_mxm_m2_m1_9points(i,6)*hprime_xxT(6,j) + &
A1_mxm_m2_m1_9points(i,7)*hprime_xxT(7,j) + &
A1_mxm_m2_m1_9points(i,8)*hprime_xxT(8,j) + &
- A1_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
+ A1_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
C2_mxm_m2_m1_9points(i,j) = A2_mxm_m2_m1_9points(i,1)*hprime_xxT(1,j) + &
A2_mxm_m2_m1_9points(i,2)*hprime_xxT(2,j) + &
A2_mxm_m2_m1_9points(i,3)*hprime_xxT(3,j) + &
@@ -3027,7 +2945,7 @@
A2_mxm_m2_m1_9points(i,6)*hprime_xxT(6,j) + &
A2_mxm_m2_m1_9points(i,7)*hprime_xxT(7,j) + &
A2_mxm_m2_m1_9points(i,8)*hprime_xxT(8,j) + &
- A2_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
+ A2_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
C3_mxm_m2_m1_9points(i,j) = A3_mxm_m2_m1_9points(i,1)*hprime_xxT(1,j) + &
A3_mxm_m2_m1_9points(i,2)*hprime_xxT(2,j) + &
A3_mxm_m2_m1_9points(i,3)*hprime_xxT(3,j) + &
@@ -3036,7 +2954,7 @@
A3_mxm_m2_m1_9points(i,6)*hprime_xxT(6,j) + &
A3_mxm_m2_m1_9points(i,7)*hprime_xxT(7,j) + &
A3_mxm_m2_m1_9points(i,8)*hprime_xxT(8,j) + &
- A3_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
+ A3_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
enddo
enddo
@@ -3270,7 +3188,7 @@
hprimewgll_xxT(i,6)*C1_m1_m2_9points(6,j) + &
hprimewgll_xxT(i,7)*C1_m1_m2_9points(7,j) + &
hprimewgll_xxT(i,8)*C1_m1_m2_9points(8,j) + &
- hprimewgll_xxT(i,9)*C1_m1_m2_9points(9,j)
+ hprimewgll_xxT(i,9)*C1_m1_m2_9points(9,j)
E2_m1_m2_9points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_9points(1,j) + &
hprimewgll_xxT(i,2)*C2_m1_m2_9points(2,j) + &
hprimewgll_xxT(i,3)*C2_m1_m2_9points(3,j) + &
@@ -3279,7 +3197,7 @@
hprimewgll_xxT(i,6)*C2_m1_m2_9points(6,j) + &
hprimewgll_xxT(i,7)*C2_m1_m2_9points(7,j) + &
hprimewgll_xxT(i,8)*C2_m1_m2_9points(8,j) + &
- hprimewgll_xxT(i,9)*C2_m1_m2_9points(9,j)
+ hprimewgll_xxT(i,9)*C2_m1_m2_9points(9,j)
E3_m1_m2_9points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_9points(1,j) + &
hprimewgll_xxT(i,2)*C3_m1_m2_9points(2,j) + &
hprimewgll_xxT(i,3)*C3_m1_m2_9points(3,j) + &
@@ -3288,7 +3206,7 @@
hprimewgll_xxT(i,6)*C3_m1_m2_9points(6,j) + &
hprimewgll_xxT(i,7)*C3_m1_m2_9points(7,j) + &
hprimewgll_xxT(i,8)*C3_m1_m2_9points(8,j) + &
- hprimewgll_xxT(i,9)*C3_m1_m2_9points(9,j)
+ hprimewgll_xxT(i,9)*C3_m1_m2_9points(9,j)
enddo
enddo
@@ -3306,7 +3224,7 @@
tempx2(i,6,k)*hprimewgll_xx(6,j) + &
tempx2(i,7,k)*hprimewgll_xx(7,j) + &
tempx2(i,8,k)*hprimewgll_xx(8,j) + &
- tempx2(i,9,k)*hprimewgll_xx(9,j)
+ tempx2(i,9,k)*hprimewgll_xx(9,j)
newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
tempy2(i,2,k)*hprimewgll_xx(2,j) + &
tempy2(i,3,k)*hprimewgll_xx(3,j) + &
@@ -3315,7 +3233,7 @@
tempy2(i,6,k)*hprimewgll_xx(6,j) + &
tempy2(i,7,k)*hprimewgll_xx(7,j) + &
tempy2(i,8,k)*hprimewgll_xx(8,j) + &
- tempy2(i,9,k)*hprimewgll_xx(9,j)
+ tempy2(i,9,k)*hprimewgll_xx(9,j)
newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
tempz2(i,2,k)*hprimewgll_xx(2,j) + &
tempz2(i,3,k)*hprimewgll_xx(3,j) + &
@@ -3324,7 +3242,7 @@
tempz2(i,6,k)*hprimewgll_xx(6,j) + &
tempz2(i,7,k)*hprimewgll_xx(7,j) + &
tempz2(i,8,k)*hprimewgll_xx(8,j) + &
- tempz2(i,9,k)*hprimewgll_xx(9,j)
+ tempz2(i,9,k)*hprimewgll_xx(9,j)
enddo
enddo
enddo
@@ -3340,7 +3258,7 @@
C1_mxm_m2_m1_9points(i,6)*hprimewgll_xx(6,j) + &
C1_mxm_m2_m1_9points(i,7)*hprimewgll_xx(7,j) + &
C1_mxm_m2_m1_9points(i,8)*hprimewgll_xx(8,j) + &
- C1_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
+ C1_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
E2_mxm_m2_m1_9points(i,j) = C2_mxm_m2_m1_9points(i,1)*hprimewgll_xx(1,j) + &
C2_mxm_m2_m1_9points(i,2)*hprimewgll_xx(2,j) + &
C2_mxm_m2_m1_9points(i,3)*hprimewgll_xx(3,j) + &
@@ -3349,7 +3267,7 @@
C2_mxm_m2_m1_9points(i,6)*hprimewgll_xx(6,j) + &
C2_mxm_m2_m1_9points(i,7)*hprimewgll_xx(7,j) + &
C2_mxm_m2_m1_9points(i,8)*hprimewgll_xx(8,j) + &
- C2_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
+ C2_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
E3_mxm_m2_m1_9points(i,j) = C3_mxm_m2_m1_9points(i,1)*hprimewgll_xx(1,j) + &
C3_mxm_m2_m1_9points(i,2)*hprimewgll_xx(2,j) + &
C3_mxm_m2_m1_9points(i,3)*hprimewgll_xx(3,j) + &
@@ -3358,7 +3276,7 @@
C3_mxm_m2_m1_9points(i,6)*hprimewgll_xx(6,j) + &
C3_mxm_m2_m1_9points(i,7)*hprimewgll_xx(7,j) + &
C3_mxm_m2_m1_9points(i,8)*hprimewgll_xx(8,j) + &
- C3_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
+ C3_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
enddo
enddo
@@ -3437,35 +3355,13 @@
enddo ! spectral element loop
-end subroutine compute_forces_elastic_Dev_9points
-!=====================================================================
+end subroutine compute_forces_elastic_Dev_9p
+
!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
!=====================================================================
+!
-
-subroutine compute_forces_elastic_Dev_10points( iphase ,NSPEC_AB,NGLOB_AB, &
+subroutine compute_forces_elastic_Dev_10p( iphase ,NSPEC_AB,NGLOB_AB, &
displ,accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_xxT, &
@@ -3512,7 +3408,8 @@
kappastore,mustore,jacobian
! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(NGLLX,10) :: hprime_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(10,NGLLX) :: hprime_xxT,hprimewgll_xx
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
@@ -3557,15 +3454,15 @@
integer :: ispec2D_moho_top, ispec2D_moho_bot
! local parameters
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+ real(kind=CUSTOM_REAL), dimension(10,10,10) :: dummyx_loc,dummyy_loc,dummyz_loc, &
newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+ real(kind=CUSTOM_REAL), dimension(10,10,10) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
! manually inline the calls to the Deville et al. (2002) routines
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_10points,B2_m1_m2_10points,B3_m1_m2_10points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_10points,C2_m1_m2_10points,C3_m1_m2_10points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_10points,E2_m1_m2_10points,E3_m1_m2_10points
+ real(kind=CUSTOM_REAL), dimension(10,100) :: B1_m1_m2_10points,B2_m1_m2_10points,B3_m1_m2_10points
+ real(kind=CUSTOM_REAL), dimension(10,100) :: C1_m1_m2_10points,C2_m1_m2_10points,C3_m1_m2_10points
+ real(kind=CUSTOM_REAL), dimension(10,100) :: E1_m1_m2_10points,E2_m1_m2_10points,E3_m1_m2_10points
equivalence(dummyx_loc,B1_m1_m2_10points)
equivalence(dummyy_loc,B2_m1_m2_10points)
@@ -3577,11 +3474,11 @@
equivalence(newtempy1,E2_m1_m2_10points)
equivalence(newtempz1,E3_m1_m2_10points)
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+ real(kind=CUSTOM_REAL), dimension(100,10) :: &
A1_mxm_m2_m1_10points,A2_mxm_m2_m1_10points,A3_mxm_m2_m1_10points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(100,10) :: &
C1_mxm_m2_m1_10points,C2_mxm_m2_m1_10points,C3_mxm_m2_m1_10points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+ real(kind=CUSTOM_REAL), dimension(100,10) :: &
E1_mxm_m2_m1_10points,E2_mxm_m2_m1_10points,E3_mxm_m2_m1_10points
equivalence(dummyx_loc,A1_mxm_m2_m1_10points)
@@ -3673,7 +3570,7 @@
hprime_xx(i,7)*B1_m1_m2_10points(7,j) + &
hprime_xx(i,8)*B1_m1_m2_10points(8,j) + &
hprime_xx(i,9)*B1_m1_m2_10points(9,j) + &
- hprime_xx(i,10)*B1_m1_m2_10points(10,j)
+ hprime_xx(i,10)*B1_m1_m2_10points(10,j)
C2_m1_m2_10points(i,j) = hprime_xx(i,1)*B2_m1_m2_10points(1,j) + &
hprime_xx(i,2)*B2_m1_m2_10points(2,j) + &
hprime_xx(i,3)*B2_m1_m2_10points(3,j) + &
@@ -3683,7 +3580,7 @@
hprime_xx(i,7)*B2_m1_m2_10points(7,j) + &
hprime_xx(i,8)*B2_m1_m2_10points(8,j) + &
hprime_xx(i,9)*B2_m1_m2_10points(9,j) + &
- hprime_xx(i,10)*B2_m1_m2_10points(10,j)
+ hprime_xx(i,10)*B2_m1_m2_10points(10,j)
C3_m1_m2_10points(i,j) = hprime_xx(i,1)*B3_m1_m2_10points(1,j) + &
hprime_xx(i,2)*B3_m1_m2_10points(2,j) + &
hprime_xx(i,3)*B3_m1_m2_10points(3,j) + &
@@ -3693,7 +3590,7 @@
hprime_xx(i,7)*B3_m1_m2_10points(7,j) + &
hprime_xx(i,8)*B3_m1_m2_10points(8,j) + &
hprime_xx(i,9)*B3_m1_m2_10points(9,j) + &
- hprime_xx(i,10)*B3_m1_m2_10points(10,j)
+ hprime_xx(i,10)*B3_m1_m2_10points(10,j)
enddo
enddo
@@ -3712,7 +3609,7 @@
dummyx_loc(i,7,k)*hprime_xxT(7,j) + &
dummyx_loc(i,8,k)*hprime_xxT(8,j) + &
dummyx_loc(i,9,k)*hprime_xxT(9,j) + &
- dummyx_loc(i,10,k)*hprime_xxT(10,j)
+ dummyx_loc(i,10,k)*hprime_xxT(10,j)
tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
@@ -3749,7 +3646,7 @@
A1_mxm_m2_m1_10points(i,7)*hprime_xxT(7,j) + &
A1_mxm_m2_m1_10points(i,8)*hprime_xxT(8,j) + &
A1_mxm_m2_m1_10points(i,9)*hprime_xxT(9,j) + &
- A1_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
+ A1_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
C2_mxm_m2_m1_10points(i,j) = A2_mxm_m2_m1_10points(i,1)*hprime_xxT(1,j) + &
A2_mxm_m2_m1_10points(i,2)*hprime_xxT(2,j) + &
A2_mxm_m2_m1_10points(i,3)*hprime_xxT(3,j) + &
@@ -3759,7 +3656,7 @@
A2_mxm_m2_m1_10points(i,7)*hprime_xxT(7,j) + &
A2_mxm_m2_m1_10points(i,8)*hprime_xxT(8,j) + &
A2_mxm_m2_m1_10points(i,9)*hprime_xxT(9,j) + &
- A2_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
+ A2_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
C3_mxm_m2_m1_10points(i,j) = A3_mxm_m2_m1_10points(i,1)*hprime_xxT(1,j) + &
A3_mxm_m2_m1_10points(i,2)*hprime_xxT(2,j) + &
A3_mxm_m2_m1_10points(i,3)*hprime_xxT(3,j) + &
@@ -3769,7 +3666,7 @@
A3_mxm_m2_m1_10points(i,7)*hprime_xxT(7,j) + &
A3_mxm_m2_m1_10points(i,8)*hprime_xxT(8,j) + &
A3_mxm_m2_m1_10points(i,9)*hprime_xxT(9,j) + &
- A3_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
+ A3_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
enddo
enddo
@@ -4004,7 +3901,7 @@
hprimewgll_xxT(i,7)*C1_m1_m2_10points(7,j) + &
hprimewgll_xxT(i,8)*C1_m1_m2_10points(8,j) + &
hprimewgll_xxT(i,9)*C1_m1_m2_10points(9,j) + &
- hprimewgll_xxT(i,10)*C1_m1_m2_10points(10,j)
+ hprimewgll_xxT(i,10)*C1_m1_m2_10points(10,j)
E2_m1_m2_10points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_10points(1,j) + &
hprimewgll_xxT(i,2)*C2_m1_m2_10points(2,j) + &
hprimewgll_xxT(i,3)*C2_m1_m2_10points(3,j) + &
@@ -4024,7 +3921,7 @@
hprimewgll_xxT(i,7)*C3_m1_m2_10points(7,j) + &
hprimewgll_xxT(i,8)*C3_m1_m2_10points(8,j) + &
hprimewgll_xxT(i,9)*C3_m1_m2_10points(9,j) + &
- hprimewgll_xxT(i,10)*C3_m1_m2_10points(10,j)
+ hprimewgll_xxT(i,10)*C3_m1_m2_10points(10,j)
enddo
enddo
@@ -4053,7 +3950,7 @@
tempy2(i,7,k)*hprimewgll_xx(7,j) + &
tempy2(i,8,k)*hprimewgll_xx(8,j) + &
tempy2(i,9,k)*hprimewgll_xx(9,j) + &
- tempy2(i,10,k)*hprimewgll_xx(10,j)
+ tempy2(i,10,k)*hprimewgll_xx(10,j)
newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
tempz2(i,2,k)*hprimewgll_xx(2,j) + &
tempz2(i,3,k)*hprimewgll_xx(3,j) + &
@@ -4080,7 +3977,7 @@
C1_mxm_m2_m1_10points(i,7)*hprimewgll_xx(7,j) + &
C1_mxm_m2_m1_10points(i,8)*hprimewgll_xx(8,j) + &
C1_mxm_m2_m1_10points(i,9)*hprimewgll_xx(9,j) + &
- C1_mxm_m2_m1_10points(i,10)*hprimewgll_xx(10,j)
+ C1_mxm_m2_m1_10points(i,10)*hprimewgll_xx(10,j)
E2_mxm_m2_m1_10points(i,j) = C2_mxm_m2_m1_10points(i,1)*hprimewgll_xx(1,j) + &
C2_mxm_m2_m1_10points(i,2)*hprimewgll_xx(2,j) + &
C2_mxm_m2_m1_10points(i,3)*hprimewgll_xx(3,j) + &
@@ -4179,4 +4076,4 @@
enddo ! spectral element loop
-end subroutine compute_forces_elastic_Dev_10points
+end subroutine compute_forces_elastic_Dev_10p
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -64,8 +64,8 @@
! displacement and acceleration
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,accel
-
+
! arrays with mesh parameters per slice
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
@@ -124,11 +124,11 @@
! newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NUM_THREADS) :: &
! tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-
+
real(kind=CUSTOM_REAL), dimension(:,:,:,:),allocatable :: &
dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3,&
newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3,&
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+ tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
! real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB,NUM_THREADS) :: accel_omp
! real(kind=CUSTOM_REAL), dimension(:,:,:),allocatable :: accel_omp
@@ -160,7 +160,7 @@
double precision end_time
double precision accumulate_time_start
double precision accumulate_time_stop
-
+
! local anisotropy parameters
real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
@@ -171,14 +171,14 @@
integer thread_id
integer NUM_THREADS
integer omp_get_num_threads ! function
-
+
imodulo_N_SLS = mod(N_SLS,3)
! NUM_THREADS = 12
NUM_THREADS = OMP_GET_MAX_THREADS()
-
-
+
+
! allocate(accel_omp(NDIM,NGLOB_AB,NUM_THREADS))
-
+
! allocate local arrays
allocate(dummyx_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
allocate(dummyy_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
@@ -209,7 +209,7 @@
endif
! "start" timer
start_time = omp_get_wtime()
-
+
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(&
!$OMP R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3,&
!$OMP factor_loc,alphaval_loc,betaval_loc,gammaval_loc,&
@@ -228,17 +228,17 @@
!$OMP ispec,iglob,ispec_p,&
!$OMP i,j,k,&
!$OMP thread_id)
-
+
thread_id = OMP_get_thread_num()+1
! thread_id = 1
-
-
+
+
! accel_omp(:,:,thread_id) = 0.0
!$OMP DO
do ispec_p = 1,num_elements
-
-
+
+
! returns element id from stored element list
ispec = phase_ispec_inner_elastic(ispec_p,iphase)
@@ -268,7 +268,7 @@
! pages 386 and 389 and Figure 8.3.1
! call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
do j=1,m2
- do i=1,m1
+ do i=1,m1
tempx1(i,j,1,thread_id) = &
hprime_xx(i,1)*dummyx_loc(1,j,1,thread_id) + &
hprime_xx(i,2)*dummyx_loc(2,j,1,thread_id) + &
@@ -314,7 +314,7 @@
enddo
enddo
enddo
-
+
! call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
do j=1,m1
do i=1,m2
@@ -631,7 +631,7 @@
tempz3(i,1,5,thread_id)*hprimewgll_xx(5,j)
enddo
enddo
-
+
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -651,28 +651,28 @@
! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id)&
! - fac1*newtempz1(i,j,k,thread_id) - fac2*newtempz2(i,j,k,thread_id)&
! - fac3*newtempz3(i,j,k,thread_id)
-
+
!$OMP ATOMIC
accel(1,iglob) = accel(1,iglob) - (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
!$OMP ATOMIC
accel(2,iglob) = accel(2,iglob) - (fac1*newtempy1(i,j,k,thread_id) + fac2*newtempy2(i,j,k,thread_id) + fac3*newtempy3(i,j,k,thread_id))
!$OMP ATOMIC
accel(3,iglob) = accel(3,iglob) - (fac1*newtempz1(i,j,k,thread_id) + fac2*newtempz2(i,j,k,thread_id) + fac3*newtempz3(i,j,k,thread_id))
-
+
! accel(1,iglob) = accel(1,iglob) - &
! (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
! accel(2,iglob) = accel(2,iglob) - &
! (fac1*newtempy1(i,j,k,thread_id) + fac2*newtempy2(i,j,k,thread_id) + fac3*newtempy3(i,j,k,thread_id))
! accel(3,iglob) = accel(3,iglob) - &
! (fac1*newtempz1(i,j,k,thread_id) + fac2*newtempz2(i,j,k,thread_id) + fac3*newtempz3(i,j,k,thread_id))
-
+
! accel_omp(1,iglob,thread_id) = accel_omp(1,iglob,thread_id) - fac1*newtempx1(i,j,k,thread_id) - &
! fac2*newtempx2(i,j,k,thread_id) - fac3*newtempx3(i,j,k,thread_id)
! accel_omp(2,iglob,thread_id) = accel_omp(2,iglob,thread_id) - fac1*newtempy1(i,j,k,thread_id) - &
! fac2*newtempy2(i,j,k,thread_id) - fac3*newtempy3(i,j,k,thread_id)
! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id) - fac1*newtempz1(i,j,k,thread_id) - &
! fac2*newtempz2(i,j,k,thread_id) - fac3*newtempz3(i,j,k,thread_id)
-
+
! update memory variables based upon the Runge-Kutta scheme
if(ATTENUATION) then
@@ -731,32 +731,32 @@
enddo ! spectral element loop
!$OMP END DO
-
-
- ! accel(:,:) = accel(:,:) + accel_omp(:,:,thread_id)
+
+
+ ! accel(:,:) = accel(:,:) + accel_omp(:,:,thread_id)
! do i=1,NGLOB_AB
! accel(1,i) = accel(1,i) + accel_omp(1,i,thread_id)
! accel(2,i) = accel(1,i) + accel_omp(2,i,thread_id)
- ! accel(3,i) = accel(1,i) + accel_omp(3,i,thread_id)
+ ! accel(3,i) = accel(1,i) + accel_omp(3,i,thread_id)
! enddo
-
+
!$OMP END PARALLEL
! accumulate_time_start = omp_get_wtime()
-
+
! do i=1,NUM_THREADS
! ! ! parallel vector add
! accel(:,:) = accel(:,:) + accel_omp(:,:,i)
! end do
! accumulate_time_stop = omp_get_wtime()
-
+
! "stop" timer
end_time = omp_get_wtime()
write(*,*) "Total Elapsed time: ", (end_time-start_time) , "seconds. (Threads=",NUM_THREADS,")"
! write(*,*) "Accumulate Elapsed time: ", (accumulate_time_stop-accumulate_time_start) , "seconds"
-
-
+
+
deallocate(dummyx_loc)
deallocate(dummyy_loc)
deallocate(dummyz_loc)
@@ -778,7 +778,7 @@
deallocate(tempz1)
deallocate(tempz2)
deallocate(tempz3)
-
+
! accel(:,:) = accel_omp(:,:,1)
-
+
end subroutine compute_forces_elastic_Dev_openmp
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -48,7 +48,7 @@
if ( APPROXIMATE_HESS_KL ) then
call compute_kernels_hessian()
endif
-
+
end subroutine compute_kernels
@@ -72,7 +72,7 @@
! updates kernels on GPU
if(GPU_MODE) then
call compute_kernels_elastic_cuda(Mesh_pointer,deltat)
-
+
! for noise simulations --- source strength kernel
if (NOISE_TOMOGRAPHY == 3) &
call compute_kernels_strength_noise(NGLLSQUARE*num_free_surface_faces,ibool, &
@@ -82,11 +82,11 @@
NSPEC_AB,NGLOB_AB, &
num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
GPU_MODE,Mesh_pointer)
-
+
! kernels are done
return
endif
-
+
! updates kernels on CPU
do ispec = 1, NSPEC_AB
@@ -143,7 +143,7 @@
endif !ispec_is_elastic
enddo
-
+
! moho kernel
if( SAVE_MOHO_MESH ) then
call compute_boundary_kernel()
@@ -158,7 +158,7 @@
NSPEC_AB,NGLOB_AB, &
num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
GPU_MODE,Mesh_pointer)
-
+
end subroutine compute_kernels_el
!
@@ -249,21 +249,20 @@
real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: b_accel_elm,accel_elm
integer :: i,j,k,ispec,iglob
- !daniel: todo - workaround to do this on GPU?
- if( GPU_MODE) then
- if( ACOUSTIC_SIMULATION) then
- call transfer_potential_dot_dot_from_device(NGLOB_AB,potential_dot_dot_acoustic, Mesh_pointer)
- call transfer_b_potential_dot_dot_from_device(NGLOB_AB,b_potential_dot_dot_acoustic, Mesh_pointer)
- endif
- if( ELASTIC_SIMULATION ) then
- call transfer_accel_from_device(NGLOB_AB*NDIM,accel,Mesh_pointer)
- call transfer_b_accel_from_device(NGLOB_AB*NDIM,b_accel,Mesh_pointer)
- endif
+ ! updates kernels on GPU
+ if(GPU_MODE) then
+
+ ! computes contribution to density and bulk modulus kernel
+ call compute_kernels_hess_cuda(Mesh_pointer,deltat, &
+ ELASTIC_SIMULATION,ACOUSTIC_SIMULATION)
+
+ ! done on GPU
+ return
endif
! loops over all elements
do ispec = 1, NSPEC_AB
-
+
! acoustic domains
if( ispec_is_acoustic(ispec) ) then
@@ -280,7 +279,7 @@
hprime_xx,hprime_yy,hprime_zz, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
ibool,rhostore)
-
+
do k = 1, NGLLZ
do j = 1, NGLLY
do i = 1, NGLLX
@@ -293,7 +292,7 @@
enddo
enddo
- enddo
+ enddo
endif
! elastic domains
@@ -310,9 +309,9 @@
enddo
enddo
- enddo
+ enddo
endif
-
+
enddo
end subroutine compute_kernels_hessian
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -70,20 +70,20 @@
! GPU_MODE variables
integer(kind=8) :: Mesh_pointer
- logical :: GPU_MODE
+ logical :: GPU_MODE
! local parameters
real(kind=CUSTOM_REAL) :: rhol,cpl,jacobianw,absorbl
integer :: ispec,iglob,i,j,k,iface,igll
!integer:: reclen1,reclen2
-
+
! checks if anything to do
if( num_abs_boundary_faces == 0 ) return
! adjoint simulations:
if (SIMULATION_TYPE == 3 .and. num_abs_boundary_faces > 0) then
! reads in absorbing boundary array when first phase is running
- if( phase_is_inner .eqv. .false. ) then
+ if( phase_is_inner .eqv. .false. ) then
! note: the index NSTEP-it+1 is valid if b_displ is read in after the Newark scheme
! uses fortran routine
!read(IOABS_AC,rec=NSTEP-it+1) reclen1,b_absorb_potential,reclen2
@@ -91,7 +91,7 @@
! call exit_mpi(0,'Error reading absorbing contribution b_absorb_potential')
! uses c routine for faster reading
call read_abs(1,b_absorb_potential,b_reclen_potential,NSTEP-it+1)
- endif
+ endif
endif !adjoint
! absorbs absorbing-boundary surface using Sommerfeld condition (vanishing field in the outer-space)
@@ -144,13 +144,14 @@
endif ! ispec_is_acoustic
endif ! ispec_is_inner
enddo ! num_abs_boundary_faces
- else
+ else
! GPU_MODE == .true.
- call compute_stacey_acoustic_cuda(Mesh_pointer, phase_is_inner, &
- SIMULATION_TYPE,SAVE_FORWARD,b_absorb_potential)
+ if( num_abs_boundary_faces > 0 ) &
+ call compute_stacey_acoustic_cuda(Mesh_pointer, phase_is_inner, &
+ SIMULATION_TYPE,SAVE_FORWARD,b_absorb_potential)
endif
-
+
! adjoint simulations: stores absorbed wavefield part
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) then
! writes out absorbing boundary value only when second phase is running
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -77,8 +77,8 @@
! GPU_MODE variables
integer(kind=8) :: Mesh_pointer
- logical :: GPU_MODE
-
+ logical :: GPU_MODE
+
! local parameters
real(kind=CUSTOM_REAL) vx,vy,vz,nx,ny,nz,tx,ty,tz,vn,jacobianw
integer :: ispec,iglob,i,j,k,iface,igll
@@ -90,7 +90,7 @@
! adjoint simulations:
if (SIMULATION_TYPE == 3 .and. num_abs_boundary_faces > 0) then
! reads in absorbing boundary array when first phase is running
- if( phase_is_inner .eqv. .false. ) then
+ if( phase_is_inner .eqv. .false. ) then
! note: the index NSTEP-it+1 is valid if b_displ is read in after the Newark scheme
! uses fortran routine
!read(IOABS,rec=NSTEP-it+1) reclen1,b_absorb_field,reclen2
@@ -98,10 +98,10 @@
! call exit_mpi(0,'Error reading absorbing contribution b_absorb_field')
! uses c routine for faster reading
call read_abs(0,b_absorb_field,b_reclen_field,NSTEP-it+1)
- endif
+ endif
endif !adjoint
-
+
if(.NOT. GPU_MODE) then
! absorbs absorbing-boundary surface using Stacey condition (Clayton & Enquist)
@@ -162,16 +162,17 @@
endif ! ispec_is_inner
enddo
- else
+ else
! GPU_MODE == .true.
- call compute_stacey_elastic_cuda(Mesh_pointer,phase_is_inner, &
- SIMULATION_TYPE,SAVE_FORWARD,b_absorb_field)
+ if( num_abs_boundary_faces > 0 ) &
+ call compute_stacey_elastic_cuda(Mesh_pointer,phase_is_inner, &
+ SIMULATION_TYPE,SAVE_FORWARD,b_absorb_field)
endif
! adjoint simulations: stores absorbed wavefield part
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) then
! writes out absorbing boundary value only when second phase is running
- if( phase_is_inner .eqv. .true. ) then
+ if( phase_is_inner .eqv. .true. ) then
! uses fortran routine
!write(IOABS,rec=it) b_reclen_field,b_absorb_field,b_reclen_field
! uses c routine
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -129,6 +129,8 @@
!character(len=256) :: vtkfilename
integer :: zoom_factor = 4
logical :: zoom
+ integer, dimension(1) :: tmp_pixel_loc
+ integer, dimension(1,0:NPROC-1) :: tmp_pixel_per_proc
! checks image type
if(IMAGE_TYPE > 4 .or. IMAGE_TYPE < 1) then
@@ -378,12 +380,15 @@
if( nb_pixel_loc > 0 ) then
if( .not. allocated(num_pixel_loc) ) call exit_MPI(myrank,'error num_pixel_loc allocation')
endif
-
+
! filling array iglob_image_color, containing info on which process owns which pixels.
allocate(nb_pixel_per_proc(0:NPROC-1),stat=ier)
if( ier /= 0 ) stop 'error allocating array nb_pixel_per_proc'
- call gather_all_i(nb_pixel_loc,1,nb_pixel_per_proc,1,NPROC)
+ tmp_pixel_loc(1) = nb_pixel_loc
+ call gather_all_i(tmp_pixel_loc,1,tmp_pixel_per_proc,1,NPROC)
+ nb_pixel_per_proc(:) = tmp_pixel_per_proc(1,:)
+
! allocates receiving array
if ( myrank == 0 ) then
allocate( num_pixel_recv(maxval(nb_pixel_per_proc(:)),0:NPROC-1),stat=ier)
@@ -428,7 +433,7 @@
if(ier /= 0 ) call exit_mpi(myrank,'error allocating image send data')
data_pixel_send(:) = 0._CUSTOM_REAL
endif
-
+
! handles vp background data
call write_PNM_GIF_vp_background()
@@ -474,7 +479,7 @@
! master collects
if (myrank == 0) then
do iproc = 1, NPROC-1
- if( nb_pixel_per_proc(iproc) > 0 ) then
+ if( nb_pixel_per_proc(iproc) > 0 ) then
call recvv_cr(data_pixel_recv(1),nb_pixel_per_proc(iproc),iproc,43)
! fills vp display array
do k = 1, nb_pixel_per_proc(iproc)
@@ -851,15 +856,19 @@
if( ELASTIC_SIMULATION ) then
if( ispec_is_elastic(ispec) ) then
if( SIMULATION_TYPE == 3 ) then
- veloc_val(:) = b_veloc(:,iglob)
+ ! to display re-constructed wavefield
+ !veloc_val(:) = b_veloc(:,iglob)
+ ! to display adjoint wavefield
+ veloc_val(:) = veloc(:,iglob)
else
- veloc_val(:) = veloc(:,iglob)
+ veloc_val(:) = veloc(:,iglob)
endif
! returns with this result
return
endif
endif
+
if( ACOUSTIC_SIMULATION ) then
if( ispec_is_acoustic(ispec) ) then
if( SIMULATION_TYPE == 3 ) then
@@ -868,7 +877,7 @@
b_potential_dot_acoustic, veloc_element,&
hprime_xx,hprime_yy,hprime_zz, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
+ ibool,rhostore)
else
! velocity vector
call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
@@ -877,7 +886,7 @@
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
ibool,rhostore)
endif
-
+
! returns corresponding iglob velocity entry
do k=1,NGLLZ
do j=1,NGLLY
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -31,13 +31,13 @@
use specfem_par
use specfem_par_elastic
use specfem_par_acoustic
-
+
implicit none
integer :: irec_local
! save last frame
-
+
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
open(unit=27,file=prname(1:len_trim(prname))//'save_forward_arrays.bin',&
status='unknown',form='unformatted')
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -215,10 +215,10 @@
stop 'GPU mode does not support SAVE_MOHO_MESH yet'
if( ATTENUATION ) then
if( N_SLS /= 3 ) &
- stop 'GPU mode does not support N_SLS /= 3 yet'
+ stop 'GPU mode does not support N_SLS /= 3 yet'
endif
- if( ANISOTROPY ) &
- stop 'GPU mode does not support ANISOTROPY yet'
+ if( ANISOTROPY ) &
+ stop 'GPU mode does not support ANISOTROPY yet'
endif
! absorbing surfaces
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -126,7 +126,7 @@
enddo ! end of main time loop
! Transfer fields from GPU card to host for further analysis
- if(GPU_MODE) call it_transfer_from_GPU()
+ if(GPU_MODE) call it_transfer_from_GPU()
end subroutine iterate_time
@@ -148,32 +148,32 @@
integer :: ihours,iminutes,iseconds,int_tCPU, &
ihours_remain,iminutes_remain,iseconds_remain,int_t_remain, &
ihours_total,iminutes_total,iseconds_total,int_t_total
-
+
! if(GPU_MODE) then
-! ! way 1: copy whole fields
+! ! way 1: copy whole fields
! ! elastic wavefield
-! if( ELASTIC_SIMULATION ) then
-! call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
-! ! backward/reconstructed wavefield
+! if( ELASTIC_SIMULATION ) then
+! call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+! ! backward/reconstructed wavefield
! if(SIMULATION_TYPE==3) &
! call transfer_b_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
-! endif
+! endif
! endif
! compute maximum of norm of displacement in each slice
if( ELASTIC_SIMULATION ) then
if( GPU_MODE) then
- ! way 2: just get maximum of field from GPU
- call get_norm_elastic_from_device_cuda(Usolidnorm,Mesh_pointer,1)
+ ! way 2: just get maximum of field from GPU
+ call get_norm_elastic_from_device(Usolidnorm,Mesh_pointer,1)
else
Usolidnorm = maxval(sqrt(displ(1,:)**2 + displ(2,:)**2 + displ(3,:)**2))
endif
else
if( ACOUSTIC_SIMULATION ) then
if(GPU_MODE) then
- ! way 2: just get maximum of field from GPU
- call get_norm_acoustic_from_device_cuda(Usolidnorm,Mesh_pointer,1)
- else
+ ! way 2: just get maximum of field from GPU
+ call get_norm_acoustic_from_device(Usolidnorm,Mesh_pointer,1)
+ else
Usolidnorm = maxval(abs(potential_dot_dot_acoustic(:)))
endif
endif
@@ -187,7 +187,7 @@
if( ELASTIC_SIMULATION ) then
! way 2
if(GPU_MODE) then
- call get_norm_elastic_from_device_cuda(b_Usolidnorm,Mesh_pointer,3)
+ call get_norm_elastic_from_device(b_Usolidnorm,Mesh_pointer,3)
else
b_Usolidnorm = maxval(sqrt(b_displ(1,:)**2 + b_displ(2,:)**2 + b_displ(3,:)**2))
endif
@@ -195,8 +195,8 @@
if( ACOUSTIC_SIMULATION ) then
! way 2
if(GPU_MODE) then
- call get_norm_acoustic_from_device_cuda(b_Usolidnorm,Mesh_pointer,3)
- else
+ call get_norm_acoustic_from_device(b_Usolidnorm,Mesh_pointer,3)
+ else
b_Usolidnorm = maxval(abs(b_potential_dot_dot_acoustic(:)))
endif
endif
@@ -335,7 +335,7 @@
! updates acoustic potentials
if( ACOUSTIC_SIMULATION ) then
- if(.NOT. GPU_MODE) then
+ if(.NOT. GPU_MODE) then
potential_acoustic(:) = potential_acoustic(:) &
+ deltat * potential_dot_acoustic(:) &
+ deltatsqover2 * potential_dot_dot_acoustic(:)
@@ -344,15 +344,15 @@
potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
else
! on GPU
- call it_update_displacement_scheme_acoustic_cuda(Mesh_pointer, NGLOB_AB, &
+ call it_update_displacement_ac_cuda(Mesh_pointer, NGLOB_AB, &
deltat, deltatsqover2, deltatover2, &
SIMULATION_TYPE, b_deltat, b_deltatsqover2, b_deltatover2)
- endif
+ endif
! time marching potentials
- if(PML) then
- if( GPU_MODE ) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ if(PML) then
+ if( GPU_MODE ) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
call PML_acoustic_time_march(NSPEC_AB,NGLOB_AB,ibool,&
potential_acoustic,potential_dot_acoustic,&
@@ -367,22 +367,22 @@
my_neighbours_ext_mesh,NPROC,&
ispec_is_acoustic)
- if( GPU_MODE ) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ if( GPU_MODE ) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
endif
-
+
endif ! ACOUSTIC_SIMULATION
! updates elastic displacement and velocity
if( ELASTIC_SIMULATION ) then
-
+
if(.NOT. GPU_MODE) then
displ(:,:) = displ(:,:) + deltat*veloc(:,:) + deltatsqover2*accel(:,:)
veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
accel(:,:) = 0._CUSTOM_REAL
else ! GPU_MODE == 1
! Includes SIM_TYPE 1 & 3 (for noise tomography)
- call it_update_displacement_scheme_cuda(Mesh_pointer, size(displ), deltat, deltatsqover2,&
+ call it_update_displacement_cuda(Mesh_pointer, size(displ), deltat, deltatsqover2,&
deltatover2, SIMULATION_TYPE, b_deltat, b_deltatsqover2, b_deltatover2)
endif
endif
@@ -398,7 +398,7 @@
+ b_deltatover2 * b_potential_dot_dot_acoustic(:)
b_potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
endif
-
+
! elastic backward fields
if( ELASTIC_SIMULATION ) then
b_displ(:,:) = b_displ(:,:) + b_deltat*b_veloc(:,:) + b_deltatsqover2*b_accel(:,:)
@@ -446,11 +446,11 @@
read(27) b_potential_acoustic
read(27) b_potential_dot_acoustic
read(27) b_potential_dot_dot_acoustic
-
+
! transfers fields onto GPU
if(GPU_MODE) &
- call transfer_b_fields_acoustic_to_device(NGLOB_AB,b_potential_acoustic, &
- b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+ call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
+ b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
endif
! elastic wavefields
@@ -459,7 +459,7 @@
read(27) b_veloc
read(27) b_accel
- ! puts elastic wavefield to GPU
+ ! puts elastic wavefield to GPU
if(GPU_MODE) &
call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
@@ -475,19 +475,19 @@
read(27) b_epsilondev_xy
read(27) b_epsilondev_xz
read(27) b_epsilondev_yz
-
- ! puts elastic attenuation arrays to GPU
+
+ ! puts elastic attenuation arrays to GPU
if(GPU_MODE) &
call transfer_b_fields_att_to_device(Mesh_pointer, &
b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &
b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
- size(b_epsilondev_xx))
+ size(b_epsilondev_xx))
endif
endif
close(27)
-
+
end subroutine it_read_foward_arrays
!=====================================================================
@@ -520,6 +520,13 @@
read(27) b_displ
read(27) b_veloc
read(27) b_accel
+
+ ! puts elastic fields onto GPU
+ if(GPU_MODE) then
+ ! wavefields
+ call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
+ endif
+
read(27) b_R_xx
read(27) b_R_yy
read(27) b_R_xy
@@ -530,30 +537,28 @@
read(27) b_epsilondev_xy
read(27) b_epsilondev_xz
read(27) b_epsilondev_yz
-
+
! puts elastic fields onto GPU
if(GPU_MODE) then
- ! wavefields
- call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
! attenuation arrays
call transfer_b_fields_att_to_device(Mesh_pointer, &
b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &
b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
- size(b_epsilondev_xx))
- endif
+ size(b_epsilondev_xx))
+ endif
endif
-
+
if( ACOUSTIC_SIMULATION ) then
- ! reads arrays from disk files
+ ! reads arrays from disk files
read(27) b_potential_acoustic
read(27) b_potential_dot_acoustic
read(27) b_potential_dot_dot_acoustic
! puts acoustic fields onto GPU
if(GPU_MODE) &
- call transfer_b_fields_acoustic_to_device(NGLOB_AB,b_potential_acoustic, &
+ call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
-
+
endif
close(27)
else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. mod(it,NSTEP_Q_SAVE) == 0) then
@@ -564,20 +569,22 @@
if( ELASTIC_SIMULATION ) then
! gets elastic fields from GPU onto CPU
if(GPU_MODE) then
- call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ endif
+ ! writes to disk file
+ write(27) displ
+ write(27) veloc
+ write(27) accel
+
+ if(GPU_MODE) then
! attenuation arrays
call transfer_fields_att_from_device(Mesh_pointer, &
R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
- size(epsilondev_xx))
+ size(epsilondev_xx))
endif
-
- ! writes to disk file
- write(27) displ
- write(27) veloc
- write(27) accel
-
+
write(27) R_xx
write(27) R_yy
write(27) R_xy
@@ -592,10 +599,10 @@
if( ACOUSTIC_SIMULATION ) then
! gets acoustic fields from GPU onto CPU
if(GPU_MODE) &
- call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
- ! writes to disk file
+ ! writes to disk file
write(27) potential_acoustic
write(27) potential_dot_acoustic
write(27) potential_dot_dot_acoustic
@@ -606,7 +613,7 @@
end subroutine it_store_attenuation_arrays
-
+
!=====================================================================
subroutine it_transfer_from_GPU()
@@ -624,53 +631,61 @@
! acoustic potentials
if( ACOUSTIC_SIMULATION ) &
- call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
-
+ call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
! elastic wavefield
if( ELASTIC_SIMULATION ) then
- call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
if (ATTENUATION) &
call transfer_fields_att_from_device(Mesh_pointer, &
R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
size(epsilondev_xx))
-
+
endif
else if (SIMULATION_TYPE == 3) then
! to store kernels
-
+
if( ACOUSTIC_SIMULATION ) then
! only in case needed...
- !call transfer_b_fields_acoustic_from_device(NGLOB_AB,b_potential_acoustic, &
- ! b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
-
- ! acoustic kernels
- call transfer_sensitivity_kernels_acoustic_to_host(Mesh_pointer,rho_ac_kl,kappa_ac_kl,NSPEC_AB)
+ !call transfer_b_fields_ac_from_device(NGLOB_AB,b_potential_acoustic, &
+ ! b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+
+ ! acoustic kernels
+ call transfer_kernels_ac_to_host(Mesh_pointer,rho_ac_kl,kappa_ac_kl,NSPEC_AB)
endif
if( ELASTIC_SIMULATION ) then
! only in case needed...
!call transfer_b_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
-
+
! elastic kernels
- call transfer_sensitivity_kernels_to_host(Mesh_pointer,rho_kl,mu_kl,kappa_kl,NSPEC_AB)
+ call transfer_kernels_el_to_host(Mesh_pointer,rho_kl,mu_kl,kappa_kl,NSPEC_AB)
endif
! specific noise strength kernel
if( NOISE_TOMOGRAPHY == 3 ) then
- call transfer_sensitivity_kernels_to_host(Mesh_pointer,Sigma_kl,NSPEC_AB)
+ call transfer_kernels_noise_to_host(Mesh_pointer,Sigma_kl,NSPEC_AB)
endif
+ ! approximative hessian for preconditioning kernels
+ if ( APPROXIMATE_HESS_KL ) then
+ if( ELASTIC_SIMULATION ) call transfer_kernels_hess_el_tohost(Mesh_pointer,hess_kl,NSPEC_AB)
+ if( ACOUSTIC_SIMULATION ) call transfer_kernels_hess_ac_tohost(Mesh_pointer,hess_ac_kl,NSPEC_AB)
+ endif
+
endif
-
+
! frees allocated memory on GPU
call prepare_cleanup_device(Mesh_pointer, &
- SIMULATION_TYPE,ACOUSTIC_SIMULATION,ELASTIC_SIMULATION, &
+ SIMULATION_TYPE,SAVE_FORWARD, &
+ ACOUSTIC_SIMULATION,ELASTIC_SIMULATION, &
ABSORBING_CONDITIONS,NOISE_TOMOGRAPHY,COMPUTE_AND_STORE_STRAIN, &
- ATTENUATION)
-
+ ATTENUATION,OCEANS, &
+ APPROXIMATE_HESS_KL)
+
end subroutine it_transfer_from_GPU
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_receivers.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_receivers.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,1091 +1,1091 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!----
-!---- locate_receivers finds the correct position of the receivers
-!----
- subroutine locate_receivers(ibool,myrank,NSPEC_AB,NGLOB_AB, &
- xstore,ystore,zstore,xigll,yigll,zigll,rec_filename, &
- nrec,islice_selected_rec,ispec_selected_rec, &
- xi_receiver,eta_receiver,gamma_receiver,station_name,network_name,nu, &
- NPROC,utm_x_source,utm_y_source, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
- iglob_is_surface_external_mesh,ispec_is_surface_external_mesh, &
- num_free_surface_faces,free_surface_ispec,free_surface_ijk)
-
- implicit none
-
- include "constants.h"
-
- logical SUPPRESS_UTM_PROJECTION
-
- integer NPROC,UTM_PROJECTION_ZONE
-
- integer nrec,myrank
-
- integer NSPEC_AB,NGLOB_AB
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-
-! arrays containing coordinates of the points
- real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: xstore,ystore,zstore
-
-! for surface locating and normal computing with external mesh
- integer :: pt0_ix,pt0_iy,pt0_iz,pt1_ix,pt1_iy,pt1_iz,pt2_ix,pt2_iy,pt2_iz
- integer :: num_free_surface_faces
- real(kind=CUSTOM_REAL), dimension(3) :: u_vector,v_vector,w_vector
- logical, dimension(NGLOB_AB) :: iglob_is_surface_external_mesh
- logical, dimension(NSPEC_AB) :: ispec_is_surface_external_mesh
- integer, dimension(num_free_surface_faces) :: free_surface_ispec
- integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
-
- integer, allocatable, dimension(:) :: ix_initial_guess,iy_initial_guess,iz_initial_guess
-
- integer iprocloop
- integer ios
-
- double precision,dimension(1) :: altitude_rec,distmin_ele
- double precision,dimension(4) :: elevation_node,dist_node
- double precision,dimension(NPROC) :: distmin_ele_all,elevation_all
- double precision, allocatable, dimension(:) :: x_target,y_target,z_target
- double precision, allocatable, dimension(:) :: horiz_dist
- double precision, allocatable, dimension(:) :: x_found,y_found,z_found
-
- integer irec
- integer i,j,k,ispec,iglob,iface,inode,imin,imax,jmin,jmax,kmin,kmax,igll,jgll,kgll
- integer iselected,jselected,iface_selected,iadjust,jadjust
- integer iproc(1)
-
- double precision utm_x_source,utm_y_source
- double precision dist
- double precision xi,eta,gamma,dx,dy,dz,dxi,deta,dgamma
-
-! Gauss-Lobatto-Legendre points of integration
- double precision xigll(NGLLX)
- double precision yigll(NGLLY)
- double precision zigll(NGLLZ)
-
-! input receiver file name
- character(len=*) rec_filename
-
-! topology of the control points of the surface element
- integer iax,iay,iaz
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
-! coordinates of the control points of the surface element
- double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
-
- integer iter_loop,ispec_iterate
-
- integer ia
- double precision x,y,z
- double precision xix,xiy,xiz
- double precision etax,etay,etaz
- double precision gammax,gammay,gammaz
-
-! timer MPI
- double precision, external :: wtime
- double precision time_start,tCPU
-
-! use dynamic allocation
- double precision, dimension(:), allocatable :: final_distance
- double precision distmin,final_distance_max
-
-! receiver information
-! timing information for the stations
-! station information for writing the seismograms
-
- integer :: iglob_selected
- integer, dimension(nrec) :: islice_selected_rec,ispec_selected_rec
- double precision, dimension(nrec) :: xi_receiver,eta_receiver,gamma_receiver
- double precision, dimension(3,3,nrec) :: nu
- character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
- character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
- double precision, allocatable, dimension(:) :: stlat,stlon,stele,stbur,stutm_x,stutm_y,elevation
-
- double precision, allocatable, dimension(:) :: x_found_all,y_found_all,z_found_all
- double precision, dimension(:), allocatable :: final_distance_all
- integer, allocatable, dimension(:) :: ispec_selected_rec_all
- double precision, allocatable, dimension(:) :: xi_receiver_all,eta_receiver_all,gamma_receiver_all
- double precision, allocatable, dimension(:,:,:) :: nu_all
-
- integer :: ier
- character(len=256) OUTPUT_FILES
-
- real(kind=CUSTOM_REAL) :: xmin,xmax,ymin,ymax,zmin,zmax
- real(kind=CUSTOM_REAL) :: xmin_ELE,xmax_ELE,ymin_ELE,ymax_ELE,zmin_ELE,zmax_ELE
- integer :: imin_temp,imax_temp,jmin_temp,jmax_temp,kmin_temp,kmax_temp
- integer,dimension(NGLLX*NGLLY*NGLLZ) :: iglob_temp
-
-! **************
-
- ! dimension of model in current proc
- xmin=minval(xstore(:)); xmax=maxval(xstore(:))
- ymin=minval(ystore(:)); ymax=maxval(ystore(:))
- zmin=minval(zstore(:)); zmax=maxval(zstore(:))
- if(FASTER_RECEIVERS_POINTS_ONLY) then
- imin_temp = 1; imax_temp = NGLLX
- jmin_temp = 1; jmax_temp = NGLLY
- kmin_temp = 1; kmax_temp = NGLLZ
- else
- imin_temp = 2; imax_temp = NGLLX - 1
- jmin_temp = 2; jmax_temp = NGLLY - 1
- kmin_temp = 2; kmax_temp = NGLLZ - 1
- endif
-
- ! get MPI starting time
- time_start = wtime()
-
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) '********************'
- write(IMAIN,*) ' locating receivers'
- write(IMAIN,*) '********************'
- write(IMAIN,*)
- endif
-
- ! define topology of the control element
- call usual_hex_nodes(iaddx,iaddy,iaddz)
-
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) '*****************************************************************'
- write(IMAIN,'(1x,a,a,a)') 'reading receiver information from ', trim(rec_filename), ' file'
- write(IMAIN,*) '*****************************************************************'
- endif
-
- ! get number of stations from receiver file
- open(unit=1,file=trim(rec_filename),status='old',action='read',iostat=ios)
- if (ios /= 0) call exit_mpi(myrank,'error opening file '//trim(rec_filename))
-
- ! allocate memory for arrays using number of stations
- allocate(stlat(nrec), &
- stlon(nrec), &
- stele(nrec), &
- stbur(nrec), &
- stutm_x(nrec), &
- stutm_y(nrec), &
- horiz_dist(nrec), &
- elevation(nrec), &
- ix_initial_guess(nrec), &
- iy_initial_guess(nrec), &
- iz_initial_guess(nrec), &
- x_target(nrec), &
- y_target(nrec), &
- z_target(nrec), &
- x_found(nrec), &
- y_found(nrec), &
- z_found(nrec), &
- final_distance(nrec), &
- ispec_selected_rec_all(nrec), &
- xi_receiver_all(nrec), &
- eta_receiver_all(nrec), &
- gamma_receiver_all(nrec), &
- x_found_all(nrec), &
- y_found_all(nrec), &
- z_found_all(nrec), &
- final_distance_all(nrec), &
- nu_all(3,3,nrec),stat=ier)
- if( ier /= 0 ) stop 'error allocating arrays for locating receivers'
-
- ! loop on all the stations
- do irec=1,nrec
-
- read(1,*,iostat=ios) station_name(irec),network_name(irec),stlat(irec),stlon(irec),stele(irec),stbur(irec)
- if (ios /= 0) call exit_mpi(myrank, 'Error reading station file '//trim(rec_filename))
-
- ! convert station location to UTM
- call utm_geo(stlon(irec),stlat(irec),stutm_x(irec),stutm_y(irec),&
- UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
-
- ! compute horizontal distance between source and receiver in km
- horiz_dist(irec) = dsqrt((stutm_y(irec)-utm_y_source)**2 + (stutm_x(irec)-utm_x_source)**2) / 1000.
-
- ! print some information about stations
- if(myrank == 0) &
- write(IMAIN,*) 'Station #',irec,': ',station_name(irec)(1:len_trim(station_name(irec))), &
- '.',network_name(irec)(1:len_trim(network_name(irec))), &
- ' horizontal distance: ',sngl(horiz_dist(irec)),' km'
-
- ! get approximate topography elevation at source long/lat coordinates
- ! set distance to huge initial value
- distmin = HUGEVAL
- if(num_free_surface_faces > 0) then
- iglob_selected = 1
- ! loop only on points inside the element
- ! exclude edges to ensure this point is not shared with other elements
- imin = 2
- imax = NGLLX - 1
- jmin = 2
- jmax = NGLLY - 1
- iselected = 0
- jselected = 0
- iface_selected = 0
- do iface=1,num_free_surface_faces
- do j=jmin,jmax
- do i=imin,imax
-
- ispec = free_surface_ispec(iface)
- igll = free_surface_ijk(1,(j-1)*NGLLY+i,iface)
- jgll = free_surface_ijk(2,(j-1)*NGLLY+i,iface)
- kgll = free_surface_ijk(3,(j-1)*NGLLY+i,iface)
- iglob = ibool(igll,jgll,kgll,ispec)
-
- ! keep this point if it is closer to the receiver
- dist = dsqrt((stutm_x(irec)-dble(xstore(iglob)))**2 + &
- (stutm_y(irec)-dble(ystore(iglob)))**2)
- if(dist < distmin) then
- distmin = dist
- iglob_selected = iglob
- iface_selected = iface
- iselected = i
- jselected = j
- altitude_rec(1) = zstore(iglob_selected)
- endif
- enddo
- enddo
- ! end of loop on all the elements on the free surface
- end do
- ! weighted mean at current point of topography elevation of the four closest nodes
- ! set distance to huge initial value
- distmin = HUGEVAL
- do j=jselected,jselected+1
- do i=iselected,iselected+1
- inode = 1
- do jadjust=0,1
- do iadjust= 0,1
- ispec = free_surface_ispec(iface_selected)
- igll = free_surface_ijk(1,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
- jgll = free_surface_ijk(2,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
- kgll = free_surface_ijk(3,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
- iglob = ibool(igll,jgll,kgll,ispec)
-
- elevation_node(inode) = zstore(iglob)
- dist_node(inode) = dsqrt((stutm_x(irec)-dble(xstore(iglob)))**2 + &
- (stutm_y(irec)-dble(ystore(iglob)))**2)
- inode = inode + 1
- end do
- end do
- dist = sum(dist_node)
- if(dist < distmin) then
- distmin = dist
- altitude_rec(1) = (dist_node(1)/dist)*elevation_node(1) + &
- (dist_node(2)/dist)*elevation_node(2) + &
- (dist_node(3)/dist)*elevation_node(3) + &
- (dist_node(4)/dist)*elevation_node(4)
- endif
- end do
- end do
- end if
- ! MPI communications to determine the best slice
- distmin_ele(1)= distmin
- call gather_all_dp(distmin_ele,1,distmin_ele_all,1,NPROC)
- call gather_all_dp(altitude_rec,1,elevation_all,1,NPROC)
- if(myrank == 0) then
- iproc = minloc(distmin_ele_all)
- altitude_rec(1) = elevation_all(iproc(1))
- end if
- call bcast_all_dp(altitude_rec,1)
- elevation(irec) = altitude_rec(1)
-
- ! reset distance to huge initial value
- distmin=HUGEVAL
-
-! get the Cartesian components of n in the model: nu
-
- ! orientation consistent with the UTM projection
- ! X coordinate - East
- nu(1,1,irec) = 1.d0
- nu(1,2,irec) = 0.d0
- nu(1,3,irec) = 0.d0
- ! Y coordinate - North
- nu(2,1,irec) = 0.d0
- nu(2,2,irec) = 1.d0
- nu(2,3,irec) = 0.d0
- ! Z coordinate - Vertical
- nu(3,1,irec) = 0.d0
- nu(3,2,irec) = 0.d0
- nu(3,3,irec) = 1.d0
-
- x_target(irec) = stutm_x(irec)
- y_target(irec) = stutm_y(irec)
-
- ! receiver's Z coordinate
- if( USE_SOURCES_RECVS_Z ) then
- ! alternative: burial depth is given as z value directly
- z_target(irec) = stbur(irec)
- else
- ! burial depth in STATIONS file given in m
- z_target(irec) = elevation(irec) - stbur(irec)
- endif
- !if (myrank == 0) write(IOVTK,*) x_target(irec), y_target(irec), z_target(irec)
-
- if (.not. SU_FORMAT) then
- ! determines closest GLL point
- ispec_selected_rec(irec) = 0
- do ispec=1,NSPEC_AB
-
- ! define the interval in which we look for points
- if(FASTER_RECEIVERS_POINTS_ONLY) then
- imin = 1
- imax = NGLLX
-
- jmin = 1
- jmax = NGLLY
-
- kmin = 1
- kmax = NGLLZ
-
- else
- ! loop only on points inside the element
- ! exclude edges to ensure this point is not shared with other elements
- imin = 2
- imax = NGLLX - 1
-
- jmin = 2
- jmax = NGLLY - 1
-
- kmin = 2
- kmax = NGLLZ - 1
- endif
-
- do k = kmin,kmax
- do j = jmin,jmax
- do i = imin,imax
-
- iglob = ibool(i,j,k,ispec)
-
- if (.not. RECVS_CAN_BE_BURIED_EXT_MESH) then
- if ((.not. iglob_is_surface_external_mesh(iglob)) .or. (.not. ispec_is_surface_external_mesh(ispec))) then
- cycle
- endif
- endif
-
- dist = dsqrt((x_target(irec)-dble(xstore(iglob)))**2 &
- +(y_target(irec)-dble(ystore(iglob)))**2 &
- +(z_target(irec)-dble(zstore(iglob)))**2)
-
- ! keep this point if it is closer to the receiver
- if(dist < distmin) then
- distmin = dist
- ispec_selected_rec(irec) = ispec
- ix_initial_guess(irec) = i
- iy_initial_guess(irec) = j
- iz_initial_guess(irec) = k
-
- xi_receiver(irec) = dble(ix_initial_guess(irec))
- eta_receiver(irec) = dble(iy_initial_guess(irec))
- gamma_receiver(irec) = dble(iz_initial_guess(irec))
- x_found(irec) = xstore(iglob)
- y_found(irec) = ystore(iglob)
- z_found(irec) = zstore(iglob)
- endif
-
- enddo
- enddo
- enddo
-
- ! compute final distance between asked and found (converted to km)
- final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
- (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
-
- ! end of loop on all the spectral elements in current slice
- enddo
- else
- ispec_selected_rec(irec) = 0
- ix_initial_guess(irec) = 0
- iy_initial_guess(irec) = 0
- iz_initial_guess(irec) = 0
- final_distance(irec) = HUGEVAL
- if ( (x_target(irec)>=xmin .and. x_target(irec)<=xmax) .and. &
- (y_target(irec)>=ymin .and. y_target(irec)<=ymax) .and. &
- (z_target(irec)>=zmin .and. z_target(irec)<=zmax) ) then
- do ispec=1,NSPEC_AB
- iglob_temp=reshape(ibool(:,:,:,ispec),(/NGLLX*NGLLY*NGLLZ/))
- xmin_ELE=minval(xstore(iglob_temp))
- xmax_ELE=maxval(xstore(iglob_temp))
- ymin_ELE=minval(ystore(iglob_temp))
- ymax_ELE=maxval(ystore(iglob_temp))
- zmin_ELE=minval(zstore(iglob_temp))
- zmax_ELE=maxval(zstore(iglob_temp))
- if ( (x_target(irec)>=xmin_ELE .and. x_target(irec)<=xmax_ELE) .and. &
- (y_target(irec)>=ymin_ELE .and. y_target(irec)<=ymax_ELE) .and. &
- (z_target(irec)>=zmin_ELE .and. z_target(irec)<=zmax_ELE) ) then
- ! we find the element (ispec) which "may" contain the receiver (irec)
- ! so we only need to compute distances (which is expensive because of "dsqrt") within those elements
- ispec_selected_rec(irec) = ispec
- do k = kmin_temp,kmax_temp
- do j = jmin_temp,jmax_temp
- do i = imin_temp,imax_temp
- iglob = ibool(i,j,k,ispec)
- ! for comparison purpose, we don't have to do "dsqrt", which is expensive
- dist = ((x_target(irec)-dble(xstore(iglob)))**2 &
- +(y_target(irec)-dble(ystore(iglob)))**2 &
- +(z_target(irec)-dble(zstore(iglob)))**2)
- if(dist < distmin) then
- distmin = dist
- ix_initial_guess(irec) = i
- iy_initial_guess(irec) = j
- iz_initial_guess(irec) = k
- xi_receiver(irec) = dble(ix_initial_guess(irec))
- eta_receiver(irec) = dble(iy_initial_guess(irec))
- gamma_receiver(irec) = dble(iz_initial_guess(irec))
- x_found(irec) = xstore(iglob)
- y_found(irec) = ystore(iglob)
- z_found(irec) = zstore(iglob)
- endif
- enddo
- enddo
- enddo
- final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
- (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
- endif ! if receiver "may" be within this element
- enddo ! do ispec=1,NSPEC_AB
- endif ! if receiver "may" be within this proc
- endif !if (.not. SU_FORMAT)
-
- if (ispec_selected_rec(irec) == 0) then
- ! receiver is NOT within this proc, assign trivial values
- ispec_selected_rec(irec) = 1
- ix_initial_guess(irec) = 1
- iy_initial_guess(irec) = 1
- iz_initial_guess(irec) = 1
- final_distance(irec) = HUGEVAL
- endif
-
- ! get normal to the face of the hexaedra if receiver is on the surface
- if ((.not. RECVS_CAN_BE_BURIED_EXT_MESH) .and. &
- .not. (ispec_selected_rec(irec) == 0)) then
- pt0_ix = -1
- pt0_iy = -1
- pt0_iz = -1
- pt1_ix = -1
- pt1_iy = -1
- pt1_iz = -1
- pt2_ix = -1
- pt2_iy = -1
- pt2_iz = -1
- ! we get two vectors of the face (three points) to compute the normal
- if (ix_initial_guess(irec) == 1 .and. &
- iglob_is_surface_external_mesh(ibool(1,2,2,ispec_selected_rec(irec)))) then
- pt0_ix = 1
- pt0_iy = NGLLY
- pt0_iz = 1
- pt1_ix = 1
- pt1_iy = 1
- pt1_iz = 1
- pt2_ix = 1
- pt2_iy = NGLLY
- pt2_iz = NGLLZ
- endif
- if (ix_initial_guess(irec) == NGLLX .and. &
- iglob_is_surface_external_mesh(ibool(NGLLX,2,2,ispec_selected_rec(irec)))) then
- pt0_ix = NGLLX
- pt0_iy = 1
- pt0_iz = 1
- pt1_ix = NGLLX
- pt1_iy = NGLLY
- pt1_iz = 1
- pt2_ix = NGLLX
- pt2_iy = 1
- pt2_iz = NGLLZ
- endif
- if (iy_initial_guess(irec) == 1 .and. &
- iglob_is_surface_external_mesh(ibool(2,1,2,ispec_selected_rec(irec)))) then
- pt0_ix = 1
- pt0_iy = 1
- pt0_iz = 1
- pt1_ix = NGLLX
- pt1_iy = 1
- pt1_iz = 1
- pt2_ix = 1
- pt2_iy = 1
- pt2_iz = NGLLZ
- endif
- if (iy_initial_guess(irec) == NGLLY .and. &
- iglob_is_surface_external_mesh(ibool(2,NGLLY,2,ispec_selected_rec(irec)))) then
- pt0_ix = NGLLX
- pt0_iy = NGLLY
- pt0_iz = 1
- pt1_ix = 1
- pt1_iy = NGLLY
- pt1_iz = 1
- pt2_ix = NGLLX
- pt2_iy = NGLLY
- pt2_iz = NGLLZ
- endif
- if (iz_initial_guess(irec) == 1 .and. &
- iglob_is_surface_external_mesh(ibool(2,2,1,ispec_selected_rec(irec)))) then
- pt0_ix = NGLLX
- pt0_iy = 1
- pt0_iz = 1
- pt1_ix = 1
- pt1_iy = 1
- pt1_iz = 1
- pt2_ix = NGLLX
- pt2_iy = NGLLY
- pt2_iz = 1
- endif
- if (iz_initial_guess(irec) == NGLLZ .and. &
- iglob_is_surface_external_mesh(ibool(2,2,NGLLZ,ispec_selected_rec(irec)))) then
- pt0_ix = 1
- pt0_iy = 1
- pt0_iz = NGLLZ
- pt1_ix = NGLLX
- pt1_iy = 1
- pt1_iz = NGLLZ
- pt2_ix = 1
- pt2_iy = NGLLY
- pt2_iz = NGLLZ
- endif
-
- if (pt0_ix<0 .or.pt0_iy<0 .or. pt0_iz<0 .or. &
- pt1_ix<0 .or. pt1_iy<0 .or. pt1_iz<0 .or. &
- pt2_ix<0 .or. pt2_iy<0 .or. pt2_iz<0) then
- stop 'error in computing normal for receivers.'
- endif
-
- u_vector(1) = xstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
- - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
- u_vector(2) = ystore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
- - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
- u_vector(3) = zstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
- - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
- v_vector(1) = xstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
- - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
- v_vector(2) = ystore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
- - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
- v_vector(3) = zstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
- - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
-
- ! cross product
- w_vector(1) = u_vector(2)*v_vector(3) - u_vector(3)*v_vector(2)
- w_vector(2) = u_vector(3)*v_vector(1) - u_vector(1)*v_vector(3)
- w_vector(3) = u_vector(1)*v_vector(2) - u_vector(2)*v_vector(1)
-
- ! normalize vector w
- w_vector(:) = w_vector(:)/sqrt(w_vector(1)**2+w_vector(2)**2+w_vector(3)**2)
-
- ! build the two other vectors for a direct base: we normalize u, and v=w^u
- u_vector(:) = u_vector(:)/sqrt(u_vector(1)**2+u_vector(2)**2+u_vector(3)**2)
- v_vector(1) = w_vector(2)*u_vector(3) - w_vector(3)*u_vector(2)
- v_vector(2) = w_vector(3)*u_vector(1) - w_vector(1)*u_vector(3)
- v_vector(3) = w_vector(1)*u_vector(2) - w_vector(2)*u_vector(1)
-
- ! build rotation matrice nu for seismograms
- if (EXT_MESH_RECV_NORMAL) then
- ! East (u)
- nu(1,1,irec) = u_vector(1)
- nu(1,2,irec) = v_vector(1)
- nu(1,3,irec) = w_vector(1)
-
- ! North (v)
- nu(2,1,irec) = u_vector(2)
- nu(2,2,irec) = v_vector(2)
- nu(2,3,irec) = w_vector(2)
-
- ! Vertical (w)
- nu(3,1,irec) = u_vector(3)
- nu(3,2,irec) = v_vector(3)
- nu(3,3,irec) = w_vector(3)
- else
- ! East
- nu(1,1,irec) = 1.d0
- nu(1,2,irec) = 0.d0
- nu(1,3,irec) = 0.d0
-
- ! North
- nu(2,1,irec) = 0.d0
- nu(2,2,irec) = 1.d0
- nu(2,3,irec) = 0.d0
-
- ! Vertical
- nu(3,1,irec) = 0.d0
- nu(3,2,irec) = 0.d0
- nu(3,3,irec) = 1.d0
- endif
-
- endif ! of if (.not. RECVS_CAN_BE_BURIED_EXT_MESH)
-
- ! end of loop on all the stations
- enddo
-
- ! close receiver file
- close(1)
-
-! ****************************************
-! find the best (xi,eta,gamma) for each receiver
-! ****************************************
-
- if(.not. FASTER_RECEIVERS_POINTS_ONLY) then
-
- ! loop on all the receivers to iterate in that slice
- do irec = 1,nrec
-
- ispec_iterate = ispec_selected_rec(irec)
-
- ! use initial guess in xi and eta
- xi = xigll(ix_initial_guess(irec))
- eta = yigll(iy_initial_guess(irec))
- gamma = zigll(iz_initial_guess(irec))
-
- ! define coordinates of the control points of the element
- do ia=1,NGNOD
- iax = 0
- iay = 0
- iaz = 0
- if(iaddx(ia) == 0) then
- iax = 1
- else if(iaddx(ia) == 1) then
- iax = (NGLLX+1)/2
- else if(iaddx(ia) == 2) then
- iax = NGLLX
- else
- call exit_MPI(myrank,'incorrect value of iaddx')
- endif
-
- if(iaddy(ia) == 0) then
- iay = 1
- else if(iaddy(ia) == 1) then
- iay = (NGLLY+1)/2
- else if(iaddy(ia) == 2) then
- iay = NGLLY
- else
- call exit_MPI(myrank,'incorrect value of iaddy')
- endif
-
- if(iaddz(ia) == 0) then
- iaz = 1
- else if(iaddz(ia) == 1) then
- iaz = (NGLLZ+1)/2
- else if(iaddz(ia) == 2) then
- iaz = NGLLZ
- else
- call exit_MPI(myrank,'incorrect value of iaddz')
- endif
-
- iglob = ibool(iax,iay,iaz,ispec_iterate)
- xelm(ia) = dble(xstore(iglob))
- yelm(ia) = dble(ystore(iglob))
- zelm(ia) = dble(zstore(iglob))
-
- enddo
-
- ! iterate to solve the non linear system
- do iter_loop = 1,NUM_ITER
-
- ! impose receiver exactly at the surface
- ! gamma = 1.d0
-
- ! recompute jacobian for the new point
- call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
-
- ! compute distance to target location
- dx = - (x - x_target(irec))
- dy = - (y - y_target(irec))
- dz = - (z - z_target(irec))
-
- ! compute increments
- ! gamma does not change since we know the receiver is exactly on the surface
- dxi = xix*dx + xiy*dy + xiz*dz
- deta = etax*dx + etay*dy + etaz*dz
- dgamma = gammax*dx + gammay*dy + gammaz*dz
-
- ! update values
- xi = xi + dxi
- eta = eta + deta
- gamma = gamma + dgamma
-
- ! impose that we stay in that element
- ! (useful if user gives a receiver outside the mesh for instance)
- ! we can go slightly outside the [1,1] segment since with finite elements
- ! the polynomial solution is defined everywhere
- ! this can be useful for convergence of itertive scheme with distorted elements
- if (xi > 1.10d0) xi = 1.10d0
- if (xi < -1.10d0) xi = -1.10d0
- if (eta > 1.10d0) eta = 1.10d0
- if (eta < -1.10d0) eta = -1.10d0
- if (gamma > 1.10d0) gamma = 1.10d0
- if (gamma < -1.10d0) gamma = -1.10d0
-
- ! end of non linear iterations
- enddo
-
- ! impose receiver exactly at the surface after final iteration
- ! gamma = 1.d0
-
- ! compute final coordinates of point found
- call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
-
- ! store xi,eta and x,y,z of point found
- xi_receiver(irec) = xi
- eta_receiver(irec) = eta
- gamma_receiver(irec) = gamma
- x_found(irec) = x
- y_found(irec) = y
- z_found(irec) = z
-
- ! compute final distance between asked and found (converted to km)
- final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
- (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
-
- enddo
-
- endif ! of if (.not. FASTER_RECEIVERS_POINTS_ONLY)
-
- ! synchronize all the processes to make sure all the estimates are available
- call sync_all()
- ! for MPI version, gather information from all the nodes
- if (myrank/=0) then ! gather information from other processors (one at a time)
- call send_i(ispec_selected_rec, nrec,0,0)
- call send_dp(xi_receiver, nrec,0,1)
- call send_dp(eta_receiver, nrec,0,2)
- call send_dp(gamma_receiver, nrec,0,3)
- call send_dp(final_distance, nrec,0,4)
- call send_dp(x_found, nrec,0,5)
- call send_dp(y_found, nrec,0,6)
- call send_dp(z_found, nrec,0,7)
- call send_dp(nu, 3*3*nrec,0,8)
- else
- islice_selected_rec(:) = 0
- do iprocloop=1,NPROC-1
- call recv_i(ispec_selected_rec_all, nrec,iprocloop,0)
- call recv_dp(xi_receiver_all, nrec,iprocloop,1)
- call recv_dp(eta_receiver_all, nrec,iprocloop,2)
- call recv_dp(gamma_receiver_all, nrec,iprocloop,3)
- call recv_dp(final_distance_all, nrec,iprocloop,4)
- call recv_dp(x_found_all, nrec,iprocloop,5)
- call recv_dp(y_found_all, nrec,iprocloop,6)
- call recv_dp(z_found_all, nrec,iprocloop,7)
- call recv_dp(nu_all, 3*3*nrec,iprocloop,8)
- do irec=1,nrec
- if (final_distance_all(irec) < final_distance(irec)) then
- final_distance(irec) = final_distance_all(irec)
- islice_selected_rec(irec) = iprocloop
- ispec_selected_rec(irec) = ispec_selected_rec_all(irec)
- xi_receiver(irec) = xi_receiver_all(irec)
- eta_receiver(irec) = eta_receiver_all(irec)
- gamma_receiver(irec) = gamma_receiver_all(irec)
- x_found(irec) = x_found_all(irec)
- y_found(irec) = y_found_all(irec)
- z_found(irec) = z_found_all(irec)
- nu(:,:,irec) = nu_all(:,:,irec)
- endif
- enddo
- enddo
- endif
- call sync_all()
-
- ! this is executed by main process only
- if(myrank == 0) then
-
- do irec=1,nrec
-
- write(IMAIN,*)
- write(IMAIN,*) 'station # ',irec,' ',station_name(irec),network_name(irec)
-
- if(final_distance(irec) == HUGEVAL) call exit_MPI(myrank,'error locating receiver')
-
- write(IMAIN,*) ' original latitude: ',sngl(stlat(irec))
- write(IMAIN,*) ' original longitude: ',sngl(stlon(irec))
- if( SUPPRESS_UTM_PROJECTION ) then
- write(IMAIN,*) ' original x: ',sngl(stutm_x(irec))
- write(IMAIN,*) ' original y: ',sngl(stutm_y(irec))
- else
- write(IMAIN,*) ' original UTM x: ',sngl(stutm_x(irec))
- write(IMAIN,*) ' original UTM y: ',sngl(stutm_y(irec))
- endif
- if( USE_SOURCES_RECVS_Z ) then
- write(IMAIN,*) ' original z: ',sngl(stbur(irec))
- else
- write(IMAIN,*) ' original depth: ',sngl(stbur(irec)),' m'
- endif
- write(IMAIN,*) ' horizontal distance: ',sngl(horiz_dist(irec))
- write(IMAIN,*) ' target x, y, z: ',sngl(x_target(irec)),sngl(y_target(irec)),sngl(z_target(irec))
-
- write(IMAIN,*) ' closest estimate found: ',sngl(final_distance(irec)),' m away'
- write(IMAIN,*) ' in slice ',islice_selected_rec(irec),' in element ',ispec_selected_rec(irec)
- if(FASTER_RECEIVERS_POINTS_ONLY) then
- write(IMAIN,*) ' in point i,j,k = ',nint(xi_receiver(irec)),nint(eta_receiver(irec)),nint(gamma_receiver(irec))
- write(IMAIN,*) ' nu1 = ',nu(1,:,irec)
- write(IMAIN,*) ' nu2 = ',nu(2,:,irec)
- write(IMAIN,*) ' nu3 = ',nu(3,:,irec)
- else
- write(IMAIN,*) ' at coordinates: '
- write(IMAIN,*) ' xi = ',xi_receiver(irec)
- write(IMAIN,*) ' eta = ',eta_receiver(irec)
- write(IMAIN,*) ' gamma = ',gamma_receiver(irec)
- endif
- if( SUPPRESS_UTM_PROJECTION ) then
- write(IMAIN,*) ' x: ',x_found(irec)
- write(IMAIN,*) ' y: ',y_found(irec)
- else
- write(IMAIN,*) ' UTM x: ',x_found(irec)
- write(IMAIN,*) ' UTM y: ',y_found(irec)
- endif
- if( USE_SOURCES_RECVS_Z ) then
- write(IMAIN,*) ' z: ',z_found(irec)
- else
- write(IMAIN,*) ' depth: ',dabs(z_found(irec) - elevation(irec)),' m'
- write(IMAIN,*) ' z: ',z_found(irec)
- endif
- write(IMAIN,*)
-
-
- ! add warning if estimate is poor
- ! (usually means receiver outside the mesh given by the user)
- if(final_distance(irec) > 3000.d0) then
- write(IMAIN,*) '*******************************************************'
- write(IMAIN,*) '***** WARNING: receiver location estimate is poor *****'
- write(IMAIN,*) '*******************************************************'
- endif
-
- write(IMAIN,*)
-
- enddo
-
- ! compute maximal distance for all the receivers
- final_distance_max = maxval(final_distance(:))
-
- ! display maximum error for all the receivers
- write(IMAIN,*) 'maximum error in location of all the receivers: ',sngl(final_distance_max),' m'
-
- ! add warning if estimate is poor
- ! (usually means receiver outside the mesh given by the user)
- if(final_distance_max > 1000.d0) then
- write(IMAIN,*)
- write(IMAIN,*) '************************************************************'
- write(IMAIN,*) '************************************************************'
- write(IMAIN,*) '***** WARNING: at least one receiver is poorly located *****'
- write(IMAIN,*) '************************************************************'
- write(IMAIN,*) '************************************************************'
- endif
-
- ! get the base pathname for output files
- call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
-
- !! write the list of stations and associated epicentral distance
- !open(unit=27,file=trim(OUTPUT_FILES)//'/output_list_stations.txt',status='unknown')
- !do irec=1,nrec
- ! write(27,*) station_name(irec),'.',network_name(irec),' : ',horiz_dist(irec),' km horizontal distance'
- !enddo
- !close(27)
-
- ! write the locations of stations, so that we can load them and write them to SU headers later
- open(unit=IOUT_SU,file=trim(OUTPUT_FILES)//'/output_list_stations.txt',status='unknown')
- do irec=1,nrec
- write(IOUT_SU,*) x_found(irec),y_found(irec),z_found(irec)
- enddo
- close(IOUT_SU)
-
- ! elapsed time since beginning of mesh generation
- tCPU = wtime() - time_start
- write(IMAIN,*)
- write(IMAIN,*) 'Elapsed time for receiver detection in seconds = ',tCPU
- write(IMAIN,*)
- write(IMAIN,*) 'End of receiver detection - done'
- write(IMAIN,*)
-
- endif ! end of section executed by main process only
-
- ! main process broadcasts the results to all the slices
- call bcast_all_i(islice_selected_rec,nrec)
- call bcast_all_i(ispec_selected_rec,nrec)
- call bcast_all_dp(xi_receiver,nrec)
- call bcast_all_dp(eta_receiver,nrec)
- call bcast_all_dp(gamma_receiver,nrec)
- ! synchronize all the processes to make sure everybody has finished
- call sync_all()
-
- ! deallocate arrays
- deallocate(stlat)
- deallocate(stlon)
- deallocate(stele)
- deallocate(stbur)
- deallocate(stutm_x)
- deallocate(stutm_y)
- deallocate(horiz_dist)
- deallocate(ix_initial_guess)
- deallocate(iy_initial_guess)
- deallocate(iz_initial_guess)
- deallocate(x_target)
- deallocate(y_target)
- deallocate(z_target)
- deallocate(x_found)
- deallocate(y_found)
- deallocate(z_found)
- deallocate(final_distance)
- deallocate(ispec_selected_rec_all)
- deallocate(xi_receiver_all)
- deallocate(eta_receiver_all)
- deallocate(gamma_receiver_all)
- deallocate(x_found_all)
- deallocate(y_found_all)
- deallocate(z_found_all)
- deallocate(final_distance_all)
-
- end subroutine locate_receivers
-
-!=====================================================================
-
-
- subroutine station_filter(SUPPRESS_UTM_PROJECTION,UTM_PROJECTION_ZONE,myrank,filename,filtered_filename,nfilter, &
- LATITUDE_MIN, LATITUDE_MAX, LONGITUDE_MIN, LONGITUDE_MAX)
-
- implicit none
-
- include 'constants.h'
-
-! input
- logical :: SUPPRESS_UTM_PROJECTION
- integer :: UTM_PROJECTION_ZONE
- integer :: myrank
- character(len=*) :: filename,filtered_filename
- double precision :: LATITUDE_MIN,LATITUDE_MAX,LONGITUDE_MIN,LONGITUDE_MAX
-
-! output
- integer :: nfilter
-
- integer :: nrec, nrec_filtered, ios
-
- double precision :: stlat,stlon,stele,stbur,stutm_x,stutm_y
- double precision :: minlat,minlon,maxlat,maxlon
- character(len=MAX_LENGTH_STATION_NAME) :: station_name
- character(len=MAX_LENGTH_NETWORK_NAME) :: network_name
- character(len=256) :: dummystring
-
- nrec = 0
- nrec_filtered = 0
-
- ! counts number of lines in stations file
- open(unit=IIN, file=trim(filename), status = 'old', iostat = ios)
- if (ios /= 0) call exit_mpi(myrank, 'No file '//trim(filename)//', exit')
- do while(ios == 0)
- read(IIN,"(a256)",iostat = ios) dummystring
- if(ios /= 0) exit
-
- if( len_trim(dummystring) > 0 ) nrec = nrec + 1
- enddo
- close(IIN)
-
- ! reads in station locations
- open(unit=IIN, file=trim(filename), status = 'old', iostat = ios)
- !do irec = 1,nrec
- ! read(IIN,*) station_name,network_name,stlat,stlon,stele,stbur
- do while(ios == 0)
- read(IIN,"(a256)",iostat = ios) dummystring
- if( ios /= 0 ) exit
-
- ! counts number of stations in min/max region
- if( len_trim(dummystring) > 0 ) then
- dummystring = trim(dummystring)
- read(dummystring, *) station_name, network_name, stlat, stlon, stele, stbur
-
- ! convert station location to UTM
- call utm_geo(stlon,stlat,stutm_x,stutm_y,&
- UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
-
- ! counts stations within lon/lat region
- if( stutm_y >= LATITUDE_MIN .and. stutm_y <= LATITUDE_MAX .and. &
- stutm_x >= LONGITUDE_MIN .and. stutm_x <= LONGITUDE_MAX) &
- nrec_filtered = nrec_filtered + 1
- endif
- enddo
- close(IIN)
-
- ! writes out filtered stations file
- if (myrank == 0) then
- open(unit=IIN,file=trim(filename),status='old',action='read',iostat=ios)
- open(unit=IOUT,file=trim(filtered_filename),status='unknown')
- do while(ios == 0)
- read(IIN,"(a256)",iostat = ios) dummystring
- if( ios /= 0 ) exit
-
- !read(IIN,*) station_name,network_name,stlat,stlon,stele,stbur
- if( len_trim(dummystring) > 0 ) then
- dummystring = trim(dummystring)
- read(dummystring, *) station_name, network_name, stlat, stlon, stele, stbur
-
- ! convert station location to UTM
- call utm_geo(stlon,stlat,stutm_x,stutm_y,&
- UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
-
- if( stutm_y >= LATITUDE_MIN .and. stutm_y <= LATITUDE_MAX .and. &
- stutm_x >= LONGITUDE_MIN .and. stutm_x <= LONGITUDE_MAX) then
-
- ! w/out formating
- ! write(IOUT,*) trim(station_name),' ',trim(network_name),' ',sngl(stlat), &
- ! ' ',sngl(stlon), ' ',sngl(stele), ' ',sngl(stbur)
-
- ! w/ specific format
- write(IOUT,'(a10,1x,a10,4e18.6)') &
- trim(station_name),trim(network_name), &
- sngl(stlat),sngl(stlon),sngl(stele),sngl(stbur)
-
- endif
- end if
- enddo
- close(IIN)
- close(IOUT)
-
- write(IMAIN,*)
- write(IMAIN,*) 'there are ',nrec,' stations in file ', trim(filename)
- write(IMAIN,*) 'saving ',nrec_filtered,' stations inside the model in file ', trim(filtered_filename)
- write(IMAIN,*) 'excluding ',nrec - nrec_filtered,' stations located outside the model'
- write(IMAIN,*)
-
- if( nrec_filtered < 1 ) then
- write(IMAIN,*) 'error filtered stations:'
- write(IMAIN,*) ' simulation needs at least 1 station but got ',nrec_filtered
- write(IMAIN,*)
- write(IMAIN,*) ' check that stations in file '//trim(filename)//' are within'
-
- if( SUPPRESS_UTM_PROJECTION ) then
- write(IMAIN,*) ' latitude min/max : ',LATITUDE_MIN,LATITUDE_MAX
- write(IMAIN,*) ' longitude min/max: ',LONGITUDE_MIN,LONGITUDE_MAX
- else
- ! convert edge locations from UTM back to lat/lon
- call utm_geo(minlon,minlat,LONGITUDE_MIN,LATITUDE_MIN,&
- UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
- call utm_geo(maxlon,maxlat,LONGITUDE_MAX,LATITUDE_MAX,&
- UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
- write(IMAIN,*) ' longitude min/max: ',minlon,maxlon
- write(IMAIN,*) ' latitude min/max : ',minlat,maxlat
- write(IMAIN,*) ' UTM x min/max: ',LONGITUDE_MIN,LONGITUDE_MAX
- write(IMAIN,*) ' UTM y min/max : ',LATITUDE_MIN,LATITUDE_MAX
- endif
-
- write(IMAIN,*)
- endif
-
- endif
-
- nfilter = nrec_filtered
-
- end subroutine station_filter
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!----
+!---- locate_receivers finds the correct position of the receivers
+!----
+ subroutine locate_receivers(ibool,myrank,NSPEC_AB,NGLOB_AB, &
+ xstore,ystore,zstore,xigll,yigll,zigll,rec_filename, &
+ nrec,islice_selected_rec,ispec_selected_rec, &
+ xi_receiver,eta_receiver,gamma_receiver,station_name,network_name,nu, &
+ NPROC,utm_x_source,utm_y_source, &
+ UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+ iglob_is_surface_external_mesh,ispec_is_surface_external_mesh, &
+ num_free_surface_faces,free_surface_ispec,free_surface_ijk)
+
+ implicit none
+
+ include "constants.h"
+
+ logical SUPPRESS_UTM_PROJECTION
+
+ integer NPROC,UTM_PROJECTION_ZONE
+
+ integer nrec,myrank
+
+ integer NSPEC_AB,NGLOB_AB
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+
+! arrays containing coordinates of the points
+ real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: xstore,ystore,zstore
+
+! for surface locating and normal computing with external mesh
+ integer :: pt0_ix,pt0_iy,pt0_iz,pt1_ix,pt1_iy,pt1_iz,pt2_ix,pt2_iy,pt2_iz
+ integer :: num_free_surface_faces
+ real(kind=CUSTOM_REAL), dimension(3) :: u_vector,v_vector,w_vector
+ logical, dimension(NGLOB_AB) :: iglob_is_surface_external_mesh
+ logical, dimension(NSPEC_AB) :: ispec_is_surface_external_mesh
+ integer, dimension(num_free_surface_faces) :: free_surface_ispec
+ integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
+
+ integer, allocatable, dimension(:) :: ix_initial_guess,iy_initial_guess,iz_initial_guess
+
+ integer iprocloop
+ integer ios
+
+ double precision,dimension(1) :: altitude_rec,distmin_ele
+ double precision,dimension(4) :: elevation_node,dist_node
+ double precision,dimension(NPROC) :: distmin_ele_all,elevation_all
+ double precision, allocatable, dimension(:) :: x_target,y_target,z_target
+ double precision, allocatable, dimension(:) :: horiz_dist
+ double precision, allocatable, dimension(:) :: x_found,y_found,z_found
+
+ integer irec
+ integer i,j,k,ispec,iglob,iface,inode,imin,imax,jmin,jmax,kmin,kmax,igll,jgll,kgll
+ integer iselected,jselected,iface_selected,iadjust,jadjust
+ integer iproc(1)
+
+ double precision utm_x_source,utm_y_source
+ double precision dist
+ double precision xi,eta,gamma,dx,dy,dz,dxi,deta,dgamma
+
+! Gauss-Lobatto-Legendre points of integration
+ double precision xigll(NGLLX)
+ double precision yigll(NGLLY)
+ double precision zigll(NGLLZ)
+
+! input receiver file name
+ character(len=*) rec_filename
+
+! topology of the control points of the surface element
+ integer iax,iay,iaz
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+! coordinates of the control points of the surface element
+ double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
+
+ integer iter_loop,ispec_iterate
+
+ integer ia
+ double precision x,y,z
+ double precision xix,xiy,xiz
+ double precision etax,etay,etaz
+ double precision gammax,gammay,gammaz
+
+! timer MPI
+ double precision, external :: wtime
+ double precision time_start,tCPU
+
+! use dynamic allocation
+ double precision, dimension(:), allocatable :: final_distance
+ double precision distmin,final_distance_max
+
+! receiver information
+! timing information for the stations
+! station information for writing the seismograms
+
+ integer :: iglob_selected
+ integer, dimension(nrec) :: islice_selected_rec,ispec_selected_rec
+ double precision, dimension(nrec) :: xi_receiver,eta_receiver,gamma_receiver
+ double precision, dimension(3,3,nrec) :: nu
+ character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
+ character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
+ double precision, allocatable, dimension(:) :: stlat,stlon,stele,stbur,stutm_x,stutm_y,elevation
+
+ double precision, allocatable, dimension(:) :: x_found_all,y_found_all,z_found_all
+ double precision, dimension(:), allocatable :: final_distance_all
+ integer, allocatable, dimension(:) :: ispec_selected_rec_all
+ double precision, allocatable, dimension(:) :: xi_receiver_all,eta_receiver_all,gamma_receiver_all
+ double precision, allocatable, dimension(:,:,:) :: nu_all
+
+ integer :: ier
+ character(len=256) OUTPUT_FILES
+
+ real(kind=CUSTOM_REAL) :: xmin,xmax,ymin,ymax,zmin,zmax
+ real(kind=CUSTOM_REAL) :: xmin_ELE,xmax_ELE,ymin_ELE,ymax_ELE,zmin_ELE,zmax_ELE
+ integer :: imin_temp,imax_temp,jmin_temp,jmax_temp,kmin_temp,kmax_temp
+ integer,dimension(NGLLX*NGLLY*NGLLZ) :: iglob_temp
+
+! **************
+
+ ! dimension of model in current proc
+ xmin=minval(xstore(:)); xmax=maxval(xstore(:))
+ ymin=minval(ystore(:)); ymax=maxval(ystore(:))
+ zmin=minval(zstore(:)); zmax=maxval(zstore(:))
+ if(FASTER_RECEIVERS_POINTS_ONLY) then
+ imin_temp = 1; imax_temp = NGLLX
+ jmin_temp = 1; jmax_temp = NGLLY
+ kmin_temp = 1; kmax_temp = NGLLZ
+ else
+ imin_temp = 2; imax_temp = NGLLX - 1
+ jmin_temp = 2; jmax_temp = NGLLY - 1
+ kmin_temp = 2; kmax_temp = NGLLZ - 1
+ endif
+
+ ! get MPI starting time
+ time_start = wtime()
+
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) '********************'
+ write(IMAIN,*) ' locating receivers'
+ write(IMAIN,*) '********************'
+ write(IMAIN,*)
+ endif
+
+ ! define topology of the control element
+ call usual_hex_nodes(iaddx,iaddy,iaddz)
+
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) '*****************************************************************'
+ write(IMAIN,'(1x,a,a,a)') 'reading receiver information from ', trim(rec_filename), ' file'
+ write(IMAIN,*) '*****************************************************************'
+ endif
+
+ ! get number of stations from receiver file
+ open(unit=1,file=trim(rec_filename),status='old',action='read',iostat=ios)
+ if (ios /= 0) call exit_mpi(myrank,'error opening file '//trim(rec_filename))
+
+ ! allocate memory for arrays using number of stations
+ allocate(stlat(nrec), &
+ stlon(nrec), &
+ stele(nrec), &
+ stbur(nrec), &
+ stutm_x(nrec), &
+ stutm_y(nrec), &
+ horiz_dist(nrec), &
+ elevation(nrec), &
+ ix_initial_guess(nrec), &
+ iy_initial_guess(nrec), &
+ iz_initial_guess(nrec), &
+ x_target(nrec), &
+ y_target(nrec), &
+ z_target(nrec), &
+ x_found(nrec), &
+ y_found(nrec), &
+ z_found(nrec), &
+ final_distance(nrec), &
+ ispec_selected_rec_all(nrec), &
+ xi_receiver_all(nrec), &
+ eta_receiver_all(nrec), &
+ gamma_receiver_all(nrec), &
+ x_found_all(nrec), &
+ y_found_all(nrec), &
+ z_found_all(nrec), &
+ final_distance_all(nrec), &
+ nu_all(3,3,nrec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating arrays for locating receivers'
+
+ ! loop on all the stations
+ do irec=1,nrec
+
+ read(1,*,iostat=ios) station_name(irec),network_name(irec),stlat(irec),stlon(irec),stele(irec),stbur(irec)
+ if (ios /= 0) call exit_mpi(myrank, 'Error reading station file '//trim(rec_filename))
+
+ ! convert station location to UTM
+ call utm_geo(stlon(irec),stlat(irec),stutm_x(irec),stutm_y(irec),&
+ UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
+
+ ! compute horizontal distance between source and receiver in km
+ horiz_dist(irec) = dsqrt((stutm_y(irec)-utm_y_source)**2 + (stutm_x(irec)-utm_x_source)**2) / 1000.
+
+ ! print some information about stations
+ if(myrank == 0) &
+ write(IMAIN,*) 'Station #',irec,': ',station_name(irec)(1:len_trim(station_name(irec))), &
+ '.',network_name(irec)(1:len_trim(network_name(irec))), &
+ ' horizontal distance: ',sngl(horiz_dist(irec)),' km'
+
+ ! get approximate topography elevation at source long/lat coordinates
+ ! set distance to huge initial value
+ distmin = HUGEVAL
+ if(num_free_surface_faces > 0) then
+ iglob_selected = 1
+ ! loop only on points inside the element
+ ! exclude edges to ensure this point is not shared with other elements
+ imin = 2
+ imax = NGLLX - 1
+ jmin = 2
+ jmax = NGLLY - 1
+ iselected = 0
+ jselected = 0
+ iface_selected = 0
+ do iface=1,num_free_surface_faces
+ do j=jmin,jmax
+ do i=imin,imax
+
+ ispec = free_surface_ispec(iface)
+ igll = free_surface_ijk(1,(j-1)*NGLLY+i,iface)
+ jgll = free_surface_ijk(2,(j-1)*NGLLY+i,iface)
+ kgll = free_surface_ijk(3,(j-1)*NGLLY+i,iface)
+ iglob = ibool(igll,jgll,kgll,ispec)
+
+ ! keep this point if it is closer to the receiver
+ dist = dsqrt((stutm_x(irec)-dble(xstore(iglob)))**2 + &
+ (stutm_y(irec)-dble(ystore(iglob)))**2)
+ if(dist < distmin) then
+ distmin = dist
+ iglob_selected = iglob
+ iface_selected = iface
+ iselected = i
+ jselected = j
+ altitude_rec(1) = zstore(iglob_selected)
+ endif
+ enddo
+ enddo
+ ! end of loop on all the elements on the free surface
+ end do
+ ! weighted mean at current point of topography elevation of the four closest nodes
+ ! set distance to huge initial value
+ distmin = HUGEVAL
+ do j=jselected,jselected+1
+ do i=iselected,iselected+1
+ inode = 1
+ do jadjust=0,1
+ do iadjust= 0,1
+ ispec = free_surface_ispec(iface_selected)
+ igll = free_surface_ijk(1,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+ jgll = free_surface_ijk(2,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+ kgll = free_surface_ijk(3,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+ iglob = ibool(igll,jgll,kgll,ispec)
+
+ elevation_node(inode) = zstore(iglob)
+ dist_node(inode) = dsqrt((stutm_x(irec)-dble(xstore(iglob)))**2 + &
+ (stutm_y(irec)-dble(ystore(iglob)))**2)
+ inode = inode + 1
+ end do
+ end do
+ dist = sum(dist_node)
+ if(dist < distmin) then
+ distmin = dist
+ altitude_rec(1) = (dist_node(1)/dist)*elevation_node(1) + &
+ (dist_node(2)/dist)*elevation_node(2) + &
+ (dist_node(3)/dist)*elevation_node(3) + &
+ (dist_node(4)/dist)*elevation_node(4)
+ endif
+ end do
+ end do
+ end if
+ ! MPI communications to determine the best slice
+ distmin_ele(1)= distmin
+ call gather_all_dp(distmin_ele,1,distmin_ele_all,1,NPROC)
+ call gather_all_dp(altitude_rec,1,elevation_all,1,NPROC)
+ if(myrank == 0) then
+ iproc = minloc(distmin_ele_all)
+ altitude_rec(1) = elevation_all(iproc(1))
+ end if
+ call bcast_all_dp(altitude_rec,1)
+ elevation(irec) = altitude_rec(1)
+
+ ! reset distance to huge initial value
+ distmin=HUGEVAL
+
+! get the Cartesian components of n in the model: nu
+
+ ! orientation consistent with the UTM projection
+ ! X coordinate - East
+ nu(1,1,irec) = 1.d0
+ nu(1,2,irec) = 0.d0
+ nu(1,3,irec) = 0.d0
+ ! Y coordinate - North
+ nu(2,1,irec) = 0.d0
+ nu(2,2,irec) = 1.d0
+ nu(2,3,irec) = 0.d0
+ ! Z coordinate - Vertical
+ nu(3,1,irec) = 0.d0
+ nu(3,2,irec) = 0.d0
+ nu(3,3,irec) = 1.d0
+
+ x_target(irec) = stutm_x(irec)
+ y_target(irec) = stutm_y(irec)
+
+ ! receiver's Z coordinate
+ if( USE_SOURCES_RECVS_Z ) then
+ ! alternative: burial depth is given as z value directly
+ z_target(irec) = stbur(irec)
+ else
+ ! burial depth in STATIONS file given in m
+ z_target(irec) = elevation(irec) - stbur(irec)
+ endif
+ !if (myrank == 0) write(IOVTK,*) x_target(irec), y_target(irec), z_target(irec)
+
+ if (.not. SU_FORMAT) then
+ ! determines closest GLL point
+ ispec_selected_rec(irec) = 0
+ do ispec=1,NSPEC_AB
+
+ ! define the interval in which we look for points
+ if(FASTER_RECEIVERS_POINTS_ONLY) then
+ imin = 1
+ imax = NGLLX
+
+ jmin = 1
+ jmax = NGLLY
+
+ kmin = 1
+ kmax = NGLLZ
+
+ else
+ ! loop only on points inside the element
+ ! exclude edges to ensure this point is not shared with other elements
+ imin = 2
+ imax = NGLLX - 1
+
+ jmin = 2
+ jmax = NGLLY - 1
+
+ kmin = 2
+ kmax = NGLLZ - 1
+ endif
+
+ do k = kmin,kmax
+ do j = jmin,jmax
+ do i = imin,imax
+
+ iglob = ibool(i,j,k,ispec)
+
+ if (.not. RECVS_CAN_BE_BURIED_EXT_MESH) then
+ if ((.not. iglob_is_surface_external_mesh(iglob)) .or. (.not. ispec_is_surface_external_mesh(ispec))) then
+ cycle
+ endif
+ endif
+
+ dist = dsqrt((x_target(irec)-dble(xstore(iglob)))**2 &
+ +(y_target(irec)-dble(ystore(iglob)))**2 &
+ +(z_target(irec)-dble(zstore(iglob)))**2)
+
+ ! keep this point if it is closer to the receiver
+ if(dist < distmin) then
+ distmin = dist
+ ispec_selected_rec(irec) = ispec
+ ix_initial_guess(irec) = i
+ iy_initial_guess(irec) = j
+ iz_initial_guess(irec) = k
+
+ xi_receiver(irec) = dble(ix_initial_guess(irec))
+ eta_receiver(irec) = dble(iy_initial_guess(irec))
+ gamma_receiver(irec) = dble(iz_initial_guess(irec))
+ x_found(irec) = xstore(iglob)
+ y_found(irec) = ystore(iglob)
+ z_found(irec) = zstore(iglob)
+ endif
+
+ enddo
+ enddo
+ enddo
+
+ ! compute final distance between asked and found (converted to km)
+ final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
+ (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
+
+ ! end of loop on all the spectral elements in current slice
+ enddo
+ else
+ ispec_selected_rec(irec) = 0
+ ix_initial_guess(irec) = 0
+ iy_initial_guess(irec) = 0
+ iz_initial_guess(irec) = 0
+ final_distance(irec) = HUGEVAL
+ if ( (x_target(irec)>=xmin .and. x_target(irec)<=xmax) .and. &
+ (y_target(irec)>=ymin .and. y_target(irec)<=ymax) .and. &
+ (z_target(irec)>=zmin .and. z_target(irec)<=zmax) ) then
+ do ispec=1,NSPEC_AB
+ iglob_temp=reshape(ibool(:,:,:,ispec),(/NGLLX*NGLLY*NGLLZ/))
+ xmin_ELE=minval(xstore(iglob_temp))
+ xmax_ELE=maxval(xstore(iglob_temp))
+ ymin_ELE=minval(ystore(iglob_temp))
+ ymax_ELE=maxval(ystore(iglob_temp))
+ zmin_ELE=minval(zstore(iglob_temp))
+ zmax_ELE=maxval(zstore(iglob_temp))
+ if ( (x_target(irec)>=xmin_ELE .and. x_target(irec)<=xmax_ELE) .and. &
+ (y_target(irec)>=ymin_ELE .and. y_target(irec)<=ymax_ELE) .and. &
+ (z_target(irec)>=zmin_ELE .and. z_target(irec)<=zmax_ELE) ) then
+ ! we find the element (ispec) which "may" contain the receiver (irec)
+ ! so we only need to compute distances (which is expensive because of "dsqrt") within those elements
+ ispec_selected_rec(irec) = ispec
+ do k = kmin_temp,kmax_temp
+ do j = jmin_temp,jmax_temp
+ do i = imin_temp,imax_temp
+ iglob = ibool(i,j,k,ispec)
+ ! for comparison purpose, we don't have to do "dsqrt", which is expensive
+ dist = ((x_target(irec)-dble(xstore(iglob)))**2 &
+ +(y_target(irec)-dble(ystore(iglob)))**2 &
+ +(z_target(irec)-dble(zstore(iglob)))**2)
+ if(dist < distmin) then
+ distmin = dist
+ ix_initial_guess(irec) = i
+ iy_initial_guess(irec) = j
+ iz_initial_guess(irec) = k
+ xi_receiver(irec) = dble(ix_initial_guess(irec))
+ eta_receiver(irec) = dble(iy_initial_guess(irec))
+ gamma_receiver(irec) = dble(iz_initial_guess(irec))
+ x_found(irec) = xstore(iglob)
+ y_found(irec) = ystore(iglob)
+ z_found(irec) = zstore(iglob)
+ endif
+ enddo
+ enddo
+ enddo
+ final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
+ (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
+ endif ! if receiver "may" be within this element
+ enddo ! do ispec=1,NSPEC_AB
+ endif ! if receiver "may" be within this proc
+ endif !if (.not. SU_FORMAT)
+
+ if (ispec_selected_rec(irec) == 0) then
+ ! receiver is NOT within this proc, assign trivial values
+ ispec_selected_rec(irec) = 1
+ ix_initial_guess(irec) = 1
+ iy_initial_guess(irec) = 1
+ iz_initial_guess(irec) = 1
+ final_distance(irec) = HUGEVAL
+ endif
+
+ ! get normal to the face of the hexaedra if receiver is on the surface
+ if ((.not. RECVS_CAN_BE_BURIED_EXT_MESH) .and. &
+ .not. (ispec_selected_rec(irec) == 0)) then
+ pt0_ix = -1
+ pt0_iy = -1
+ pt0_iz = -1
+ pt1_ix = -1
+ pt1_iy = -1
+ pt1_iz = -1
+ pt2_ix = -1
+ pt2_iy = -1
+ pt2_iz = -1
+ ! we get two vectors of the face (three points) to compute the normal
+ if (ix_initial_guess(irec) == 1 .and. &
+ iglob_is_surface_external_mesh(ibool(1,2,2,ispec_selected_rec(irec)))) then
+ pt0_ix = 1
+ pt0_iy = NGLLY
+ pt0_iz = 1
+ pt1_ix = 1
+ pt1_iy = 1
+ pt1_iz = 1
+ pt2_ix = 1
+ pt2_iy = NGLLY
+ pt2_iz = NGLLZ
+ endif
+ if (ix_initial_guess(irec) == NGLLX .and. &
+ iglob_is_surface_external_mesh(ibool(NGLLX,2,2,ispec_selected_rec(irec)))) then
+ pt0_ix = NGLLX
+ pt0_iy = 1
+ pt0_iz = 1
+ pt1_ix = NGLLX
+ pt1_iy = NGLLY
+ pt1_iz = 1
+ pt2_ix = NGLLX
+ pt2_iy = 1
+ pt2_iz = NGLLZ
+ endif
+ if (iy_initial_guess(irec) == 1 .and. &
+ iglob_is_surface_external_mesh(ibool(2,1,2,ispec_selected_rec(irec)))) then
+ pt0_ix = 1
+ pt0_iy = 1
+ pt0_iz = 1
+ pt1_ix = NGLLX
+ pt1_iy = 1
+ pt1_iz = 1
+ pt2_ix = 1
+ pt2_iy = 1
+ pt2_iz = NGLLZ
+ endif
+ if (iy_initial_guess(irec) == NGLLY .and. &
+ iglob_is_surface_external_mesh(ibool(2,NGLLY,2,ispec_selected_rec(irec)))) then
+ pt0_ix = NGLLX
+ pt0_iy = NGLLY
+ pt0_iz = 1
+ pt1_ix = 1
+ pt1_iy = NGLLY
+ pt1_iz = 1
+ pt2_ix = NGLLX
+ pt2_iy = NGLLY
+ pt2_iz = NGLLZ
+ endif
+ if (iz_initial_guess(irec) == 1 .and. &
+ iglob_is_surface_external_mesh(ibool(2,2,1,ispec_selected_rec(irec)))) then
+ pt0_ix = NGLLX
+ pt0_iy = 1
+ pt0_iz = 1
+ pt1_ix = 1
+ pt1_iy = 1
+ pt1_iz = 1
+ pt2_ix = NGLLX
+ pt2_iy = NGLLY
+ pt2_iz = 1
+ endif
+ if (iz_initial_guess(irec) == NGLLZ .and. &
+ iglob_is_surface_external_mesh(ibool(2,2,NGLLZ,ispec_selected_rec(irec)))) then
+ pt0_ix = 1
+ pt0_iy = 1
+ pt0_iz = NGLLZ
+ pt1_ix = NGLLX
+ pt1_iy = 1
+ pt1_iz = NGLLZ
+ pt2_ix = 1
+ pt2_iy = NGLLY
+ pt2_iz = NGLLZ
+ endif
+
+ if (pt0_ix<0 .or.pt0_iy<0 .or. pt0_iz<0 .or. &
+ pt1_ix<0 .or. pt1_iy<0 .or. pt1_iz<0 .or. &
+ pt2_ix<0 .or. pt2_iy<0 .or. pt2_iz<0) then
+ stop 'error in computing normal for receivers.'
+ endif
+
+ u_vector(1) = xstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
+ - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+ u_vector(2) = ystore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
+ - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+ u_vector(3) = zstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_rec(irec))) &
+ - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+ v_vector(1) = xstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
+ - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+ v_vector(2) = ystore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
+ - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+ v_vector(3) = zstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_rec(irec))) &
+ - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_rec(irec)))
+
+ ! cross product
+ w_vector(1) = u_vector(2)*v_vector(3) - u_vector(3)*v_vector(2)
+ w_vector(2) = u_vector(3)*v_vector(1) - u_vector(1)*v_vector(3)
+ w_vector(3) = u_vector(1)*v_vector(2) - u_vector(2)*v_vector(1)
+
+ ! normalize vector w
+ w_vector(:) = w_vector(:)/sqrt(w_vector(1)**2+w_vector(2)**2+w_vector(3)**2)
+
+ ! build the two other vectors for a direct base: we normalize u, and v=w^u
+ u_vector(:) = u_vector(:)/sqrt(u_vector(1)**2+u_vector(2)**2+u_vector(3)**2)
+ v_vector(1) = w_vector(2)*u_vector(3) - w_vector(3)*u_vector(2)
+ v_vector(2) = w_vector(3)*u_vector(1) - w_vector(1)*u_vector(3)
+ v_vector(3) = w_vector(1)*u_vector(2) - w_vector(2)*u_vector(1)
+
+ ! build rotation matrice nu for seismograms
+ if (EXT_MESH_RECV_NORMAL) then
+ ! East (u)
+ nu(1,1,irec) = u_vector(1)
+ nu(1,2,irec) = v_vector(1)
+ nu(1,3,irec) = w_vector(1)
+
+ ! North (v)
+ nu(2,1,irec) = u_vector(2)
+ nu(2,2,irec) = v_vector(2)
+ nu(2,3,irec) = w_vector(2)
+
+ ! Vertical (w)
+ nu(3,1,irec) = u_vector(3)
+ nu(3,2,irec) = v_vector(3)
+ nu(3,3,irec) = w_vector(3)
+ else
+ ! East
+ nu(1,1,irec) = 1.d0
+ nu(1,2,irec) = 0.d0
+ nu(1,3,irec) = 0.d0
+
+ ! North
+ nu(2,1,irec) = 0.d0
+ nu(2,2,irec) = 1.d0
+ nu(2,3,irec) = 0.d0
+
+ ! Vertical
+ nu(3,1,irec) = 0.d0
+ nu(3,2,irec) = 0.d0
+ nu(3,3,irec) = 1.d0
+ endif
+
+ endif ! of if (.not. RECVS_CAN_BE_BURIED_EXT_MESH)
+
+ ! end of loop on all the stations
+ enddo
+
+ ! close receiver file
+ close(1)
+
+! ****************************************
+! find the best (xi,eta,gamma) for each receiver
+! ****************************************
+
+ if(.not. FASTER_RECEIVERS_POINTS_ONLY) then
+
+ ! loop on all the receivers to iterate in that slice
+ do irec = 1,nrec
+
+ ispec_iterate = ispec_selected_rec(irec)
+
+ ! use initial guess in xi and eta
+ xi = xigll(ix_initial_guess(irec))
+ eta = yigll(iy_initial_guess(irec))
+ gamma = zigll(iz_initial_guess(irec))
+
+ ! define coordinates of the control points of the element
+ do ia=1,NGNOD
+ iax = 0
+ iay = 0
+ iaz = 0
+ if(iaddx(ia) == 0) then
+ iax = 1
+ else if(iaddx(ia) == 1) then
+ iax = (NGLLX+1)/2
+ else if(iaddx(ia) == 2) then
+ iax = NGLLX
+ else
+ call exit_MPI(myrank,'incorrect value of iaddx')
+ endif
+
+ if(iaddy(ia) == 0) then
+ iay = 1
+ else if(iaddy(ia) == 1) then
+ iay = (NGLLY+1)/2
+ else if(iaddy(ia) == 2) then
+ iay = NGLLY
+ else
+ call exit_MPI(myrank,'incorrect value of iaddy')
+ endif
+
+ if(iaddz(ia) == 0) then
+ iaz = 1
+ else if(iaddz(ia) == 1) then
+ iaz = (NGLLZ+1)/2
+ else if(iaddz(ia) == 2) then
+ iaz = NGLLZ
+ else
+ call exit_MPI(myrank,'incorrect value of iaddz')
+ endif
+
+ iglob = ibool(iax,iay,iaz,ispec_iterate)
+ xelm(ia) = dble(xstore(iglob))
+ yelm(ia) = dble(ystore(iglob))
+ zelm(ia) = dble(zstore(iglob))
+
+ enddo
+
+ ! iterate to solve the non linear system
+ do iter_loop = 1,NUM_ITER
+
+ ! impose receiver exactly at the surface
+ ! gamma = 1.d0
+
+ ! recompute jacobian for the new point
+ call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+
+ ! compute distance to target location
+ dx = - (x - x_target(irec))
+ dy = - (y - y_target(irec))
+ dz = - (z - z_target(irec))
+
+ ! compute increments
+ ! gamma does not change since we know the receiver is exactly on the surface
+ dxi = xix*dx + xiy*dy + xiz*dz
+ deta = etax*dx + etay*dy + etaz*dz
+ dgamma = gammax*dx + gammay*dy + gammaz*dz
+
+ ! update values
+ xi = xi + dxi
+ eta = eta + deta
+ gamma = gamma + dgamma
+
+ ! impose that we stay in that element
+ ! (useful if user gives a receiver outside the mesh for instance)
+ ! we can go slightly outside the [1,1] segment since with finite elements
+ ! the polynomial solution is defined everywhere
+ ! this can be useful for convergence of itertive scheme with distorted elements
+ if (xi > 1.10d0) xi = 1.10d0
+ if (xi < -1.10d0) xi = -1.10d0
+ if (eta > 1.10d0) eta = 1.10d0
+ if (eta < -1.10d0) eta = -1.10d0
+ if (gamma > 1.10d0) gamma = 1.10d0
+ if (gamma < -1.10d0) gamma = -1.10d0
+
+ ! end of non linear iterations
+ enddo
+
+ ! impose receiver exactly at the surface after final iteration
+ ! gamma = 1.d0
+
+ ! compute final coordinates of point found
+ call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+
+ ! store xi,eta and x,y,z of point found
+ xi_receiver(irec) = xi
+ eta_receiver(irec) = eta
+ gamma_receiver(irec) = gamma
+ x_found(irec) = x
+ y_found(irec) = y
+ z_found(irec) = z
+
+ ! compute final distance between asked and found (converted to km)
+ final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
+ (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)
+
+ enddo
+
+ endif ! of if (.not. FASTER_RECEIVERS_POINTS_ONLY)
+
+ ! synchronize all the processes to make sure all the estimates are available
+ call sync_all()
+ ! for MPI version, gather information from all the nodes
+ if (myrank/=0) then ! gather information from other processors (one at a time)
+ call send_i(ispec_selected_rec, nrec,0,0)
+ call send_dp(xi_receiver, nrec,0,1)
+ call send_dp(eta_receiver, nrec,0,2)
+ call send_dp(gamma_receiver, nrec,0,3)
+ call send_dp(final_distance, nrec,0,4)
+ call send_dp(x_found, nrec,0,5)
+ call send_dp(y_found, nrec,0,6)
+ call send_dp(z_found, nrec,0,7)
+ call send_dp(nu, 3*3*nrec,0,8)
+ else
+ islice_selected_rec(:) = 0
+ do iprocloop=1,NPROC-1
+ call recv_i(ispec_selected_rec_all, nrec,iprocloop,0)
+ call recv_dp(xi_receiver_all, nrec,iprocloop,1)
+ call recv_dp(eta_receiver_all, nrec,iprocloop,2)
+ call recv_dp(gamma_receiver_all, nrec,iprocloop,3)
+ call recv_dp(final_distance_all, nrec,iprocloop,4)
+ call recv_dp(x_found_all, nrec,iprocloop,5)
+ call recv_dp(y_found_all, nrec,iprocloop,6)
+ call recv_dp(z_found_all, nrec,iprocloop,7)
+ call recv_dp(nu_all, 3*3*nrec,iprocloop,8)
+ do irec=1,nrec
+ if (final_distance_all(irec) < final_distance(irec)) then
+ final_distance(irec) = final_distance_all(irec)
+ islice_selected_rec(irec) = iprocloop
+ ispec_selected_rec(irec) = ispec_selected_rec_all(irec)
+ xi_receiver(irec) = xi_receiver_all(irec)
+ eta_receiver(irec) = eta_receiver_all(irec)
+ gamma_receiver(irec) = gamma_receiver_all(irec)
+ x_found(irec) = x_found_all(irec)
+ y_found(irec) = y_found_all(irec)
+ z_found(irec) = z_found_all(irec)
+ nu(:,:,irec) = nu_all(:,:,irec)
+ endif
+ enddo
+ enddo
+ endif
+ call sync_all()
+
+ ! this is executed by main process only
+ if(myrank == 0) then
+
+ do irec=1,nrec
+
+ write(IMAIN,*)
+ write(IMAIN,*) 'station # ',irec,' ',station_name(irec),network_name(irec)
+
+ if(final_distance(irec) == HUGEVAL) call exit_MPI(myrank,'error locating receiver')
+
+ write(IMAIN,*) ' original latitude: ',sngl(stlat(irec))
+ write(IMAIN,*) ' original longitude: ',sngl(stlon(irec))
+ if( SUPPRESS_UTM_PROJECTION ) then
+ write(IMAIN,*) ' original x: ',sngl(stutm_x(irec))
+ write(IMAIN,*) ' original y: ',sngl(stutm_y(irec))
+ else
+ write(IMAIN,*) ' original UTM x: ',sngl(stutm_x(irec))
+ write(IMAIN,*) ' original UTM y: ',sngl(stutm_y(irec))
+ endif
+ if( USE_SOURCES_RECVS_Z ) then
+ write(IMAIN,*) ' original z: ',sngl(stbur(irec))
+ else
+ write(IMAIN,*) ' original depth: ',sngl(stbur(irec)),' m'
+ endif
+ write(IMAIN,*) ' horizontal distance: ',sngl(horiz_dist(irec))
+ write(IMAIN,*) ' target x, y, z: ',sngl(x_target(irec)),sngl(y_target(irec)),sngl(z_target(irec))
+
+ write(IMAIN,*) ' closest estimate found: ',sngl(final_distance(irec)),' m away'
+ write(IMAIN,*) ' in slice ',islice_selected_rec(irec),' in element ',ispec_selected_rec(irec)
+ if(FASTER_RECEIVERS_POINTS_ONLY) then
+ write(IMAIN,*) ' in point i,j,k = ',nint(xi_receiver(irec)),nint(eta_receiver(irec)),nint(gamma_receiver(irec))
+ write(IMAIN,*) ' nu1 = ',nu(1,:,irec)
+ write(IMAIN,*) ' nu2 = ',nu(2,:,irec)
+ write(IMAIN,*) ' nu3 = ',nu(3,:,irec)
+ else
+ write(IMAIN,*) ' at coordinates: '
+ write(IMAIN,*) ' xi = ',xi_receiver(irec)
+ write(IMAIN,*) ' eta = ',eta_receiver(irec)
+ write(IMAIN,*) ' gamma = ',gamma_receiver(irec)
+ endif
+ if( SUPPRESS_UTM_PROJECTION ) then
+ write(IMAIN,*) ' x: ',x_found(irec)
+ write(IMAIN,*) ' y: ',y_found(irec)
+ else
+ write(IMAIN,*) ' UTM x: ',x_found(irec)
+ write(IMAIN,*) ' UTM y: ',y_found(irec)
+ endif
+ if( USE_SOURCES_RECVS_Z ) then
+ write(IMAIN,*) ' z: ',z_found(irec)
+ else
+ write(IMAIN,*) ' depth: ',dabs(z_found(irec) - elevation(irec)),' m'
+ write(IMAIN,*) ' z: ',z_found(irec)
+ endif
+ write(IMAIN,*)
+
+
+ ! add warning if estimate is poor
+ ! (usually means receiver outside the mesh given by the user)
+ if(final_distance(irec) > 3000.d0) then
+ write(IMAIN,*) '*******************************************************'
+ write(IMAIN,*) '***** WARNING: receiver location estimate is poor *****'
+ write(IMAIN,*) '*******************************************************'
+ endif
+
+ write(IMAIN,*)
+
+ enddo
+
+ ! compute maximal distance for all the receivers
+ final_distance_max = maxval(final_distance(:))
+
+ ! display maximum error for all the receivers
+ write(IMAIN,*) 'maximum error in location of all the receivers: ',sngl(final_distance_max),' m'
+
+ ! add warning if estimate is poor
+ ! (usually means receiver outside the mesh given by the user)
+ if(final_distance_max > 1000.d0) then
+ write(IMAIN,*)
+ write(IMAIN,*) '************************************************************'
+ write(IMAIN,*) '************************************************************'
+ write(IMAIN,*) '***** WARNING: at least one receiver is poorly located *****'
+ write(IMAIN,*) '************************************************************'
+ write(IMAIN,*) '************************************************************'
+ endif
+
+ ! get the base pathname for output files
+ call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
+
+ !! write the list of stations and associated epicentral distance
+ !open(unit=27,file=trim(OUTPUT_FILES)//'/output_list_stations.txt',status='unknown')
+ !do irec=1,nrec
+ ! write(27,*) station_name(irec),'.',network_name(irec),' : ',horiz_dist(irec),' km horizontal distance'
+ !enddo
+ !close(27)
+
+ ! write the locations of stations, so that we can load them and write them to SU headers later
+ open(unit=IOUT_SU,file=trim(OUTPUT_FILES)//'/output_list_stations.txt',status='unknown')
+ do irec=1,nrec
+ write(IOUT_SU,*) x_found(irec),y_found(irec),z_found(irec)
+ enddo
+ close(IOUT_SU)
+
+ ! elapsed time since beginning of mesh generation
+ tCPU = wtime() - time_start
+ write(IMAIN,*)
+ write(IMAIN,*) 'Elapsed time for receiver detection in seconds = ',tCPU
+ write(IMAIN,*)
+ write(IMAIN,*) 'End of receiver detection - done'
+ write(IMAIN,*)
+
+ endif ! end of section executed by main process only
+
+ ! main process broadcasts the results to all the slices
+ call bcast_all_i(islice_selected_rec,nrec)
+ call bcast_all_i(ispec_selected_rec,nrec)
+ call bcast_all_dp(xi_receiver,nrec)
+ call bcast_all_dp(eta_receiver,nrec)
+ call bcast_all_dp(gamma_receiver,nrec)
+ ! synchronize all the processes to make sure everybody has finished
+ call sync_all()
+
+ ! deallocate arrays
+ deallocate(stlat)
+ deallocate(stlon)
+ deallocate(stele)
+ deallocate(stbur)
+ deallocate(stutm_x)
+ deallocate(stutm_y)
+ deallocate(horiz_dist)
+ deallocate(ix_initial_guess)
+ deallocate(iy_initial_guess)
+ deallocate(iz_initial_guess)
+ deallocate(x_target)
+ deallocate(y_target)
+ deallocate(z_target)
+ deallocate(x_found)
+ deallocate(y_found)
+ deallocate(z_found)
+ deallocate(final_distance)
+ deallocate(ispec_selected_rec_all)
+ deallocate(xi_receiver_all)
+ deallocate(eta_receiver_all)
+ deallocate(gamma_receiver_all)
+ deallocate(x_found_all)
+ deallocate(y_found_all)
+ deallocate(z_found_all)
+ deallocate(final_distance_all)
+
+ end subroutine locate_receivers
+
+!=====================================================================
+
+
+ subroutine station_filter(SUPPRESS_UTM_PROJECTION,UTM_PROJECTION_ZONE,myrank,filename,filtered_filename,nfilter, &
+ LATITUDE_MIN, LATITUDE_MAX, LONGITUDE_MIN, LONGITUDE_MAX)
+
+ implicit none
+
+ include 'constants.h'
+
+! input
+ logical :: SUPPRESS_UTM_PROJECTION
+ integer :: UTM_PROJECTION_ZONE
+ integer :: myrank
+ character(len=*) :: filename,filtered_filename
+ double precision :: LATITUDE_MIN,LATITUDE_MAX,LONGITUDE_MIN,LONGITUDE_MAX
+
+! output
+ integer :: nfilter
+
+ integer :: nrec, nrec_filtered, ios
+
+ double precision :: stlat,stlon,stele,stbur,stutm_x,stutm_y
+ double precision :: minlat,minlon,maxlat,maxlon
+ character(len=MAX_LENGTH_STATION_NAME) :: station_name
+ character(len=MAX_LENGTH_NETWORK_NAME) :: network_name
+ character(len=256) :: dummystring
+
+ nrec = 0
+ nrec_filtered = 0
+
+ ! counts number of lines in stations file
+ open(unit=IIN, file=trim(filename), status = 'old', iostat = ios)
+ if (ios /= 0) call exit_mpi(myrank, 'No file '//trim(filename)//', exit')
+ do while(ios == 0)
+ read(IIN,"(a256)",iostat = ios) dummystring
+ if(ios /= 0) exit
+
+ if( len_trim(dummystring) > 0 ) nrec = nrec + 1
+ enddo
+ close(IIN)
+
+ ! reads in station locations
+ open(unit=IIN, file=trim(filename), status = 'old', iostat = ios)
+ !do irec = 1,nrec
+ ! read(IIN,*) station_name,network_name,stlat,stlon,stele,stbur
+ do while(ios == 0)
+ read(IIN,"(a256)",iostat = ios) dummystring
+ if( ios /= 0 ) exit
+
+ ! counts number of stations in min/max region
+ if( len_trim(dummystring) > 0 ) then
+ dummystring = trim(dummystring)
+ read(dummystring, *) station_name, network_name, stlat, stlon, stele, stbur
+
+ ! convert station location to UTM
+ call utm_geo(stlon,stlat,stutm_x,stutm_y,&
+ UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
+
+ ! counts stations within lon/lat region
+ if( stutm_y >= LATITUDE_MIN .and. stutm_y <= LATITUDE_MAX .and. &
+ stutm_x >= LONGITUDE_MIN .and. stutm_x <= LONGITUDE_MAX) &
+ nrec_filtered = nrec_filtered + 1
+ endif
+ enddo
+ close(IIN)
+
+ ! writes out filtered stations file
+ if (myrank == 0) then
+ open(unit=IIN,file=trim(filename),status='old',action='read',iostat=ios)
+ open(unit=IOUT,file=trim(filtered_filename),status='unknown')
+ do while(ios == 0)
+ read(IIN,"(a256)",iostat = ios) dummystring
+ if( ios /= 0 ) exit
+
+ !read(IIN,*) station_name,network_name,stlat,stlon,stele,stbur
+ if( len_trim(dummystring) > 0 ) then
+ dummystring = trim(dummystring)
+ read(dummystring, *) station_name, network_name, stlat, stlon, stele, stbur
+
+ ! convert station location to UTM
+ call utm_geo(stlon,stlat,stutm_x,stutm_y,&
+ UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
+
+ if( stutm_y >= LATITUDE_MIN .and. stutm_y <= LATITUDE_MAX .and. &
+ stutm_x >= LONGITUDE_MIN .and. stutm_x <= LONGITUDE_MAX) then
+
+ ! w/out formating
+ ! write(IOUT,*) trim(station_name),' ',trim(network_name),' ',sngl(stlat), &
+ ! ' ',sngl(stlon), ' ',sngl(stele), ' ',sngl(stbur)
+
+ ! w/ specific format
+ write(IOUT,'(a10,1x,a10,4e18.6)') &
+ trim(station_name),trim(network_name), &
+ sngl(stlat),sngl(stlon),sngl(stele),sngl(stbur)
+
+ endif
+ end if
+ enddo
+ close(IIN)
+ close(IOUT)
+
+ write(IMAIN,*)
+ write(IMAIN,*) 'there are ',nrec,' stations in file ', trim(filename)
+ write(IMAIN,*) 'saving ',nrec_filtered,' stations inside the model in file ', trim(filtered_filename)
+ write(IMAIN,*) 'excluding ',nrec - nrec_filtered,' stations located outside the model'
+ write(IMAIN,*)
+
+ if( nrec_filtered < 1 ) then
+ write(IMAIN,*) 'error filtered stations:'
+ write(IMAIN,*) ' simulation needs at least 1 station but got ',nrec_filtered
+ write(IMAIN,*)
+ write(IMAIN,*) ' check that stations in file '//trim(filename)//' are within'
+
+ if( SUPPRESS_UTM_PROJECTION ) then
+ write(IMAIN,*) ' latitude min/max : ',LATITUDE_MIN,LATITUDE_MAX
+ write(IMAIN,*) ' longitude min/max: ',LONGITUDE_MIN,LONGITUDE_MAX
+ else
+ ! convert edge locations from UTM back to lat/lon
+ call utm_geo(minlon,minlat,LONGITUDE_MIN,LATITUDE_MIN,&
+ UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
+ call utm_geo(maxlon,maxlat,LONGITUDE_MAX,LATITUDE_MAX,&
+ UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
+ write(IMAIN,*) ' longitude min/max: ',minlon,maxlon
+ write(IMAIN,*) ' latitude min/max : ',minlat,maxlat
+ write(IMAIN,*) ' UTM x min/max: ',LONGITUDE_MIN,LONGITUDE_MAX
+ write(IMAIN,*) ' UTM y min/max : ',LATITUDE_MIN,LATITUDE_MAX
+ endif
+
+ write(IMAIN,*)
+ endif
+
+ endif
+
+ nfilter = nrec_filtered
+
+ end subroutine station_filter
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_source.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_source.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/locate_source.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,948 +1,948 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!----
-!---- locate_source finds the correct position of the source
-!----
-
- subroutine locate_source(ibool,NSOURCES,myrank,NSPEC_AB,NGLOB_AB,xstore,ystore,zstore, &
- xigll,yigll,zigll,NPROC, &
- tshift_cmt,min_tshift_cmt_original,yr,jda,ho,mi,utm_x_source,utm_y_source, &
- DT,hdur,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
- islice_selected_source,ispec_selected_source, &
- xi_source,eta_source,gamma_source, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
- PRINT_SOURCE_TIME_FUNCTION, &
- nu_source,iglob_is_surface_external_mesh,ispec_is_surface_external_mesh, &
- ispec_is_acoustic,ispec_is_elastic, &
- num_free_surface_faces,free_surface_ispec,free_surface_ijk)
-
- implicit none
-
- include "constants.h"
-
- integer NPROC,UTM_PROJECTION_ZONE
- integer NSPEC_AB,NGLOB_AB,NSOURCES
-
- logical PRINT_SOURCE_TIME_FUNCTION,SUPPRESS_UTM_PROJECTION
-
- double precision DT
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-
- integer myrank
-
- ! arrays containing coordinates of the points
- real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: xstore,ystore,zstore
-
- logical, dimension(NSPEC_AB) :: ispec_is_acoustic,ispec_is_elastic
-
- integer yr,jda,ho,mi
-
- double precision tshift_cmt(NSOURCES)
- double precision sec,min_tshift_cmt_original
-
- integer iprocloop
-
- integer i,j,k,ispec,iglob,iglob_selected,inode,iface,isource
- integer imin,imax,jmin,jmax,kmin,kmax,igll,jgll,kgll
- integer iselected,jselected,iface_selected,iadjust,jadjust
- integer iproc(1)
-
- double precision, dimension(NSOURCES) :: utm_x_source,utm_y_source
- double precision dist
- double precision xi,eta,gamma,dx,dy,dz,dxi,deta
-
- ! Gauss-Lobatto-Legendre points of integration
- double precision xigll(NGLLX)
- double precision yigll(NGLLY)
- double precision zigll(NGLLZ)
-
- ! topology of the control points of the surface element
- integer iax,iay,iaz
- integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
-
- ! coordinates of the control points of the surface element
- double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
-
- integer iter_loop
-
- integer ia
- double precision x,y,z
- double precision xix,xiy,xiz
- double precision etax,etay,etaz
- double precision gammax,gammay,gammaz
- double precision dgamma
-
- double precision final_distance_source(NSOURCES)
-
- double precision x_target_source,y_target_source,z_target_source
-
- double precision,dimension(1) :: altitude_source,distmin_ele
- double precision,dimension(NPROC) :: distmin_ele_all,elevation_all
- double precision,dimension(4) :: elevation_node,dist_node
-
- integer islice_selected_source(NSOURCES)
-
- ! timer MPI
- double precision, external :: wtime
- double precision time_start,tCPU
-
- integer ispec_selected_source(NSOURCES)
-
- integer ngather, ns, ne, ig, is, ng
-
- integer, dimension(NGATHER_SOURCES,0:NPROC-1) :: ispec_selected_source_all
- double precision, dimension(NGATHER_SOURCES,0:NPROC-1) :: xi_source_all,eta_source_all,gamma_source_all, &
- final_distance_source_all,x_found_source_all,y_found_source_all,z_found_source_all
- double precision, dimension(3,3,NGATHER_SOURCES,0:NPROC-1) :: nu_source_all
-
- double precision, dimension(:), allocatable :: tmp_local
- double precision, dimension(:,:),allocatable :: tmp_all_local
-
- double precision hdur(NSOURCES)
- double precision :: f0,t0_ricker
-
- double precision, dimension(NSOURCES) :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
- double precision, dimension(NSOURCES) :: xi_source,eta_source,gamma_source
- double precision, dimension(3,3,NSOURCES) :: nu_source
-
- double precision, dimension(NSOURCES) :: lat,long,depth
- double precision moment_tensor(6,NSOURCES)
-
- character(len=256) OUTPUT_FILES
-
- double precision, dimension(NSOURCES) :: x_found_source,y_found_source,z_found_source
- double precision, dimension(NSOURCES) :: elevation
- double precision distmin
-
- integer, dimension(:), allocatable :: tmp_i_local
- integer, dimension(:,:),allocatable :: tmp_i_all_local
-
- ! for surface locating and normal computing with external mesh
- integer :: pt0_ix,pt0_iy,pt0_iz,pt1_ix,pt1_iy,pt1_iz,pt2_ix,pt2_iy,pt2_iz
- integer :: num_free_surface_faces
- real(kind=CUSTOM_REAL), dimension(3) :: u_vector,v_vector,w_vector
- logical, dimension(NGLOB_AB) :: iglob_is_surface_external_mesh
- logical, dimension(NSPEC_AB) :: ispec_is_surface_external_mesh
- integer, dimension(num_free_surface_faces) :: free_surface_ispec
- integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
-
- integer ix_initial_guess_source,iy_initial_guess_source,iz_initial_guess_source
- integer ier
- integer, dimension(NSOURCES) :: idomain
- integer, dimension(NGATHER_SOURCES,0:NPROC-1) :: idomain_all
-
- ! get the base pathname for output files
- call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
-
- ! read all the sources (note: each process reads the source file)
- call get_cmt(yr,jda,ho,mi,sec,tshift_cmt,hdur,lat,long,depth,moment_tensor, &
- DT,NSOURCES,min_tshift_cmt_original)
-
- ! define topology of the control element
- call usual_hex_nodes(iaddx,iaddy,iaddz)
-
- ! get MPI starting time
- time_start = wtime()
-
- ! user output
- if( myrank == 0 ) then
- if(SUPPRESS_UTM_PROJECTION ) then
- write(IMAIN,*) 'no UTM projection:'
- else
- write(IMAIN,*) 'UTM projection:'
- write(IMAIN,*) ' UTM zone: ',UTM_PROJECTION_ZONE
- endif
- if( USE_SOURCES_RECVS_Z ) then
- write(IMAIN,*) ' (depth) becomes directly (z) coordinate'
- endif
- endif
-
- ! loop on all the sources
- do isource = 1,NSOURCES
-
- !
- ! r -> z, theta -> -y, phi -> x
- !
- ! Mrr = Mzz
- ! Mtt = Myy
- ! Mpp = Mxx
- ! Mrt = -Myz
- ! Mrp = Mxz
- ! Mtp = -Mxy
-
- ! get the moment tensor
- Mzz(isource) = + moment_tensor(1,isource)
- Mxx(isource) = + moment_tensor(3,isource)
- Myy(isource) = + moment_tensor(2,isource)
- Mxz(isource) = + moment_tensor(5,isource)
- Myz(isource) = - moment_tensor(4,isource)
- Mxy(isource) = - moment_tensor(6,isource)
-
- ! gets UTM x,y
- call utm_geo(long(isource),lat(isource),utm_x_source(isource),utm_y_source(isource), &
- UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
-
- ! get approximate topography elevation at source long/lat coordinates
- ! set distance to huge initial value
- distmin = HUGEVAL
- if(num_free_surface_faces > 0) then
- iglob_selected = 1
- ! loop only on points inside the element
- ! exclude edges to ensure this point is not shared with other elements
- imin = 2
- imax = NGLLX - 1
-
- jmin = 2
- jmax = NGLLY - 1
-
- iselected = 0
- jselected = 0
- iface_selected = 0
- do iface=1,num_free_surface_faces
- do j=jmin,jmax
- do i=imin,imax
-
- ispec = free_surface_ispec(iface)
- igll = free_surface_ijk(1,(j-1)*NGLLY+i,iface)
- jgll = free_surface_ijk(2,(j-1)*NGLLY+i,iface)
- kgll = free_surface_ijk(3,(j-1)*NGLLY+i,iface)
- iglob = ibool(igll,jgll,kgll,ispec)
-
- ! keep this point if it is closer to the receiver
- dist = dsqrt((utm_x_source(isource)-dble(xstore(iglob)))**2 + &
- (utm_y_source(isource)-dble(ystore(iglob)))**2)
- if(dist < distmin) then
- distmin = dist
- iglob_selected = iglob
- iface_selected = iface
- iselected = i
- jselected = j
- altitude_source(1) = zstore(iglob_selected)
- endif
- enddo
- enddo
- ! end of loop on all the elements on the free surface
- end do
- ! weighted mean at current point of topography elevation of the four closest nodes
- ! set distance to huge initial value
- distmin = HUGEVAL
- do j=jselected,jselected+1
- do i=iselected,iselected+1
- inode = 1
- do jadjust=0,1
- do iadjust= 0,1
- ispec = free_surface_ispec(iface_selected)
- igll = free_surface_ijk(1,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
- jgll = free_surface_ijk(2,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
- kgll = free_surface_ijk(3,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
- iglob = ibool(igll,jgll,kgll,ispec)
-
- elevation_node(inode) = zstore(iglob)
- dist_node(inode) = dsqrt((utm_x_source(isource)-dble(xstore(iglob)))**2 + &
- (utm_y_source(isource)-dble(ystore(iglob)))**2)
- inode = inode + 1
- end do
- end do
- dist = sum(dist_node)
- if(dist < distmin) then
- distmin = dist
- altitude_source(1) = (dist_node(1)/dist)*elevation_node(1) + &
- (dist_node(2)/dist)*elevation_node(2) + &
- (dist_node(3)/dist)*elevation_node(3) + &
- (dist_node(4)/dist)*elevation_node(4)
- endif
- end do
- end do
- end if
- ! MPI communications to determine the best slice
- distmin_ele(1)= distmin
- call gather_all_dp(distmin_ele,1,distmin_ele_all,1,NPROC)
- call gather_all_dp(altitude_source,1,elevation_all,1,NPROC)
- if(myrank == 0) then
- iproc = minloc(distmin_ele_all)
- altitude_source(1) = elevation_all(iproc(1))
- end if
- call bcast_all_dp(altitude_source,1)
- elevation(isource) = altitude_source(1)
-
- ! orientation consistent with the UTM projection
- ! East
- nu_source(1,1,isource) = 1.d0
- nu_source(1,2,isource) = 0.d0
- nu_source(1,3,isource) = 0.d0
- ! North
- nu_source(2,1,isource) = 0.d0
- nu_source(2,2,isource) = 1.d0
- nu_source(2,3,isource) = 0.d0
- ! Vertical
- nu_source(3,1,isource) = 0.d0
- nu_source(3,2,isource) = 0.d0
- nu_source(3,3,isource) = 1.d0
-
- x_target_source = utm_x_source(isource)
- y_target_source = utm_y_source(isource)
-
- ! source Z coordinate
- if( USE_SOURCES_RECVS_Z ) then
- ! alternative: depth is given as z value directly
- z_target_source = depth(isource)
- else
- ! depth in CMTSOLUTION given in km
- z_target_source = - depth(isource)*1000.0d0 + elevation(isource)
- endif
-
- ! set distance to huge initial value
- distmin = HUGEVAL
-
- ispec_selected_source(isource) = 0
- ix_initial_guess_source = 0
- iy_initial_guess_source = 0
- iz_initial_guess_source = 0
- do ispec=1,NSPEC_AB
-
- ! define the interval in which we look for points
- if(USE_FORCE_POINT_SOURCE) then
- imin = 1
- imax = NGLLX
-
- jmin = 1
- jmax = NGLLY
-
- kmin = 1
- kmax = NGLLZ
-
- else
- ! loop only on points inside the element
- ! exclude edges to ensure this point is not shared with other elements
- imin = 2
- imax = NGLLX - 1
-
- jmin = 2
- jmax = NGLLY - 1
-
- kmin = 2
- kmax = NGLLZ - 1
- endif
-
- do k = kmin,kmax
- do j = jmin,jmax
- do i = imin,imax
-
- iglob = ibool(i,j,k,ispec)
-
- if (.not. SOURCES_CAN_BE_BURIED_EXT_MESH) then
- if ((.not. iglob_is_surface_external_mesh(iglob)) .or. (.not. ispec_is_surface_external_mesh(ispec))) then
- cycle
- endif
- endif
-
- ! keep this point if it is closer to the source
- dist = dsqrt((x_target_source-dble(xstore(iglob)))**2 &
- +(y_target_source-dble(ystore(iglob)))**2 &
- +(z_target_source-dble(zstore(iglob)))**2)
- if(dist < distmin) then
- distmin = dist
- ispec_selected_source(isource) = ispec
- ix_initial_guess_source = i
- iy_initial_guess_source = j
- iz_initial_guess_source = k
-
- ! store xi,eta,gamma and x,y,z of point found
- ! note: they have range [1.0d0,NGLLX/Y/Z], used for point sources
- ! see e.g. in compute_add_source_elastic.f90
- xi_source(isource) = dble(ix_initial_guess_source)
- eta_source(isource) = dble(iy_initial_guess_source)
- gamma_source(isource) = dble(iz_initial_guess_source)
-
- x_found_source(isource) = xstore(iglob)
- y_found_source(isource) = ystore(iglob)
- z_found_source(isource) = zstore(iglob)
-
- ! compute final distance between asked and found (converted to km)
- final_distance_source(isource) = dsqrt((x_target_source-x_found_source(isource))**2 + &
- (y_target_source-y_found_source(isource))**2 + (z_target_source-z_found_source(isource))**2)
-
- endif
-
- enddo
- enddo
- enddo
-
- ! end of loop on all the elements in current slice
- enddo
-
- if (ispec_selected_source(isource) == 0) then
- final_distance_source(isource) = HUGEVAL
- endif
-
- ! sets whether acoustic (1) or elastic (2)
- if( ispec_is_acoustic( ispec_selected_source(isource) ) ) then
- idomain(isource) = IDOMAIN_ACOUSTIC
- else if( ispec_is_elastic( ispec_selected_source(isource) ) ) then
- idomain(isource) = IDOMAIN_ELASTIC
- else
- idomain(isource) = 0
- endif
-
- ! get normal to the face of the hexahedra if receiver is on the surface
- if ((.not. SOURCES_CAN_BE_BURIED_EXT_MESH) .and. &
- .not. (ispec_selected_source(isource) == 0)) then
-
- ! note: at this point, xi_source,.. are in range [1.0d0,NGLLX/Y/Z] for point sources only,
- ! for non-point sources the range is limited to [2.0d0,NGLLX/Y/Z - 1]
- if( .not. USE_FORCE_POINT_SOURCE ) call exit_MPI(myrank,'error locate source: no point source at surface')
-
- ! initialize indices
- pt0_ix = -1
- pt0_iy = -1
- pt0_iz = -1
- pt1_ix = -1
- pt1_iy = -1
- pt1_iz = -1
- pt2_ix = -1
- pt2_iy = -1
- pt2_iz = -1
-
- ! we get two vectors of the face (three points) to compute the normal
- if (nint(xi_source(isource)) == 1 .and. &
- iglob_is_surface_external_mesh(ibool(1,2,2,ispec_selected_source(isource)))) then
- pt0_ix = 1
- pt0_iy = NGLLY
- pt0_iz = 1
- pt1_ix = 1
- pt1_iy = 1
- pt1_iz = 1
- pt2_ix = 1
- pt2_iy = NGLLY
- pt2_iz = NGLLZ
- endif
- if (nint(xi_source(isource)) == NGLLX .and. &
- iglob_is_surface_external_mesh(ibool(NGLLX,2,2,ispec_selected_source(isource)))) then
- pt0_ix = NGLLX
- pt0_iy = 1
- pt0_iz = 1
- pt1_ix = NGLLX
- pt1_iy = NGLLY
- pt1_iz = 1
- pt2_ix = NGLLX
- pt2_iy = 1
- pt2_iz = NGLLZ
- endif
- if (nint(eta_source(isource)) == 1 .and. &
- iglob_is_surface_external_mesh(ibool(2,1,2,ispec_selected_source(isource)))) then
- pt0_ix = 1
- pt0_iy = 1
- pt0_iz = 1
- pt1_ix = NGLLX
- pt1_iy = 1
- pt1_iz = 1
- pt2_ix = 1
- pt2_iy = 1
- pt2_iz = NGLLZ
- endif
- if (nint(eta_source(isource)) == NGLLY .and. &
- iglob_is_surface_external_mesh(ibool(2,NGLLY,2,ispec_selected_source(isource)))) then
- pt0_ix = NGLLX
- pt0_iy = NGLLY
- pt0_iz = 1
- pt1_ix = 1
- pt1_iy = NGLLY
- pt1_iz = 1
- pt2_ix = NGLLX
- pt2_iy = NGLLY
- pt2_iz = NGLLZ
- endif
- if (nint(gamma_source(isource)) == 1 .and. &
- iglob_is_surface_external_mesh(ibool(2,2,1,ispec_selected_source(isource)))) then
- pt0_ix = NGLLX
- pt0_iy = 1
- pt0_iz = 1
- pt1_ix = 1
- pt1_iy = 1
- pt1_iz = 1
- pt2_ix = NGLLX
- pt2_iy = NGLLY
- pt2_iz = 1
- endif
- if (nint(gamma_source(isource)) == NGLLZ .and. &
- iglob_is_surface_external_mesh(ibool(2,2,NGLLZ,ispec_selected_source(isource)))) then
- pt0_ix = 1
- pt0_iy = 1
- pt0_iz = NGLLZ
- pt1_ix = NGLLX
- pt1_iy = 1
- pt1_iz = NGLLZ
- pt2_ix = 1
- pt2_iy = NGLLY
- pt2_iz = NGLLZ
- endif
-
- if (pt0_ix<0 .or.pt0_iy<0 .or. pt0_iz<0 .or. &
- pt1_ix<0 .or. pt1_iy<0 .or. pt1_iz<0 .or. &
- pt2_ix<0 .or. pt2_iy<0 .or. pt2_iz<0) then
- call exit_mpi(myrank,'error in computing normal for sources.')
- endif
-
- u_vector(1) = xstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
- - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
- u_vector(2) = ystore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
- - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
- u_vector(3) = zstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
- - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
- v_vector(1) = xstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
- - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
- v_vector(2) = ystore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
- - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
- v_vector(3) = zstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
- - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
-
- ! cross product
- w_vector(1) = u_vector(2)*v_vector(3) - u_vector(3)*v_vector(2)
- w_vector(2) = u_vector(3)*v_vector(1) - u_vector(1)*v_vector(3)
- w_vector(3) = u_vector(1)*v_vector(2) - u_vector(2)*v_vector(1)
-
- ! normalize vector w
- w_vector(:) = w_vector(:)/sqrt(w_vector(1)**2+w_vector(2)**2+w_vector(3)**2)
-
- ! build the two other vectors for a direct base: we normalize u, and v=w^u
- u_vector(:) = u_vector(:)/sqrt(u_vector(1)**2+u_vector(2)**2+u_vector(3)**2)
- v_vector(1) = w_vector(2)*u_vector(3) - w_vector(3)*u_vector(2)
- v_vector(2) = w_vector(3)*u_vector(1) - w_vector(1)*u_vector(3)
- v_vector(3) = w_vector(1)*u_vector(2) - w_vector(2)*u_vector(1)
-
- ! build rotation matrice nu for seismograms
- ! East (u)
- nu_source(1,1,isource) = u_vector(1)
- nu_source(1,2,isource) = v_vector(1)
- nu_source(1,3,isource) = w_vector(1)
- ! North (v)
- nu_source(2,1,isource) = u_vector(2)
- nu_source(2,2,isource) = v_vector(2)
- nu_source(2,3,isource) = w_vector(2)
- ! Vertical (w)
- nu_source(3,1,isource) = u_vector(3)
- nu_source(3,2,isource) = v_vector(3)
- nu_source(3,3,isource) = w_vector(3)
-
- endif ! of if (.not. SOURCES_CAN_BE_BURIED_EXT_MESH)
-
-! *******************************************
-! find the best (xi,eta,gamma) for the source
-! *******************************************
-
- ! for point sources, the location will be exactly at a GLL point
- ! otherwise this tries to find best location
- if(.not. USE_FORCE_POINT_SOURCE) then
-
- ! uses actual location interpolators, in range [-1,1]
- xi = xigll(ix_initial_guess_source)
- eta = yigll(iy_initial_guess_source)
- gamma = zigll(iz_initial_guess_source)
-
- ! define coordinates of the control points of the element
- do ia=1,NGNOD
- iax = 0
- iay = 0
- iaz = 0
- if(iaddx(ia) == 0) then
- iax = 1
- else if(iaddx(ia) == 1) then
- iax = (NGLLX+1)/2
- else if(iaddx(ia) == 2) then
- iax = NGLLX
- else
- call exit_MPI(myrank,'incorrect value of iaddx')
- endif
-
- if(iaddy(ia) == 0) then
- iay = 1
- else if(iaddy(ia) == 1) then
- iay = (NGLLY+1)/2
- else if(iaddy(ia) == 2) then
- iay = NGLLY
- else
- call exit_MPI(myrank,'incorrect value of iaddy')
- endif
-
- if(iaddz(ia) == 0) then
- iaz = 1
- else if(iaddz(ia) == 1) then
- iaz = (NGLLZ+1)/2
- else if(iaddz(ia) == 2) then
- iaz = NGLLZ
- else
- call exit_MPI(myrank,'incorrect value of iaddz')
- endif
-
- iglob = ibool(iax,iay,iaz,ispec_selected_source(isource))
- xelm(ia) = dble(xstore(iglob))
- yelm(ia) = dble(ystore(iglob))
- zelm(ia) = dble(zstore(iglob))
-
- enddo
-
- ! iterate to solve the non linear system
- do iter_loop = 1,NUM_ITER
-
- ! recompute jacobian for the new point
- call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
-
- ! compute distance to target location
- dx = - (x - x_target_source)
- dy = - (y - y_target_source)
- dz = - (z - z_target_source)
-
- ! compute increments
- dxi = xix*dx + xiy*dy + xiz*dz
- deta = etax*dx + etay*dy + etaz*dz
- dgamma = gammax*dx + gammay*dy + gammaz*dz
-
- ! update values
- xi = xi + dxi
- eta = eta + deta
- gamma = gamma + dgamma
-
- ! impose that we stay in that element
- ! (useful if user gives a source outside the mesh for instance)
- if (xi > 1.d0) xi = 1.d0
- if (xi < -1.d0) xi = -1.d0
- if (eta > 1.d0) eta = 1.d0
- if (eta < -1.d0) eta = -1.d0
- if (gamma > 1.d0) gamma = 1.d0
- if (gamma < -1.d0) gamma = -1.d0
-
- enddo
-
- ! compute final coordinates of point found
- call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
-
- ! store xi,eta,gamma and x,y,z of point found
- ! note: xi/eta/gamma will be in range [-1,1]
- xi_source(isource) = xi
- eta_source(isource) = eta
- gamma_source(isource) = gamma
- x_found_source(isource) = x
- y_found_source(isource) = y
- z_found_source(isource) = z
-
- ! compute final distance between asked and found (converted to km)
- final_distance_source(isource) = dsqrt((x_target_source-x_found_source(isource))**2 + &
- (y_target_source-y_found_source(isource))**2 + (z_target_source-z_found_source(isource))**2)
-
- endif ! of if (.not. USE_FORCE_POINT_SOURCE)
-
- ! end of loop on all the sources
- enddo
-
- ! now gather information from all the nodes
- ngather = NSOURCES/NGATHER_SOURCES
- if (mod(NSOURCES,NGATHER_SOURCES)/= 0) ngather = ngather+1
- do ig = 1, ngather
- ns = (ig-1) * NGATHER_SOURCES + 1
- ne = min(ig*NGATHER_SOURCES, NSOURCES)
- ng = ne - ns + 1
-
- ispec_selected_source_all(:,:) = -1
-
- ! avoids warnings about temporary creations of arrays for function call by compiler
- allocate(tmp_i_local(ng),tmp_i_all_local(ng,0:NPROC-1),stat=ier)
- if( ier /= 0 ) stop 'error allocating array tmp_i_local'
- tmp_i_local(:) = ispec_selected_source(ns:ne)
- call gather_all_i(tmp_i_local,ng,tmp_i_all_local,ng,NPROC)
- ispec_selected_source_all(1:ng,:) = tmp_i_all_local(:,:)
-
- ! acoustic/elastic domain
- tmp_i_local(:) = idomain(ns:ne)
- call gather_all_i(tmp_i_local,ng,tmp_i_all_local,ng,NPROC)
- idomain_all(1:ng,:) = tmp_i_all_local(:,:)
-
- deallocate(tmp_i_local,tmp_i_all_local)
-
- ! avoids warnings about temporary creations of arrays for function call by compiler
- allocate(tmp_local(ng),tmp_all_local(ng,0:NPROC-1),stat=ier)
- if( ier /= 0 ) stop 'error allocating array tmp_local'
- tmp_local(:) = xi_source(ns:ne)
- call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
- xi_source_all(1:ng,:) = tmp_all_local(:,:)
-
- tmp_local(:) = eta_source(ns:ne)
- call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
- eta_source_all(1:ng,:) = tmp_all_local(:,:)
-
- tmp_local(:) = gamma_source(ns:ne)
- call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
- gamma_source_all(1:ng,:) = tmp_all_local(:,:)
-
- tmp_local(:) = final_distance_source(ns:ne)
- call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
- final_distance_source_all(1:ng,:) = tmp_all_local(:,:)
-
- tmp_local(:) = x_found_source(ns:ne)
- call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
- x_found_source_all(1:ng,:) = tmp_all_local(:,:)
-
- tmp_local(:) = y_found_source(ns:ne)
- call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
- y_found_source_all(1:ng,:) = tmp_all_local(:,:)
-
- tmp_local(:) = z_found_source(ns:ne)
- call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
- z_found_source_all(1:ng,:) = tmp_all_local(:,:)
-
- do i=1,3
- do j=1,3
- tmp_local(:) = nu_source(i,j,ns:ne)
- call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
- nu_source_all(i,j,1:ng,:) = tmp_all_local(:,:)
- enddo
- enddo
- deallocate(tmp_local,tmp_all_local)
-
- ! this is executed by main process only
- if(myrank == 0) then
-
- ! check that the gather operation went well
- if(any(ispec_selected_source_all(1:ng,:) == -1)) call exit_MPI(myrank,'gather operation failed for source')
-
- ! loop on all the sources
- do is = 1,ng
- isource = ns + is - 1
-
- ! loop on all the results to determine the best slice
- distmin = HUGEVAL
- do iprocloop = 0,NPROC-1
- if(final_distance_source_all(is,iprocloop) < distmin) then
- distmin = final_distance_source_all(is,iprocloop)
- islice_selected_source(isource) = iprocloop
- ispec_selected_source(isource) = ispec_selected_source_all(is,iprocloop)
- xi_source(isource) = xi_source_all(is,iprocloop)
- eta_source(isource) = eta_source_all(is,iprocloop)
- gamma_source(isource) = gamma_source_all(is,iprocloop)
- x_found_source(isource) = x_found_source_all(is,iprocloop)
- y_found_source(isource) = y_found_source_all(is,iprocloop)
- z_found_source(isource) = z_found_source_all(is,iprocloop)
- nu_source(:,:,isource) = nu_source_all(:,:,isource,iprocloop)
- idomain(isource) = idomain_all(is,iprocloop)
- endif
- enddo
- final_distance_source(isource) = distmin
-
- enddo
- endif !myrank
- enddo ! ngather
-
- if (myrank == 0) then
-
- do isource = 1,NSOURCES
-
- if(SHOW_DETAILS_LOCATE_SOURCE .or. NSOURCES == 1) then
-
- write(IMAIN,*)
- write(IMAIN,*) '*************************************'
- write(IMAIN,*) ' locating source ',isource
- write(IMAIN,*) '*************************************'
- write(IMAIN,*)
- write(IMAIN,*) 'source located in slice ',islice_selected_source(isource)
- write(IMAIN,*) ' in element ',ispec_selected_source(isource)
-
- if( idomain(isource) == IDOMAIN_ACOUSTIC ) then
- write(IMAIN,*) ' in acoustic domain'
- else if( idomain(isource) == IDOMAIN_ELASTIC ) then
- write(IMAIN,*) ' in elastic domain'
- else
- write(IMAIN,*) ' in unknown domain'
- endif
-
- write(IMAIN,*)
- if(USE_FORCE_POINT_SOURCE) then
- write(IMAIN,*) ' i index of source in that element: ',nint(xi_source(isource))
- write(IMAIN,*) ' j index of source in that element: ',nint(eta_source(isource))
- write(IMAIN,*) ' k index of source in that element: ',nint(gamma_source(isource))
- write(IMAIN,*)
- write(IMAIN,*) ' component direction: ',COMPONENT_FORCE_SOURCE
- write(IMAIN,*)
- write(IMAIN,*) ' nu1 = ',nu_source(1,:,isource)
- write(IMAIN,*) ' nu2 = ',nu_source(2,:,isource)
- write(IMAIN,*) ' nu3 = ',nu_source(3,:,isource)
- write(IMAIN,*)
- write(IMAIN,*) ' at (x,y,z) coordinates = ',x_found_source(isource),y_found_source(isource),z_found_source(isource)
-
- ! prints frequency content for point forces
- f0 = hdur(isource)
- t0_ricker = 1.2d0/f0
- write(IMAIN,*) ' using a source of dominant frequency ',f0
- write(IMAIN,*) ' lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
- write(IMAIN,*) ' lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
- write(IMAIN,*) ' t0_ricker = ',t0_ricker,'tshift_cmt = ',tshift_cmt(isource)
- write(IMAIN,*)
- write(IMAIN,*) ' half duration -> frequency: ',hdur(isource),' seconds**(-1)'
- else
- write(IMAIN,*) ' xi coordinate of source in that element: ',xi_source(isource)
- write(IMAIN,*) ' eta coordinate of source in that element: ',eta_source(isource)
- write(IMAIN,*) ' gamma coordinate of source in that element: ',gamma_source(isource)
- write(IMAIN,*)
- ! add message if source is a Heaviside
- if(hdur(isource) <= 5.*DT) then
- write(IMAIN,*)
- write(IMAIN,*) 'Source time function is a Heaviside, convolve later'
- write(IMAIN,*)
- endif
- write(IMAIN,*) ' half duration: ',hdur(isource),' seconds'
- endif
- write(IMAIN,*) ' time shift: ',tshift_cmt(isource),' seconds'
- write(IMAIN,*)
- write(IMAIN,*) 'original (requested) position of the source:'
- write(IMAIN,*)
- write(IMAIN,*) ' latitude: ',lat(isource)
- write(IMAIN,*) ' longitude: ',long(isource)
- write(IMAIN,*)
- if( SUPPRESS_UTM_PROJECTION ) then
- write(IMAIN,*) ' x: ',utm_x_source(isource)
- write(IMAIN,*) ' y: ',utm_y_source(isource)
- else
- write(IMAIN,*) ' UTM x: ',utm_x_source(isource)
- write(IMAIN,*) ' UTM y: ',utm_y_source(isource)
- endif
- if( USE_SOURCES_RECVS_Z ) then
- write(IMAIN,*) ' z: ',depth(isource),' km'
- else
- write(IMAIN,*) ' depth: ',depth(isource),' km'
- write(IMAIN,*) 'topo elevation: ',elevation(isource)
- endif
-
- write(IMAIN,*)
- write(IMAIN,*) 'position of the source that will be used:'
- write(IMAIN,*)
- if( SUPPRESS_UTM_PROJECTION ) then
- write(IMAIN,*) ' x: ',x_found_source(isource)
- write(IMAIN,*) ' y: ',y_found_source(isource)
- else
- write(IMAIN,*) ' UTM x: ',x_found_source(isource)
- write(IMAIN,*) ' UTM y: ',y_found_source(isource)
- endif
- if( USE_SOURCES_RECVS_Z ) then
- write(IMAIN,*) ' z: ',z_found_source(isource)
- else
- write(IMAIN,*) ' depth: ',dabs(z_found_source(isource) - elevation(isource))/1000.,' km'
- write(IMAIN,*) ' z: ',z_found_source(isource)
- endif
- write(IMAIN,*)
-
- ! display error in location estimate
- write(IMAIN,*) 'error in location of the source: ',sngl(final_distance_source(isource)),' m'
-
- ! add warning if estimate is poor
- ! (usually means source outside the mesh given by the user)
- if(final_distance_source(isource) > 3000.d0) then
- write(IMAIN,*)
- write(IMAIN,*) '*****************************************************'
- write(IMAIN,*) '*****************************************************'
- write(IMAIN,*) '***** WARNING: source location estimate is poor *****'
- write(IMAIN,*) '*****************************************************'
- write(IMAIN,*) '*****************************************************'
- endif
-
- endif ! end of detailed output to locate source
-
- ! checks CMTSOLUTION format for acoustic case
- if( idomain(isource) == IDOMAIN_ACOUSTIC ) then
- if( Mxx(isource) /= Myy(isource) .or. Myy(isource) /= Mzz(isource) .or. &
- Mxy(isource) > TINYVAL .or. Mxz(isource) > TINYVAL .or. Myz(isource) > TINYVAL ) then
- write(IMAIN,*)
- write(IMAIN,*) ' error CMTSOLUTION format for acoustic source:'
- write(IMAIN,*) ' acoustic source needs explosive moment tensor with'
- write(IMAIN,*) ' Mrr = Mtt = Mpp '
- write(IMAIN,*) ' and '
- write(IMAIN,*) ' Mrt = Mrp = Mtp = zero'
- write(IMAIN,*)
- call exit_mpi(myrank,'error acoustic source')
- endif
- endif
-
- ! checks source domain
- if( idomain(isource) /= IDOMAIN_ACOUSTIC .and. idomain(isource) /= IDOMAIN_ELASTIC ) then
- ! only acoustic/elastic domain implement yet
- call exit_MPI(myrank,'source located in unknown domain')
- endif
-
- ! end of loop on all the sources
- enddo
-
- if( .not. SHOW_DETAILS_LOCATE_SOURCE .and. NSOURCES > 1 ) then
- write(IMAIN,*)
- write(IMAIN,*) '*************************************'
- write(IMAIN,*) ' using sources ',NSOURCES
- write(IMAIN,*) '*************************************'
- write(IMAIN,*)
- endif
-
- if(PRINT_SOURCE_TIME_FUNCTION) then
- write(IMAIN,*)
- write(IMAIN,*) 'printing the source-time function'
- endif
-
- ! display maximum error in location estimate
- write(IMAIN,*)
- write(IMAIN,*) 'maximum error in location of the sources: ',sngl(maxval(final_distance_source)),' m'
- write(IMAIN,*)
-
- ! sets new utm coordinates for best locations
- utm_x_source(:) = x_found_source(:)
- utm_y_source(:) = y_found_source(:)
-
- endif ! end of section executed by main process only
-
- ! main process broadcasts the results to all the slices
- call bcast_all_i(islice_selected_source,NSOURCES)
- call bcast_all_i(ispec_selected_source,NSOURCES)
- call bcast_all_dp(xi_source,NSOURCES)
- call bcast_all_dp(eta_source,NSOURCES)
- call bcast_all_dp(gamma_source,NSOURCES)
- call bcast_all_dp(utm_x_source,NSOURCES)
- call bcast_all_dp(utm_y_source,NSOURCES)
-
- ! elapsed time since beginning of source detection
- if(myrank == 0) then
- tCPU = wtime() - time_start
- write(IMAIN,*)
- write(IMAIN,*) 'Elapsed time for detection of sources in seconds = ',tCPU
- write(IMAIN,*)
- write(IMAIN,*) 'End of source detection - done'
- write(IMAIN,*)
- ! output source information to a file so that we can load it and write to SU headers later
- open(unit=IOUT_SU,file=trim(OUTPUT_FILES)//'/output_list_sources.txt',status='unknown')
- do isource=1,NSOURCES
- write(IOUT_SU,*) x_found_source(isource),y_found_source(isource),z_found_source(isource)
- enddo
- close(IOUT_SU)
- endif
-
- end subroutine locate_source
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!----
+!---- locate_source finds the correct position of the source
+!----
+
+ subroutine locate_source(ibool,NSOURCES,myrank,NSPEC_AB,NGLOB_AB,xstore,ystore,zstore, &
+ xigll,yigll,zigll,NPROC, &
+ tshift_cmt,min_tshift_cmt_original,yr,jda,ho,mi,utm_x_source,utm_y_source, &
+ DT,hdur,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
+ islice_selected_source,ispec_selected_source, &
+ xi_source,eta_source,gamma_source, &
+ UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+ PRINT_SOURCE_TIME_FUNCTION, &
+ nu_source,iglob_is_surface_external_mesh,ispec_is_surface_external_mesh, &
+ ispec_is_acoustic,ispec_is_elastic, &
+ num_free_surface_faces,free_surface_ispec,free_surface_ijk)
+
+ implicit none
+
+ include "constants.h"
+
+ integer NPROC,UTM_PROJECTION_ZONE
+ integer NSPEC_AB,NGLOB_AB,NSOURCES
+
+ logical PRINT_SOURCE_TIME_FUNCTION,SUPPRESS_UTM_PROJECTION
+
+ double precision DT
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+
+ integer myrank
+
+ ! arrays containing coordinates of the points
+ real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: xstore,ystore,zstore
+
+ logical, dimension(NSPEC_AB) :: ispec_is_acoustic,ispec_is_elastic
+
+ integer yr,jda,ho,mi
+
+ double precision tshift_cmt(NSOURCES)
+ double precision sec,min_tshift_cmt_original
+
+ integer iprocloop
+
+ integer i,j,k,ispec,iglob,iglob_selected,inode,iface,isource
+ integer imin,imax,jmin,jmax,kmin,kmax,igll,jgll,kgll
+ integer iselected,jselected,iface_selected,iadjust,jadjust
+ integer iproc(1)
+
+ double precision, dimension(NSOURCES) :: utm_x_source,utm_y_source
+ double precision dist
+ double precision xi,eta,gamma,dx,dy,dz,dxi,deta
+
+ ! Gauss-Lobatto-Legendre points of integration
+ double precision xigll(NGLLX)
+ double precision yigll(NGLLY)
+ double precision zigll(NGLLZ)
+
+ ! topology of the control points of the surface element
+ integer iax,iay,iaz
+ integer iaddx(NGNOD),iaddy(NGNOD),iaddz(NGNOD)
+
+ ! coordinates of the control points of the surface element
+ double precision xelm(NGNOD),yelm(NGNOD),zelm(NGNOD)
+
+ integer iter_loop
+
+ integer ia
+ double precision x,y,z
+ double precision xix,xiy,xiz
+ double precision etax,etay,etaz
+ double precision gammax,gammay,gammaz
+ double precision dgamma
+
+ double precision final_distance_source(NSOURCES)
+
+ double precision x_target_source,y_target_source,z_target_source
+
+ double precision,dimension(1) :: altitude_source,distmin_ele
+ double precision,dimension(NPROC) :: distmin_ele_all,elevation_all
+ double precision,dimension(4) :: elevation_node,dist_node
+
+ integer islice_selected_source(NSOURCES)
+
+ ! timer MPI
+ double precision, external :: wtime
+ double precision time_start,tCPU
+
+ integer ispec_selected_source(NSOURCES)
+
+ integer ngather, ns, ne, ig, is, ng
+
+ integer, dimension(NGATHER_SOURCES,0:NPROC-1) :: ispec_selected_source_all
+ double precision, dimension(NGATHER_SOURCES,0:NPROC-1) :: xi_source_all,eta_source_all,gamma_source_all, &
+ final_distance_source_all,x_found_source_all,y_found_source_all,z_found_source_all
+ double precision, dimension(3,3,NGATHER_SOURCES,0:NPROC-1) :: nu_source_all
+
+ double precision, dimension(:), allocatable :: tmp_local
+ double precision, dimension(:,:),allocatable :: tmp_all_local
+
+ double precision hdur(NSOURCES)
+ double precision :: f0,t0_ricker
+
+ double precision, dimension(NSOURCES) :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
+ double precision, dimension(NSOURCES) :: xi_source,eta_source,gamma_source
+ double precision, dimension(3,3,NSOURCES) :: nu_source
+
+ double precision, dimension(NSOURCES) :: lat,long,depth
+ double precision moment_tensor(6,NSOURCES)
+
+ character(len=256) OUTPUT_FILES
+
+ double precision, dimension(NSOURCES) :: x_found_source,y_found_source,z_found_source
+ double precision, dimension(NSOURCES) :: elevation
+ double precision distmin
+
+ integer, dimension(:), allocatable :: tmp_i_local
+ integer, dimension(:,:),allocatable :: tmp_i_all_local
+
+ ! for surface locating and normal computing with external mesh
+ integer :: pt0_ix,pt0_iy,pt0_iz,pt1_ix,pt1_iy,pt1_iz,pt2_ix,pt2_iy,pt2_iz
+ integer :: num_free_surface_faces
+ real(kind=CUSTOM_REAL), dimension(3) :: u_vector,v_vector,w_vector
+ logical, dimension(NGLOB_AB) :: iglob_is_surface_external_mesh
+ logical, dimension(NSPEC_AB) :: ispec_is_surface_external_mesh
+ integer, dimension(num_free_surface_faces) :: free_surface_ispec
+ integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
+
+ integer ix_initial_guess_source,iy_initial_guess_source,iz_initial_guess_source
+ integer ier
+ integer, dimension(NSOURCES) :: idomain
+ integer, dimension(NGATHER_SOURCES,0:NPROC-1) :: idomain_all
+
+ ! get the base pathname for output files
+ call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
+
+ ! read all the sources (note: each process reads the source file)
+ call get_cmt(yr,jda,ho,mi,sec,tshift_cmt,hdur,lat,long,depth,moment_tensor, &
+ DT,NSOURCES,min_tshift_cmt_original)
+
+ ! define topology of the control element
+ call usual_hex_nodes(iaddx,iaddy,iaddz)
+
+ ! get MPI starting time
+ time_start = wtime()
+
+ ! user output
+ if( myrank == 0 ) then
+ if(SUPPRESS_UTM_PROJECTION ) then
+ write(IMAIN,*) 'no UTM projection:'
+ else
+ write(IMAIN,*) 'UTM projection:'
+ write(IMAIN,*) ' UTM zone: ',UTM_PROJECTION_ZONE
+ endif
+ if( USE_SOURCES_RECVS_Z ) then
+ write(IMAIN,*) ' (depth) becomes directly (z) coordinate'
+ endif
+ endif
+
+ ! loop on all the sources
+ do isource = 1,NSOURCES
+
+ !
+ ! r -> z, theta -> -y, phi -> x
+ !
+ ! Mrr = Mzz
+ ! Mtt = Myy
+ ! Mpp = Mxx
+ ! Mrt = -Myz
+ ! Mrp = Mxz
+ ! Mtp = -Mxy
+
+ ! get the moment tensor
+ Mzz(isource) = + moment_tensor(1,isource)
+ Mxx(isource) = + moment_tensor(3,isource)
+ Myy(isource) = + moment_tensor(2,isource)
+ Mxz(isource) = + moment_tensor(5,isource)
+ Myz(isource) = - moment_tensor(4,isource)
+ Mxy(isource) = - moment_tensor(6,isource)
+
+ ! gets UTM x,y
+ call utm_geo(long(isource),lat(isource),utm_x_source(isource),utm_y_source(isource), &
+ UTM_PROJECTION_ZONE,ILONGLAT2UTM,SUPPRESS_UTM_PROJECTION)
+
+ ! get approximate topography elevation at source long/lat coordinates
+ ! set distance to huge initial value
+ distmin = HUGEVAL
+ if(num_free_surface_faces > 0) then
+ iglob_selected = 1
+ ! loop only on points inside the element
+ ! exclude edges to ensure this point is not shared with other elements
+ imin = 2
+ imax = NGLLX - 1
+
+ jmin = 2
+ jmax = NGLLY - 1
+
+ iselected = 0
+ jselected = 0
+ iface_selected = 0
+ do iface=1,num_free_surface_faces
+ do j=jmin,jmax
+ do i=imin,imax
+
+ ispec = free_surface_ispec(iface)
+ igll = free_surface_ijk(1,(j-1)*NGLLY+i,iface)
+ jgll = free_surface_ijk(2,(j-1)*NGLLY+i,iface)
+ kgll = free_surface_ijk(3,(j-1)*NGLLY+i,iface)
+ iglob = ibool(igll,jgll,kgll,ispec)
+
+ ! keep this point if it is closer to the receiver
+ dist = dsqrt((utm_x_source(isource)-dble(xstore(iglob)))**2 + &
+ (utm_y_source(isource)-dble(ystore(iglob)))**2)
+ if(dist < distmin) then
+ distmin = dist
+ iglob_selected = iglob
+ iface_selected = iface
+ iselected = i
+ jselected = j
+ altitude_source(1) = zstore(iglob_selected)
+ endif
+ enddo
+ enddo
+ ! end of loop on all the elements on the free surface
+ end do
+ ! weighted mean at current point of topography elevation of the four closest nodes
+ ! set distance to huge initial value
+ distmin = HUGEVAL
+ do j=jselected,jselected+1
+ do i=iselected,iselected+1
+ inode = 1
+ do jadjust=0,1
+ do iadjust= 0,1
+ ispec = free_surface_ispec(iface_selected)
+ igll = free_surface_ijk(1,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+ jgll = free_surface_ijk(2,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+ kgll = free_surface_ijk(3,(j-jadjust-1)*NGLLY+i-iadjust,iface_selected)
+ iglob = ibool(igll,jgll,kgll,ispec)
+
+ elevation_node(inode) = zstore(iglob)
+ dist_node(inode) = dsqrt((utm_x_source(isource)-dble(xstore(iglob)))**2 + &
+ (utm_y_source(isource)-dble(ystore(iglob)))**2)
+ inode = inode + 1
+ end do
+ end do
+ dist = sum(dist_node)
+ if(dist < distmin) then
+ distmin = dist
+ altitude_source(1) = (dist_node(1)/dist)*elevation_node(1) + &
+ (dist_node(2)/dist)*elevation_node(2) + &
+ (dist_node(3)/dist)*elevation_node(3) + &
+ (dist_node(4)/dist)*elevation_node(4)
+ endif
+ end do
+ end do
+ end if
+ ! MPI communications to determine the best slice
+ distmin_ele(1)= distmin
+ call gather_all_dp(distmin_ele,1,distmin_ele_all,1,NPROC)
+ call gather_all_dp(altitude_source,1,elevation_all,1,NPROC)
+ if(myrank == 0) then
+ iproc = minloc(distmin_ele_all)
+ altitude_source(1) = elevation_all(iproc(1))
+ end if
+ call bcast_all_dp(altitude_source,1)
+ elevation(isource) = altitude_source(1)
+
+ ! orientation consistent with the UTM projection
+ ! East
+ nu_source(1,1,isource) = 1.d0
+ nu_source(1,2,isource) = 0.d0
+ nu_source(1,3,isource) = 0.d0
+ ! North
+ nu_source(2,1,isource) = 0.d0
+ nu_source(2,2,isource) = 1.d0
+ nu_source(2,3,isource) = 0.d0
+ ! Vertical
+ nu_source(3,1,isource) = 0.d0
+ nu_source(3,2,isource) = 0.d0
+ nu_source(3,3,isource) = 1.d0
+
+ x_target_source = utm_x_source(isource)
+ y_target_source = utm_y_source(isource)
+
+ ! source Z coordinate
+ if( USE_SOURCES_RECVS_Z ) then
+ ! alternative: depth is given as z value directly
+ z_target_source = depth(isource)
+ else
+ ! depth in CMTSOLUTION given in km
+ z_target_source = - depth(isource)*1000.0d0 + elevation(isource)
+ endif
+
+ ! set distance to huge initial value
+ distmin = HUGEVAL
+
+ ispec_selected_source(isource) = 0
+ ix_initial_guess_source = 0
+ iy_initial_guess_source = 0
+ iz_initial_guess_source = 0
+ do ispec=1,NSPEC_AB
+
+ ! define the interval in which we look for points
+ if(USE_FORCE_POINT_SOURCE) then
+ imin = 1
+ imax = NGLLX
+
+ jmin = 1
+ jmax = NGLLY
+
+ kmin = 1
+ kmax = NGLLZ
+
+ else
+ ! loop only on points inside the element
+ ! exclude edges to ensure this point is not shared with other elements
+ imin = 2
+ imax = NGLLX - 1
+
+ jmin = 2
+ jmax = NGLLY - 1
+
+ kmin = 2
+ kmax = NGLLZ - 1
+ endif
+
+ do k = kmin,kmax
+ do j = jmin,jmax
+ do i = imin,imax
+
+ iglob = ibool(i,j,k,ispec)
+
+ if (.not. SOURCES_CAN_BE_BURIED_EXT_MESH) then
+ if ((.not. iglob_is_surface_external_mesh(iglob)) .or. (.not. ispec_is_surface_external_mesh(ispec))) then
+ cycle
+ endif
+ endif
+
+ ! keep this point if it is closer to the source
+ dist = dsqrt((x_target_source-dble(xstore(iglob)))**2 &
+ +(y_target_source-dble(ystore(iglob)))**2 &
+ +(z_target_source-dble(zstore(iglob)))**2)
+ if(dist < distmin) then
+ distmin = dist
+ ispec_selected_source(isource) = ispec
+ ix_initial_guess_source = i
+ iy_initial_guess_source = j
+ iz_initial_guess_source = k
+
+ ! store xi,eta,gamma and x,y,z of point found
+ ! note: they have range [1.0d0,NGLLX/Y/Z], used for point sources
+ ! see e.g. in compute_add_source_elastic.f90
+ xi_source(isource) = dble(ix_initial_guess_source)
+ eta_source(isource) = dble(iy_initial_guess_source)
+ gamma_source(isource) = dble(iz_initial_guess_source)
+
+ x_found_source(isource) = xstore(iglob)
+ y_found_source(isource) = ystore(iglob)
+ z_found_source(isource) = zstore(iglob)
+
+ ! compute final distance between asked and found (converted to km)
+ final_distance_source(isource) = dsqrt((x_target_source-x_found_source(isource))**2 + &
+ (y_target_source-y_found_source(isource))**2 + (z_target_source-z_found_source(isource))**2)
+
+ endif
+
+ enddo
+ enddo
+ enddo
+
+ ! end of loop on all the elements in current slice
+ enddo
+
+ if (ispec_selected_source(isource) == 0) then
+ final_distance_source(isource) = HUGEVAL
+ endif
+
+ ! sets whether acoustic (1) or elastic (2)
+ if( ispec_is_acoustic( ispec_selected_source(isource) ) ) then
+ idomain(isource) = IDOMAIN_ACOUSTIC
+ else if( ispec_is_elastic( ispec_selected_source(isource) ) ) then
+ idomain(isource) = IDOMAIN_ELASTIC
+ else
+ idomain(isource) = 0
+ endif
+
+ ! get normal to the face of the hexahedra if receiver is on the surface
+ if ((.not. SOURCES_CAN_BE_BURIED_EXT_MESH) .and. &
+ .not. (ispec_selected_source(isource) == 0)) then
+
+ ! note: at this point, xi_source,.. are in range [1.0d0,NGLLX/Y/Z] for point sources only,
+ ! for non-point sources the range is limited to [2.0d0,NGLLX/Y/Z - 1]
+ if( .not. USE_FORCE_POINT_SOURCE ) call exit_MPI(myrank,'error locate source: no point source at surface')
+
+ ! initialize indices
+ pt0_ix = -1
+ pt0_iy = -1
+ pt0_iz = -1
+ pt1_ix = -1
+ pt1_iy = -1
+ pt1_iz = -1
+ pt2_ix = -1
+ pt2_iy = -1
+ pt2_iz = -1
+
+ ! we get two vectors of the face (three points) to compute the normal
+ if (nint(xi_source(isource)) == 1 .and. &
+ iglob_is_surface_external_mesh(ibool(1,2,2,ispec_selected_source(isource)))) then
+ pt0_ix = 1
+ pt0_iy = NGLLY
+ pt0_iz = 1
+ pt1_ix = 1
+ pt1_iy = 1
+ pt1_iz = 1
+ pt2_ix = 1
+ pt2_iy = NGLLY
+ pt2_iz = NGLLZ
+ endif
+ if (nint(xi_source(isource)) == NGLLX .and. &
+ iglob_is_surface_external_mesh(ibool(NGLLX,2,2,ispec_selected_source(isource)))) then
+ pt0_ix = NGLLX
+ pt0_iy = 1
+ pt0_iz = 1
+ pt1_ix = NGLLX
+ pt1_iy = NGLLY
+ pt1_iz = 1
+ pt2_ix = NGLLX
+ pt2_iy = 1
+ pt2_iz = NGLLZ
+ endif
+ if (nint(eta_source(isource)) == 1 .and. &
+ iglob_is_surface_external_mesh(ibool(2,1,2,ispec_selected_source(isource)))) then
+ pt0_ix = 1
+ pt0_iy = 1
+ pt0_iz = 1
+ pt1_ix = NGLLX
+ pt1_iy = 1
+ pt1_iz = 1
+ pt2_ix = 1
+ pt2_iy = 1
+ pt2_iz = NGLLZ
+ endif
+ if (nint(eta_source(isource)) == NGLLY .and. &
+ iglob_is_surface_external_mesh(ibool(2,NGLLY,2,ispec_selected_source(isource)))) then
+ pt0_ix = NGLLX
+ pt0_iy = NGLLY
+ pt0_iz = 1
+ pt1_ix = 1
+ pt1_iy = NGLLY
+ pt1_iz = 1
+ pt2_ix = NGLLX
+ pt2_iy = NGLLY
+ pt2_iz = NGLLZ
+ endif
+ if (nint(gamma_source(isource)) == 1 .and. &
+ iglob_is_surface_external_mesh(ibool(2,2,1,ispec_selected_source(isource)))) then
+ pt0_ix = NGLLX
+ pt0_iy = 1
+ pt0_iz = 1
+ pt1_ix = 1
+ pt1_iy = 1
+ pt1_iz = 1
+ pt2_ix = NGLLX
+ pt2_iy = NGLLY
+ pt2_iz = 1
+ endif
+ if (nint(gamma_source(isource)) == NGLLZ .and. &
+ iglob_is_surface_external_mesh(ibool(2,2,NGLLZ,ispec_selected_source(isource)))) then
+ pt0_ix = 1
+ pt0_iy = 1
+ pt0_iz = NGLLZ
+ pt1_ix = NGLLX
+ pt1_iy = 1
+ pt1_iz = NGLLZ
+ pt2_ix = 1
+ pt2_iy = NGLLY
+ pt2_iz = NGLLZ
+ endif
+
+ if (pt0_ix<0 .or.pt0_iy<0 .or. pt0_iz<0 .or. &
+ pt1_ix<0 .or. pt1_iy<0 .or. pt1_iz<0 .or. &
+ pt2_ix<0 .or. pt2_iy<0 .or. pt2_iz<0) then
+ call exit_mpi(myrank,'error in computing normal for sources.')
+ endif
+
+ u_vector(1) = xstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
+ - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+ u_vector(2) = ystore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
+ - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+ u_vector(3) = zstore(ibool(pt1_ix,pt1_iy,pt1_iz,ispec_selected_source(isource))) &
+ - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+ v_vector(1) = xstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
+ - xstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+ v_vector(2) = ystore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
+ - ystore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+ v_vector(3) = zstore(ibool(pt2_ix,pt2_iy,pt2_iz,ispec_selected_source(isource))) &
+ - zstore(ibool(pt0_ix,pt0_iy,pt0_iz,ispec_selected_source(isource)))
+
+ ! cross product
+ w_vector(1) = u_vector(2)*v_vector(3) - u_vector(3)*v_vector(2)
+ w_vector(2) = u_vector(3)*v_vector(1) - u_vector(1)*v_vector(3)
+ w_vector(3) = u_vector(1)*v_vector(2) - u_vector(2)*v_vector(1)
+
+ ! normalize vector w
+ w_vector(:) = w_vector(:)/sqrt(w_vector(1)**2+w_vector(2)**2+w_vector(3)**2)
+
+ ! build the two other vectors for a direct base: we normalize u, and v=w^u
+ u_vector(:) = u_vector(:)/sqrt(u_vector(1)**2+u_vector(2)**2+u_vector(3)**2)
+ v_vector(1) = w_vector(2)*u_vector(3) - w_vector(3)*u_vector(2)
+ v_vector(2) = w_vector(3)*u_vector(1) - w_vector(1)*u_vector(3)
+ v_vector(3) = w_vector(1)*u_vector(2) - w_vector(2)*u_vector(1)
+
+ ! build rotation matrice nu for seismograms
+ ! East (u)
+ nu_source(1,1,isource) = u_vector(1)
+ nu_source(1,2,isource) = v_vector(1)
+ nu_source(1,3,isource) = w_vector(1)
+ ! North (v)
+ nu_source(2,1,isource) = u_vector(2)
+ nu_source(2,2,isource) = v_vector(2)
+ nu_source(2,3,isource) = w_vector(2)
+ ! Vertical (w)
+ nu_source(3,1,isource) = u_vector(3)
+ nu_source(3,2,isource) = v_vector(3)
+ nu_source(3,3,isource) = w_vector(3)
+
+ endif ! of if (.not. SOURCES_CAN_BE_BURIED_EXT_MESH)
+
+! *******************************************
+! find the best (xi,eta,gamma) for the source
+! *******************************************
+
+ ! for point sources, the location will be exactly at a GLL point
+ ! otherwise this tries to find best location
+ if(.not. USE_FORCE_POINT_SOURCE) then
+
+ ! uses actual location interpolators, in range [-1,1]
+ xi = xigll(ix_initial_guess_source)
+ eta = yigll(iy_initial_guess_source)
+ gamma = zigll(iz_initial_guess_source)
+
+ ! define coordinates of the control points of the element
+ do ia=1,NGNOD
+ iax = 0
+ iay = 0
+ iaz = 0
+ if(iaddx(ia) == 0) then
+ iax = 1
+ else if(iaddx(ia) == 1) then
+ iax = (NGLLX+1)/2
+ else if(iaddx(ia) == 2) then
+ iax = NGLLX
+ else
+ call exit_MPI(myrank,'incorrect value of iaddx')
+ endif
+
+ if(iaddy(ia) == 0) then
+ iay = 1
+ else if(iaddy(ia) == 1) then
+ iay = (NGLLY+1)/2
+ else if(iaddy(ia) == 2) then
+ iay = NGLLY
+ else
+ call exit_MPI(myrank,'incorrect value of iaddy')
+ endif
+
+ if(iaddz(ia) == 0) then
+ iaz = 1
+ else if(iaddz(ia) == 1) then
+ iaz = (NGLLZ+1)/2
+ else if(iaddz(ia) == 2) then
+ iaz = NGLLZ
+ else
+ call exit_MPI(myrank,'incorrect value of iaddz')
+ endif
+
+ iglob = ibool(iax,iay,iaz,ispec_selected_source(isource))
+ xelm(ia) = dble(xstore(iglob))
+ yelm(ia) = dble(ystore(iglob))
+ zelm(ia) = dble(zstore(iglob))
+
+ enddo
+
+ ! iterate to solve the non linear system
+ do iter_loop = 1,NUM_ITER
+
+ ! recompute jacobian for the new point
+ call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+
+ ! compute distance to target location
+ dx = - (x - x_target_source)
+ dy = - (y - y_target_source)
+ dz = - (z - z_target_source)
+
+ ! compute increments
+ dxi = xix*dx + xiy*dy + xiz*dz
+ deta = etax*dx + etay*dy + etaz*dz
+ dgamma = gammax*dx + gammay*dy + gammaz*dz
+
+ ! update values
+ xi = xi + dxi
+ eta = eta + deta
+ gamma = gamma + dgamma
+
+ ! impose that we stay in that element
+ ! (useful if user gives a source outside the mesh for instance)
+ if (xi > 1.d0) xi = 1.d0
+ if (xi < -1.d0) xi = -1.d0
+ if (eta > 1.d0) eta = 1.d0
+ if (eta < -1.d0) eta = -1.d0
+ if (gamma > 1.d0) gamma = 1.d0
+ if (gamma < -1.d0) gamma = -1.d0
+
+ enddo
+
+ ! compute final coordinates of point found
+ call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+
+ ! store xi,eta,gamma and x,y,z of point found
+ ! note: xi/eta/gamma will be in range [-1,1]
+ xi_source(isource) = xi
+ eta_source(isource) = eta
+ gamma_source(isource) = gamma
+ x_found_source(isource) = x
+ y_found_source(isource) = y
+ z_found_source(isource) = z
+
+ ! compute final distance between asked and found (converted to km)
+ final_distance_source(isource) = dsqrt((x_target_source-x_found_source(isource))**2 + &
+ (y_target_source-y_found_source(isource))**2 + (z_target_source-z_found_source(isource))**2)
+
+ endif ! of if (.not. USE_FORCE_POINT_SOURCE)
+
+ ! end of loop on all the sources
+ enddo
+
+ ! now gather information from all the nodes
+ ngather = NSOURCES/NGATHER_SOURCES
+ if (mod(NSOURCES,NGATHER_SOURCES)/= 0) ngather = ngather+1
+ do ig = 1, ngather
+ ns = (ig-1) * NGATHER_SOURCES + 1
+ ne = min(ig*NGATHER_SOURCES, NSOURCES)
+ ng = ne - ns + 1
+
+ ispec_selected_source_all(:,:) = -1
+
+ ! avoids warnings about temporary creations of arrays for function call by compiler
+ allocate(tmp_i_local(ng),tmp_i_all_local(ng,0:NPROC-1),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array tmp_i_local'
+ tmp_i_local(:) = ispec_selected_source(ns:ne)
+ call gather_all_i(tmp_i_local,ng,tmp_i_all_local,ng,NPROC)
+ ispec_selected_source_all(1:ng,:) = tmp_i_all_local(:,:)
+
+ ! acoustic/elastic domain
+ tmp_i_local(:) = idomain(ns:ne)
+ call gather_all_i(tmp_i_local,ng,tmp_i_all_local,ng,NPROC)
+ idomain_all(1:ng,:) = tmp_i_all_local(:,:)
+
+ deallocate(tmp_i_local,tmp_i_all_local)
+
+ ! avoids warnings about temporary creations of arrays for function call by compiler
+ allocate(tmp_local(ng),tmp_all_local(ng,0:NPROC-1),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array tmp_local'
+ tmp_local(:) = xi_source(ns:ne)
+ call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+ xi_source_all(1:ng,:) = tmp_all_local(:,:)
+
+ tmp_local(:) = eta_source(ns:ne)
+ call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+ eta_source_all(1:ng,:) = tmp_all_local(:,:)
+
+ tmp_local(:) = gamma_source(ns:ne)
+ call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+ gamma_source_all(1:ng,:) = tmp_all_local(:,:)
+
+ tmp_local(:) = final_distance_source(ns:ne)
+ call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+ final_distance_source_all(1:ng,:) = tmp_all_local(:,:)
+
+ tmp_local(:) = x_found_source(ns:ne)
+ call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+ x_found_source_all(1:ng,:) = tmp_all_local(:,:)
+
+ tmp_local(:) = y_found_source(ns:ne)
+ call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+ y_found_source_all(1:ng,:) = tmp_all_local(:,:)
+
+ tmp_local(:) = z_found_source(ns:ne)
+ call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+ z_found_source_all(1:ng,:) = tmp_all_local(:,:)
+
+ do i=1,3
+ do j=1,3
+ tmp_local(:) = nu_source(i,j,ns:ne)
+ call gather_all_dp(tmp_local,ng,tmp_all_local,ng,NPROC)
+ nu_source_all(i,j,1:ng,:) = tmp_all_local(:,:)
+ enddo
+ enddo
+ deallocate(tmp_local,tmp_all_local)
+
+ ! this is executed by main process only
+ if(myrank == 0) then
+
+ ! check that the gather operation went well
+ if(any(ispec_selected_source_all(1:ng,:) == -1)) call exit_MPI(myrank,'gather operation failed for source')
+
+ ! loop on all the sources
+ do is = 1,ng
+ isource = ns + is - 1
+
+ ! loop on all the results to determine the best slice
+ distmin = HUGEVAL
+ do iprocloop = 0,NPROC-1
+ if(final_distance_source_all(is,iprocloop) < distmin) then
+ distmin = final_distance_source_all(is,iprocloop)
+ islice_selected_source(isource) = iprocloop
+ ispec_selected_source(isource) = ispec_selected_source_all(is,iprocloop)
+ xi_source(isource) = xi_source_all(is,iprocloop)
+ eta_source(isource) = eta_source_all(is,iprocloop)
+ gamma_source(isource) = gamma_source_all(is,iprocloop)
+ x_found_source(isource) = x_found_source_all(is,iprocloop)
+ y_found_source(isource) = y_found_source_all(is,iprocloop)
+ z_found_source(isource) = z_found_source_all(is,iprocloop)
+ nu_source(:,:,isource) = nu_source_all(:,:,isource,iprocloop)
+ idomain(isource) = idomain_all(is,iprocloop)
+ endif
+ enddo
+ final_distance_source(isource) = distmin
+
+ enddo
+ endif !myrank
+ enddo ! ngather
+
+ if (myrank == 0) then
+
+ do isource = 1,NSOURCES
+
+ if(SHOW_DETAILS_LOCATE_SOURCE .or. NSOURCES == 1) then
+
+ write(IMAIN,*)
+ write(IMAIN,*) '*************************************'
+ write(IMAIN,*) ' locating source ',isource
+ write(IMAIN,*) '*************************************'
+ write(IMAIN,*)
+ write(IMAIN,*) 'source located in slice ',islice_selected_source(isource)
+ write(IMAIN,*) ' in element ',ispec_selected_source(isource)
+
+ if( idomain(isource) == IDOMAIN_ACOUSTIC ) then
+ write(IMAIN,*) ' in acoustic domain'
+ else if( idomain(isource) == IDOMAIN_ELASTIC ) then
+ write(IMAIN,*) ' in elastic domain'
+ else
+ write(IMAIN,*) ' in unknown domain'
+ endif
+
+ write(IMAIN,*)
+ if(USE_FORCE_POINT_SOURCE) then
+ write(IMAIN,*) ' i index of source in that element: ',nint(xi_source(isource))
+ write(IMAIN,*) ' j index of source in that element: ',nint(eta_source(isource))
+ write(IMAIN,*) ' k index of source in that element: ',nint(gamma_source(isource))
+ write(IMAIN,*)
+ write(IMAIN,*) ' component direction: ',COMPONENT_FORCE_SOURCE
+ write(IMAIN,*)
+ write(IMAIN,*) ' nu1 = ',nu_source(1,:,isource)
+ write(IMAIN,*) ' nu2 = ',nu_source(2,:,isource)
+ write(IMAIN,*) ' nu3 = ',nu_source(3,:,isource)
+ write(IMAIN,*)
+ write(IMAIN,*) ' at (x,y,z) coordinates = ',x_found_source(isource),y_found_source(isource),z_found_source(isource)
+
+ ! prints frequency content for point forces
+ f0 = hdur(isource)
+ t0_ricker = 1.2d0/f0
+ write(IMAIN,*) ' using a source of dominant frequency ',f0
+ write(IMAIN,*) ' lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+ write(IMAIN,*) ' lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+ write(IMAIN,*) ' t0_ricker = ',t0_ricker,'tshift_cmt = ',tshift_cmt(isource)
+ write(IMAIN,*)
+ write(IMAIN,*) ' half duration -> frequency: ',hdur(isource),' seconds**(-1)'
+ else
+ write(IMAIN,*) ' xi coordinate of source in that element: ',xi_source(isource)
+ write(IMAIN,*) ' eta coordinate of source in that element: ',eta_source(isource)
+ write(IMAIN,*) ' gamma coordinate of source in that element: ',gamma_source(isource)
+ write(IMAIN,*)
+ ! add message if source is a Heaviside
+ if(hdur(isource) <= 5.*DT) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'Source time function is a Heaviside, convolve later'
+ write(IMAIN,*)
+ endif
+ write(IMAIN,*) ' half duration: ',hdur(isource),' seconds'
+ endif
+ write(IMAIN,*) ' time shift: ',tshift_cmt(isource),' seconds'
+ write(IMAIN,*)
+ write(IMAIN,*) 'original (requested) position of the source:'
+ write(IMAIN,*)
+ write(IMAIN,*) ' latitude: ',lat(isource)
+ write(IMAIN,*) ' longitude: ',long(isource)
+ write(IMAIN,*)
+ if( SUPPRESS_UTM_PROJECTION ) then
+ write(IMAIN,*) ' x: ',utm_x_source(isource)
+ write(IMAIN,*) ' y: ',utm_y_source(isource)
+ else
+ write(IMAIN,*) ' UTM x: ',utm_x_source(isource)
+ write(IMAIN,*) ' UTM y: ',utm_y_source(isource)
+ endif
+ if( USE_SOURCES_RECVS_Z ) then
+ write(IMAIN,*) ' z: ',depth(isource),' km'
+ else
+ write(IMAIN,*) ' depth: ',depth(isource),' km'
+ write(IMAIN,*) 'topo elevation: ',elevation(isource)
+ endif
+
+ write(IMAIN,*)
+ write(IMAIN,*) 'position of the source that will be used:'
+ write(IMAIN,*)
+ if( SUPPRESS_UTM_PROJECTION ) then
+ write(IMAIN,*) ' x: ',x_found_source(isource)
+ write(IMAIN,*) ' y: ',y_found_source(isource)
+ else
+ write(IMAIN,*) ' UTM x: ',x_found_source(isource)
+ write(IMAIN,*) ' UTM y: ',y_found_source(isource)
+ endif
+ if( USE_SOURCES_RECVS_Z ) then
+ write(IMAIN,*) ' z: ',z_found_source(isource)
+ else
+ write(IMAIN,*) ' depth: ',dabs(z_found_source(isource) - elevation(isource))/1000.,' km'
+ write(IMAIN,*) ' z: ',z_found_source(isource)
+ endif
+ write(IMAIN,*)
+
+ ! display error in location estimate
+ write(IMAIN,*) 'error in location of the source: ',sngl(final_distance_source(isource)),' m'
+
+ ! add warning if estimate is poor
+ ! (usually means source outside the mesh given by the user)
+ if(final_distance_source(isource) > 3000.d0) then
+ write(IMAIN,*)
+ write(IMAIN,*) '*****************************************************'
+ write(IMAIN,*) '*****************************************************'
+ write(IMAIN,*) '***** WARNING: source location estimate is poor *****'
+ write(IMAIN,*) '*****************************************************'
+ write(IMAIN,*) '*****************************************************'
+ endif
+
+ endif ! end of detailed output to locate source
+
+ ! checks CMTSOLUTION format for acoustic case
+ if( idomain(isource) == IDOMAIN_ACOUSTIC ) then
+ if( Mxx(isource) /= Myy(isource) .or. Myy(isource) /= Mzz(isource) .or. &
+ Mxy(isource) > TINYVAL .or. Mxz(isource) > TINYVAL .or. Myz(isource) > TINYVAL ) then
+ write(IMAIN,*)
+ write(IMAIN,*) ' error CMTSOLUTION format for acoustic source:'
+ write(IMAIN,*) ' acoustic source needs explosive moment tensor with'
+ write(IMAIN,*) ' Mrr = Mtt = Mpp '
+ write(IMAIN,*) ' and '
+ write(IMAIN,*) ' Mrt = Mrp = Mtp = zero'
+ write(IMAIN,*)
+ call exit_mpi(myrank,'error acoustic source')
+ endif
+ endif
+
+ ! checks source domain
+ if( idomain(isource) /= IDOMAIN_ACOUSTIC .and. idomain(isource) /= IDOMAIN_ELASTIC ) then
+ ! only acoustic/elastic domain implement yet
+ call exit_MPI(myrank,'source located in unknown domain')
+ endif
+
+ ! end of loop on all the sources
+ enddo
+
+ if( .not. SHOW_DETAILS_LOCATE_SOURCE .and. NSOURCES > 1 ) then
+ write(IMAIN,*)
+ write(IMAIN,*) '*************************************'
+ write(IMAIN,*) ' using sources ',NSOURCES
+ write(IMAIN,*) '*************************************'
+ write(IMAIN,*)
+ endif
+
+ if(PRINT_SOURCE_TIME_FUNCTION) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'printing the source-time function'
+ endif
+
+ ! display maximum error in location estimate
+ write(IMAIN,*)
+ write(IMAIN,*) 'maximum error in location of the sources: ',sngl(maxval(final_distance_source)),' m'
+ write(IMAIN,*)
+
+ ! sets new utm coordinates for best locations
+ utm_x_source(:) = x_found_source(:)
+ utm_y_source(:) = y_found_source(:)
+
+ endif ! end of section executed by main process only
+
+ ! main process broadcasts the results to all the slices
+ call bcast_all_i(islice_selected_source,NSOURCES)
+ call bcast_all_i(ispec_selected_source,NSOURCES)
+ call bcast_all_dp(xi_source,NSOURCES)
+ call bcast_all_dp(eta_source,NSOURCES)
+ call bcast_all_dp(gamma_source,NSOURCES)
+ call bcast_all_dp(utm_x_source,NSOURCES)
+ call bcast_all_dp(utm_y_source,NSOURCES)
+
+ ! elapsed time since beginning of source detection
+ if(myrank == 0) then
+ tCPU = wtime() - time_start
+ write(IMAIN,*)
+ write(IMAIN,*) 'Elapsed time for detection of sources in seconds = ',tCPU
+ write(IMAIN,*)
+ write(IMAIN,*) 'End of source detection - done'
+ write(IMAIN,*)
+ ! output source information to a file so that we can load it and write to SU headers later
+ open(unit=IOUT_SU,file=trim(OUTPUT_FILES)//'/output_list_sources.txt',status='unknown')
+ do isource=1,NSOURCES
+ write(IOUT_SU,*) x_found_source(isource),y_found_source(isource),z_found_source(isource)
+ enddo
+ close(IOUT_SU)
+ endif
+
+ end subroutine locate_source
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -75,7 +75,7 @@
end subroutine noise_distribution_direction
-subroutine noise_distribution_direction_non_uniform(xcoord_in,ycoord_in,zcoord_in, &
+ subroutine noise_distribution_dir_non_uni(xcoord_in,ycoord_in,zcoord_in, &
normal_x_noise_out,normal_y_noise_out,normal_z_noise_out, &
mask_noise_out)
implicit none
@@ -88,7 +88,7 @@
!PB VARIABLES TO DEFINE THE REGION OF NOISE
real(kind=CUSTOM_REAL) :: xcoord,ycoord,zcoord,xcoord_center,ycoord_center
real :: lon,lat,colat,lon_cn,lat_cn,dsigma,d,dmax
-
+
! coordinates "x/y/zcoord_in" actually contain r theta phi, therefore convert back to x y z
! call rthetaphi_2_xyz(xcoord,ycoord,zcoord, xcoord_in,ycoord_in,zcoord_in)
xcoord=xcoord_in
@@ -111,7 +111,9 @@
colat=atan(sqrt(xcoord**2+ycoord**2)/zcoord)
lat=(PI/2)-colat
-!PB CALCULATE THE DISTANCE BETWEEN CENTER OF NOISE REGION AND EACH POINT OF THE MODEL'S FREE SURFACE !PB dsigma IS THE "3D" ANGLE BETWEEN THE TWO POINTS, THEN d = R*dsigma
+!PB CALCULATE THE DISTANCE BETWEEN CENTER OF NOISE REGION AND EACH
+! POINT OF THE MODEL'S FREE SURFACE !PB dsigma IS THE "3D" ANGLE BETWEEN
+! THE TWO POINTS, THEN d = R*dsigma
dsigma=acos(sin(lon)*sin(lon_cn)+cos(lon)*cos(lon_cn)*cos(lat-lat_cn))
d=sqrt(xcoord**2+ycoord**2+zcoord**2)*dsigma
@@ -155,8 +157,8 @@
!******************************** change your noise characteristics above ****************************************
!*****************************************************************************************************************
- end subroutine noise_distribution_direction_non_uniform
-
+ end subroutine noise_distribution_dir_non_uni
+
! =============================================================================================================
! =============================================================================================================
! =============================================================================================================
@@ -165,7 +167,7 @@
subroutine read_parameters_noise(myrank,nrec,NSTEP,nmovie_points, &
islice_selected_rec,xi_receiver,eta_receiver,gamma_receiver,nu, &
noise_sourcearray,xigll,yigll,zigll, &
- ibool, &
+ ibool, &
xstore,ystore,zstore, &
irec_master_noise,normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
NSPEC_AB_VAL,NGLOB_AB_VAL, &
@@ -174,9 +176,9 @@
implicit none
include "constants.h"
! input parameters
- integer :: myrank, nrec, NSTEP, nmovie_points
+ integer :: myrank, nrec, NSTEP, nmovie_points
integer :: NSPEC_AB_VAL,NGLOB_AB_VAL
-
+
integer, dimension(nrec) :: islice_selected_rec
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB_VAL) :: ibool
@@ -187,23 +189,23 @@
double precision, dimension(NDIM,NDIM,nrec) :: nu
real(kind=CUSTOM_REAL), dimension(NGLOB_AB_VAL) :: xstore,ystore,zstore
- integer :: num_free_surface_faces
+ integer :: num_free_surface_faces
integer, dimension(num_free_surface_faces) :: free_surface_ispec
integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
-
+
logical, dimension(NSPEC_AB_VAL) :: ispec_is_acoustic
!daniel: from global code...
!integer, dimension(NSPEC2D_TOP_VAL) :: ibelm_top ! equals free_surface_ispec
!integer :: NSPEC2D_TOP_VAL ! equals num_free_surface_faces
!integer :: nspec_top ! equals num_free_surface_faces
-
+
! output parameters
integer :: irec_master_noise
real(kind=CUSTOM_REAL) :: noise_sourcearray(NDIM,NGLLX,NGLLY,NGLLZ,NSTEP)
real(kind=CUSTOM_REAL), dimension(nmovie_points) :: normal_x_noise,normal_y_noise,normal_z_noise,mask_noise
! local parameters
- integer :: ipoin,ispec,i,j,k,iglob,ios,iface,igll
+ integer :: ipoin,ispec,i,j,k,iglob,ios,iface,igll
real(kind=CUSTOM_REAL) :: normal_x_noise_out,normal_y_noise_out,normal_z_noise_out,mask_noise_out
character(len=256) :: filename
@@ -212,7 +214,7 @@
open(unit=IIN_NOISE,file=trim(filename),status='old',action='read',iostat=ios)
if( ios /= 0 ) &
call exit_MPI(myrank, 'file '//trim(filename)//' does NOT exist! This file contains the ID of the master receiver')
-
+
read(IIN_NOISE,*,iostat=ios) irec_master_noise
if( ios /= 0 ) call exit_MPI(myrank,'error reading file irec_master_noise')
close(IIN_NOISE)
@@ -274,11 +276,11 @@
! mask_noise_out)
! Setup for NOISE_TOMOGRAPHY by Piero Basini
- call noise_distribution_direction_non_uniform(xstore(iglob), &
+ call noise_distribution_dir_non_uni(xstore(iglob), &
ystore(iglob),zstore(iglob), &
normal_x_noise_out,normal_y_noise_out,normal_z_noise_out, &
mask_noise_out)
-
+
normal_x_noise(ipoin) = normal_x_noise_out
normal_y_noise(ipoin) = normal_y_noise_out
normal_z_noise(ipoin) = normal_z_noise_out
@@ -401,19 +403,19 @@
! size of single record
reclen=CUSTOM_REAL*NDIM*NGLLSQUARE*NSPEC_TOP
-
- ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer
+
+ ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer
if( NSPEC_TOP > 2147483647 / (CUSTOM_REAL * NGLLSQUARE * NDIM) ) then
print *,'reclen of noise surface_movie needed exceeds integer 4-byte limit: ',reclen
print *,' ',CUSTOM_REAL, NDIM, NGLLSQUARE, NSPEC_TOP
print*,'bit size fortran: ',bit_size(NSPEC_TOP)
call exit_MPI(myrank,"error NSPEC_TOP integer limit")
endif
-
+
! total file size
filesize = reclen
filesize = filesize*NSTEP
-
+
write(outputname,"('/proc',i6.6,'_surface_movie')") myrank
if (NOISE_TOMOGRAPHY==1) call open_file_abs_w(2,trim(LOCAL_PATH)//trim(outputname), &
len_trim(trim(LOCAL_PATH)//trim(outputname)), &
@@ -551,9 +553,9 @@
! adds noise source (only if this proc carries the noise)
if(myrank == islice_selected_rec(irec_master_noise)) then
-
+
ispec = ispec_selected_rec(irec_master_noise)
-
+
! adds nosie source contributions
do k=1,NGLLZ
do j=1,NGLLY
@@ -598,15 +600,15 @@
!integer :: NSPEC2D_TOP_VAL ! equals num_free_surface_faces
!integer, dimension(NSPEC2D_TOP_VAL) :: ibelm_top ! equals free_surface_ispec
!integer :: ispec2D ! equals iface
-
+
! local parameters
- integer :: ispec,i,j,k,iglob,iface,igll
+ integer :: ispec,i,j,k,iglob,iface,igll
real(kind=CUSTOM_REAL),dimension(NDIM,NGLLSQUARE,num_free_surface_faces) :: noise_surface_movie
integer(kind=8) :: Mesh_pointer
- logical :: GPU_MODE
+ logical :: GPU_MODE
if(.NOT. GPU_MODE) then
- ! loops over surface points
+ ! loops over surface points
! get coordinates of surface mesh and surface displacement
do iface = 1, num_free_surface_faces
@@ -658,7 +660,7 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB_VAL) :: accel ! both input and output
real(kind=CUSTOM_REAL), dimension(nmovie_points) :: normal_x_noise,normal_y_noise,normal_z_noise, mask_noise
- integer :: num_free_surface_faces
+ integer :: num_free_surface_faces
integer, dimension(num_free_surface_faces) :: free_surface_ispec
integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
real(kind=CUSTOM_REAL) :: free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces)
@@ -667,10 +669,10 @@
!integer :: nspec_top ! equals num_free_surface_faces
!integer :: NSPEC2D_TOP_VAL ! equal num_free_surface_faces
!integer, dimension(NSPEC2D_TOP_VAL) :: ibelm_top ! equals free_surface_ispec
- !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_VAL) :: jacobian2D_top
+ !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_VAL) :: jacobian2D_top
! equals to: free_surface_jacobian2Dw including weights wgllwgll
!real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-
+
! local parameters
integer :: ipoin,ispec,i,j,k,iglob,iface,igll
real(kind=CUSTOM_REAL) :: eta
@@ -680,19 +682,19 @@
integer(kind=8) :: Mesh_pointer
logical :: GPU_MODE
integer :: NOISE_TOMOGRAPHY
-
+
! read surface movie
call read_abs(2,noise_surface_movie,CUSTOM_REAL*NDIM*NGLLSQUARE*num_free_surface_faces,it)
if(GPU_MODE) then
- call noise_read_add_surface_movie_cuda(Mesh_pointer, noise_surface_movie,&
- num_free_surface_faces,NOISE_TOMOGRAPHY)
+ call noise_read_add_surface_movie_cu(Mesh_pointer, noise_surface_movie,&
+ num_free_surface_faces,NOISE_TOMOGRAPHY)
else ! GPU_MODE==0
! get coordinates of surface mesh and surface displacement
ipoin = 0
- ! loops over surface points
+ ! loops over surface points
! puts noise distrubution and direction onto the surface points
do iface = 1, num_free_surface_faces
@@ -711,7 +713,7 @@
noise_surface_movie(3,igll,iface) * normal_z_noise(ipoin)
accel(1,iglob) = accel(1,iglob) + eta * mask_noise(ipoin) * normal_x_noise(ipoin) &
- * free_surface_jacobian2Dw(igll,iface)
+ * free_surface_jacobian2Dw(igll,iface)
accel(2,iglob) = accel(2,iglob) + eta * mask_noise(ipoin) * normal_y_noise(ipoin) &
* free_surface_jacobian2Dw(igll,iface)
accel(3,iglob) = accel(3,iglob) + eta * mask_noise(ipoin) * normal_z_noise(ipoin) &
@@ -742,13 +744,13 @@
integer :: it
integer :: nmovie_points
integer :: NSPEC_AB_VAL,NGLOB_AB_VAL
-
+
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB_VAL) :: ibool
real(kind=CUSTOM_REAL) :: deltat
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB_VAL) :: displ
real(kind=CUSTOM_REAL), dimension(nmovie_points) :: normal_x_noise,normal_y_noise,normal_z_noise
- integer :: num_free_surface_faces
+ integer :: num_free_surface_faces
integer, dimension(num_free_surface_faces) :: free_surface_ispec
integer, dimension(3,NGLLSQUARE,num_free_surface_faces) :: free_surface_ijk
@@ -768,7 +770,7 @@
! GPU_MODE parameters
integer(kind=8) :: Mesh_pointer
logical :: GPU_MODE
-
+
! read surface movie, needed for Sigma_kl
call read_abs(2,noise_surface_movie,CUSTOM_REAL*NDIM*NGLLSQUARE*num_free_surface_faces,it)
@@ -806,7 +808,7 @@
enddo
else ! GPU_MODE==1
- call compute_kernels_strength_noise_cuda(Mesh_pointer, noise_surface_movie,num_free_surface_faces,deltat)
+ call compute_kernels_strgth_noise_cu(Mesh_pointer, noise_surface_movie,num_free_surface_faces,deltat)
endif ! GPU_MODE
end subroutine compute_kernels_strength_noise
@@ -828,7 +830,7 @@
character(len=256) :: prname
call create_name_database(prname,myrank,LOCAL_PATH)
-
+
open(unit=IOUT_NOISE,file=trim(prname)//'sigma_kernel.bin',status='unknown', &
form='unformatted',action='write')
write(IOUT_NOISE) Sigma_kl
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -158,7 +158,7 @@
seismograms_a(:,:,:) = 0._CUSTOM_REAL
endif
- ! synchronize all the processes
+ ! synchronize all the processes
call sync_all()
! prepares attenuation arrays
@@ -542,21 +542,21 @@
! size of single record
b_reclen_field = CUSTOM_REAL * NDIM * NGLLSQUARE * num_abs_boundary_faces
- ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer
+ ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer
if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NDIM * NGLLSQUARE) ) then
print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_field
print *,' ',CUSTOM_REAL, NDIM, NGLLSQUARE, num_abs_boundary_faces
print*,'bit size fortran: ',bit_size(b_reclen_field)
call exit_MPI(myrank,"error b_reclen_field integer limit")
endif
-
+
! total file size
filesize = b_reclen_field
filesize = filesize*NSTEP
if (SIMULATION_TYPE == 3) then
! opens existing files
-
+
! uses fortran routines for reading
!open(unit=IOABS,file=trim(prname)//'absorb_field.bin',status='old',&
! action='read',form='unformatted',access='direct', &
@@ -591,14 +591,14 @@
! size of single record
b_reclen_potential = CUSTOM_REAL * NGLLSQUARE * num_abs_boundary_faces
- ! check integer size limit: size of b_reclen_potential must fit onto an 4-byte integer
+ ! check integer size limit: size of b_reclen_potential must fit onto an 4-byte integer
if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NGLLSQUARE) ) then
print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_potential
print *,' ',CUSTOM_REAL, NGLLSQUARE, num_abs_boundary_faces
print*,'bit size fortran: ',bit_size(b_reclen_potential)
call exit_MPI(myrank,"error b_reclen_potential integer limit")
endif
-
+
! total file size (two lines to implicitly convert to 8-byte integers)
filesize = b_reclen_potential
filesize = filesize*NSTEP
@@ -610,7 +610,7 @@
! print*,'file size fortran: ',filesize
! print*,'file bit size fortran: ',bit_size(filesize)
!endif
-
+
if (SIMULATION_TYPE == 3) then
! opens existing files
! uses fortran routines for reading
@@ -618,7 +618,7 @@
! action='read',form='unformatted',access='direct', &
! recl=b_reclen_potential+2*4,iostat=ier )
!if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_potential.bin file')
-
+
! uses c routines for faster reading
call open_file_abs_r(1,trim(prname)//'absorb_potential.bin', &
len_trim(trim(prname)//'absorb_potential.bin'), &
@@ -638,9 +638,8 @@
endif
endif
-
else
- ! dummy array
+ ! needs dummy array
b_num_abs_boundary_faces = 1
if( ELASTIC_SIMULATION ) then
allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
@@ -651,10 +650,22 @@
allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
endif
+ endif
+ else ! ABSORBING_CONDITIONS
+ ! needs dummy array
+ b_num_abs_boundary_faces = 1
+ if( ELASTIC_SIMULATION ) then
+ allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array b_absorb_field'
+ endif
+ if( ACOUSTIC_SIMULATION ) then
+ allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
endif
endif
+
end subroutine prepare_timerun_adjoint
!
@@ -677,11 +688,11 @@
! for noise simulations
if ( NOISE_TOMOGRAPHY /= 0 ) then
- ! checks if free surface is defined
+ ! checks if free surface is defined
if( num_free_surface_faces == 0 ) then
stop 'error: noise simulations need a free surface'
endif
-
+
! allocates arrays
allocate(noise_sourcearray(NDIM,NGLLX,NGLLY,NGLLZ,NSTEP),stat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error allocating noise source array')
@@ -738,15 +749,16 @@
implicit none
real :: free_mb,used_mb,total_mb
-
+ integer :: ncuda_devices,ncuda_devices_min,ncuda_devices_max
+
! GPU_MODE now defined in Par_file
if(myrank == 0 ) then
write(IMAIN,*)
write(IMAIN,*) "GPU_MODE Active. Preparing Fields and Constants on Device."
write(IMAIN,*)
endif
-
- ! prepares general fields on GPU
+
+ ! prepares general fields on GPU
call prepare_constants_device(Mesh_pointer, &
NGLLX, NSPEC_AB, NGLOB_AB, &
xix, xiy, xiz, etax,etay,etaz, gammax, gammay, gammaz, &
@@ -762,56 +774,85 @@
abs_boundary_jacobian2Dw, &
num_abs_boundary_faces, &
ispec_is_inner, &
- NSOURCES, sourcearrays, islice_selected_source, ispec_selected_source, &
- number_receiver_global, ispec_selected_rec, nrec, nrec_local, &
- SIMULATION_TYPE)
+ NSOURCES, nsources_local, &
+ sourcearrays, islice_selected_source, ispec_selected_source, &
+ number_receiver_global, ispec_selected_rec, &
+ nrec, nrec_local, &
+ SIMULATION_TYPE,ncuda_devices)
- ! prepares fields on GPU for acoustic simulations
- if( ACOUSTIC_SIMULATION ) &
+ call min_all_i(ncuda_devices,ncuda_devices_min)
+ call max_all_i(ncuda_devices,ncuda_devices_max)
+
+ ! prepares fields on GPU for acoustic simulations
+ if( ACOUSTIC_SIMULATION ) then
call prepare_fields_acoustic_device(Mesh_pointer,rmass_acoustic,rhostore,kappastore, &
num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
ispec_is_acoustic, &
- NOISE_TOMOGRAPHY,num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
+ NOISE_TOMOGRAPHY,num_free_surface_faces, &
+ free_surface_ispec,free_surface_ijk, &
ABSORBING_CONDITIONS,b_reclen_potential,b_absorb_potential, &
- SIMULATION_TYPE,rho_ac_kl,kappa_ac_kl, &
ELASTIC_SIMULATION, num_coupling_ac_el_faces, &
coupling_ac_el_ispec,coupling_ac_el_ijk, &
coupling_ac_el_normal,coupling_ac_el_jacobian2Dw)
-
- ! prepares fields on GPU for elastic simulations
- if( ELASTIC_SIMULATION ) &
+
+ if( SIMULATION_TYPE == 3 ) &
+ call prepare_fields_acoustic_adj_dev(Mesh_pointer, &
+ SIMULATION_TYPE,rho_ac_kl,kappa_ac_kl, &
+ APPROXIMATE_HESS_KL)
+
+ endif
+
+ ! prepares fields on GPU for elastic simulations
+ if( ELASTIC_SIMULATION ) then
call prepare_fields_elastic_device(Mesh_pointer, NDIM*NGLOB_AB, &
rmass,rho_vp,rho_vs, &
num_phase_ispec_elastic,phase_ispec_inner_elastic, &
ispec_is_elastic, &
ABSORBING_CONDITIONS,b_absorb_field,b_reclen_field, &
- SIMULATION_TYPE,rho_kl,mu_kl,kappa_kl, &
+ SIMULATION_TYPE,SAVE_FORWARD, &
COMPUTE_AND_STORE_STRAIN, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz, &
+ ATTENUATION, &
+ size(R_xx), &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ OCEANS,rmass_ocean_load, &
+ NOISE_TOMOGRAPHY, &
+ free_surface_normal,free_surface_ispec,free_surface_ijk, &
+ num_free_surface_faces)
+
+ if( SIMULATION_TYPE == 3 ) &
+ call prepare_fields_elastic_adj_dev(Mesh_pointer, NDIM*NGLOB_AB, &
+ SIMULATION_TYPE, &
+ rho_kl,mu_kl,kappa_kl, &
+ COMPUTE_AND_STORE_STRAIN, &
epsilon_trace_over_3, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz, &
b_epsilon_trace_over_3, &
ATTENUATION,size(R_xx), &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
b_alphaval,b_betaval,b_gammaval, &
- OCEANS,rmass_ocean_load, &
- free_surface_normal,num_free_surface_faces)
+ APPROXIMATE_HESS_KL)
+ endif
+
! prepares needed receiver array for adjoint runs
if( SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3 ) &
- call prepare_adjoint_sim2_or_3_constants_device(Mesh_pointer, &
+ call prepare_sim2_or_3_const_device(Mesh_pointer, &
islice_selected_rec,size(islice_selected_rec))
-
- ! prepares fields on GPU for noise simulations
+
+ ! prepares fields on GPU for noise simulations
if ( NOISE_TOMOGRAPHY > 0 ) then
! note: noise tomography is only supported for elastic domains so far.
-
+
! copies noise arrays to GPU
call prepare_fields_noise_device(Mesh_pointer, NSPEC_AB, NGLOB_AB, &
- free_surface_ispec,free_surface_ijk,num_free_surface_faces,size(free_surface_ijk), &
+ free_surface_ispec, &
+ free_surface_ijk, &
+ num_free_surface_faces, &
SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
NSTEP,noise_sourcearray, &
normal_x_noise,normal_y_noise,normal_z_noise, &
@@ -821,34 +862,39 @@
endif ! NOISE_TOMOGRAPHY
! sends initial data to device
-
+
! puts acoustic initial fields onto GPU
if( ACOUSTIC_SIMULATION ) then
- call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
+ call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
if( SIMULATION_TYPE == 3 ) &
- call transfer_b_fields_acoustic_to_device(NGLOB_AB,b_potential_acoustic, &
- b_potential_dot_acoustic,b_potential_dot_dot_acoustic,Mesh_pointer)
+ call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
+ b_potential_dot_acoustic,b_potential_dot_dot_acoustic,Mesh_pointer)
endif
! puts elastic initial fields onto GPU
if( ELASTIC_SIMULATION ) then
! transfer forward and backward fields to device with initial values
- call transfer_fields_to_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ call transfer_fields_el_to_device(NDIM*NGLOB_AB,displ,veloc,accel,Mesh_pointer)
if(SIMULATION_TYPE == 3) &
- call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc, b_accel,Mesh_pointer)
+ call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
endif
- ! outputs usage
+ ! outputs GPU usage to files for all processes
+ call output_free_device_memory(myrank)
+
+ ! outputs usage for main process
if( myrank == 0 ) then
+ write(IMAIN,*)" GPU number of devices per node: min =",ncuda_devices_min
+ write(IMAIN,*)" max =",ncuda_devices_max
+ write(IMAIN,*)
+
call get_free_device_memory(free_mb,used_mb,total_mb)
write(IMAIN,*)" GPU usage: free =",free_mb," MB",nint(free_mb/total_mb*100.0),"%"
write(IMAIN,*)" used =",used_mb," MB",nint(used_mb/total_mb*100.0),"%"
write(IMAIN,*)" total =",total_mb," MB",nint(total_mb/total_mb*100.0),"%"
write(IMAIN,*)
- endif
+ endif
- ! outputs GPU usage to files for all processes
- call output_free_device_memory(myrank)
-
+
end subroutine prepare_timerun_GPU
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -34,7 +34,7 @@
use specfem_par_poroelastic
implicit none
real(kind=CUSTOM_REAL):: minl,maxl,min_all,max_all
- integer :: ier
+ integer :: ier,inum
! start reading the databasesa
@@ -163,14 +163,21 @@
factor_common(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array one_minus_sum_beta etc.'
+ ! reads mass matrices
read(27) rmass
+
if( OCEANS ) then
! ocean mass matrix
allocate(rmass_ocean_load(NGLOB_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array rmass_ocean_load'
read(27) rmass_ocean_load
+ else
+ ! dummy allocation
+ allocate(rmass_ocean_load(1),stat=ier)
+ if( ier /= 0 ) stop 'error allocating dummy array rmass_ocean_load'
endif
- !pll
+
+ !pll material parameters for stacey conditions
read(27) rho_vp
read(27) rho_vs
@@ -209,11 +216,13 @@
abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces), &
abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces), &
abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array abs_boundary_ispec etc.'
- read(27) abs_boundary_ispec
- read(27) abs_boundary_ijk
- read(27) abs_boundary_jacobian2Dw
- read(27) abs_boundary_normal
+ if( ier /= 0 ) stop 'error allocating array abs_boundary_ispec etc.'
+ if( num_abs_boundary_faces > 0 ) then
+ read(27) abs_boundary_ispec
+ read(27) abs_boundary_ijk
+ read(27) abs_boundary_jacobian2Dw
+ read(27) abs_boundary_normal
+ endif
! free surface
read(27) num_free_surface_faces
@@ -221,34 +230,43 @@
free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces), &
free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces), &
free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array free_surface_ispec etc.'
- read(27) free_surface_ispec
- read(27) free_surface_ijk
- read(27) free_surface_jacobian2Dw
- read(27) free_surface_normal
-
+ if( ier /= 0 ) stop 'error allocating array free_surface_ispec etc.'
+ if( num_free_surface_faces > 0 ) then
+ read(27) free_surface_ispec
+ read(27) free_surface_ijk
+ read(27) free_surface_jacobian2Dw
+ read(27) free_surface_normal
+ endif
! acoustic-elastic coupling surface
read(27) num_coupling_ac_el_faces
allocate(coupling_ac_el_normal(NDIM,NGLLSQUARE,num_coupling_ac_el_faces), &
coupling_ac_el_jacobian2Dw(NGLLSQUARE,num_coupling_ac_el_faces), &
coupling_ac_el_ijk(3,NGLLSQUARE,num_coupling_ac_el_faces), &
coupling_ac_el_ispec(num_coupling_ac_el_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array coupling_ac_el_normal etc.'
- read(27) coupling_ac_el_ispec
- read(27) coupling_ac_el_ijk
- read(27) coupling_ac_el_jacobian2Dw
- read(27) coupling_ac_el_normal
+ if( ier /= 0 ) stop 'error allocating array coupling_ac_el_normal etc.'
+ if( num_coupling_ac_el_faces > 0 ) then
+ read(27) coupling_ac_el_ispec
+ read(27) coupling_ac_el_ijk
+ read(27) coupling_ac_el_jacobian2Dw
+ read(27) coupling_ac_el_normal
+ endif
! MPI interfaces
read(27) num_interfaces_ext_mesh
- read(27) max_nibool_interfaces_ext_mesh
allocate(my_neighbours_ext_mesh(num_interfaces_ext_mesh), &
- nibool_interfaces_ext_mesh(num_interfaces_ext_mesh), &
- ibool_interfaces_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ nibool_interfaces_ext_mesh(num_interfaces_ext_mesh),stat=ier)
if( ier /= 0 ) stop 'error allocating array my_neighbours_ext_mesh etc.'
- read(27) my_neighbours_ext_mesh
- read(27) nibool_interfaces_ext_mesh
- read(27) ibool_interfaces_ext_mesh
+ if( num_interfaces_ext_mesh > 0 ) then
+ read(27) max_nibool_interfaces_ext_mesh
+ allocate(ibool_interfaces_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibool_interfaces_ext_mesh'
+ read(27) my_neighbours_ext_mesh
+ read(27) nibool_interfaces_ext_mesh
+ read(27) ibool_interfaces_ext_mesh
+ else
+ max_nibool_interfaces_ext_mesh = 0
+ allocate(ibool_interfaces_ext_mesh(0,0),stat=ier)
+ endif
if( ANISOTROPY ) then
read(27) c11store
@@ -276,6 +294,21 @@
close(27)
+ call sum_all_i(count(ispec_is_acoustic(:)),inum)
+ if( myrank == 0 ) then
+ write(IMAIN,*) 'total acoustic elements:',inum
+ endif
+ call sum_all_i(count(ispec_is_elastic(:)),inum)
+ if( myrank == 0 ) then
+ write(IMAIN,*) 'total elastic elements :',inum
+ endif
+ call sum_all_i(num_interfaces_ext_mesh,inum)
+ if(myrank == 0) then
+ write(IMAIN,*) 'number of MPI partition interfaces: ',inum
+ write(IMAIN,*)
+ endif
+
+
! MPI communications
allocate(buffer_send_vector_ext_mesh(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh), &
buffer_recv_vector_ext_mesh(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh), &
@@ -290,6 +323,9 @@
! locate inner and outer elements
call rmd_setup_inner_outer_elemnts()
+!daniel: todo mesh coloring
+! call rmd_setup_color_perm()
+
! gets model dimensions
minl = minval( xstore )
maxl = maxval( xstore )
@@ -307,8 +343,10 @@
! check courant criteria on mesh
if( ELASTIC_SIMULATION ) then
- call check_mesh_resolution(myrank,NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore, &
- kappastore,mustore,rho_vp,rho_vs,DT,model_speed_max,min_resolved_period )
+ call check_mesh_resolution(myrank,NSPEC_AB,NGLOB_AB, &
+ ibool,xstore,ystore,zstore, &
+ kappastore,mustore,rho_vp,rho_vs, &
+ DT,model_speed_max,min_resolved_period )
else if( ACOUSTIC_SIMULATION ) then
allocate(rho_vp(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array rho_vp'
@@ -316,8 +354,10 @@
if( ier /= 0 ) stop 'error allocating array rho_vs'
rho_vp = sqrt( kappastore / rhostore ) * rhostore
rho_vs = 0.0_CUSTOM_REAL
- call check_mesh_resolution(myrank,NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore, &
- kappastore,mustore,rho_vp,rho_vs,DT,model_speed_max,min_resolved_period )
+ call check_mesh_resolution(myrank,NSPEC_AB,NGLOB_AB, &
+ ibool,xstore,ystore,zstore, &
+ kappastore,mustore,rho_vp,rho_vs, &
+ DT,model_speed_max,min_resolved_period )
deallocate(rho_vp,rho_vs)
endif
@@ -340,6 +380,7 @@
integer :: iinterface,ier
character(len=256) :: filename
logical,dimension(:),allocatable :: iglob_is_inner
+ real :: percentage_edge
! allocates arrays
allocate(ispec_is_inner(NSPEC_AB),stat=ier)
@@ -363,7 +404,7 @@
do j = 1, NGLLY
do i = 1, NGLLX
iglob = ibool(i,j,k,ispec)
- ispec_is_inner(ispec) = iglob_is_inner(iglob) .and. ispec_is_inner(ispec)
+ ispec_is_inner(ispec) = ( iglob_is_inner(iglob) .and. ispec_is_inner(ispec) )
enddo
enddo
enddo
@@ -397,6 +438,8 @@
! stores indices of inner and outer elements for faster(?) computation
num_phase_ispec_acoustic = max(nspec_inner_acoustic,nspec_outer_acoustic)
allocate( phase_ispec_inner_acoustic(num_phase_ispec_acoustic,2),stat=ier)
+ phase_ispec_inner_acoustic(:,:) = 0
+
if( ier /= 0 ) stop 'error allocating array phase_ispec_inner_acoustic'
nspec_inner_acoustic = 0
nspec_outer_acoustic = 0
@@ -432,8 +475,12 @@
! stores indices of inner and outer elements for faster(?) computation
num_phase_ispec_elastic = max(nspec_inner_elastic,nspec_outer_elastic)
+ if( num_phase_ispec_elastic <= 0 ) stop 'error elastic simulation: num_phase_ispec_elastic is zero'
+
allocate( phase_ispec_inner_elastic(num_phase_ispec_elastic,2),stat=ier)
if( ier /= 0 ) stop 'error allocating array phase_ispec_inner_elastic'
+ phase_ispec_inner_elastic(:,:) = 0
+
nspec_inner_elastic = 0
nspec_outer_elastic = 0
do ispec = 1, NSPEC_AB
@@ -451,9 +498,301 @@
!print *,'rank ',myrank,' elastic outer spec: ',nspec_outer_elastic
endif
+ if(myrank == 0) then
+ percentage_edge = 100.*count(ispec_is_inner(:))/real(NSPEC_AB)
+ write(IMAIN,*) 'for overlapping of communications with calculations:'
+ write(IMAIN,*) ' percentage of edge elements ',100. -percentage_edge,'%'
+ write(IMAIN,*) ' percentage of volume elements ',percentage_edge,'%'
+ write(IMAIN,*)
+ endif
+
+
end subroutine rmd_setup_inner_outer_elemnts
+!
+!-------------------------------------------------------------------------------------------------
+!
+!daniel: todo mesh coloring
+ subroutine rmd_setup_color_perm()
+
+ use specfem_par
+ use specfem_par_elastic
+ use specfem_par_acoustic
+ implicit none
+ ! local parameters
+ ! added for color permutation
+ integer :: nb_colors_outer_elements,nb_colors_inner_elements
+ integer, dimension(:), allocatable :: perm
+ integer, dimension(:), allocatable :: first_elem_number_in_this_color
+ integer, dimension(:), allocatable :: num_of_elems_in_this_color
+
+ integer :: icolor,ispec,ispec_counter
+ integer :: nspec_outer,nspec_outer_min_global,nspec_outer_max_global
+
+ integer :: ispec_inner_acoustic,ispec_outer_acoustic
+ integer :: ispec_inner_elastic,ispec_outer_elastic
+ integer :: nspec,nglob
+
+ ! added for sorting
+ integer, dimension(:,:,:,:), allocatable :: temp_array_int
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: temp_array_real
+ logical, dimension(:), allocatable :: temp_array_logical_1D
+ !integer, dimension(:), allocatable :: temp_array_int_1D
+
+ nspec = NSPEC_AB
+ nglob = NGLOB_AB
+
+ !!!! David Michea: detection of the edges, coloring and permutation separately
+ allocate(perm(nspec))
+
+ ! implement mesh coloring for GPUs if needed, to create subsets of disconnected elements
+ ! to remove dependencies and the need for atomic operations in the sum of
+ ! elemental contributions in the solver
+ if(USE_MESH_COLORING_GPU) then
+
+ allocate(first_elem_number_in_this_color(MAX_NUMBER_OF_COLORS + 1))
+
+ call get_perm_color_faster(ispec_is_inner,ibool,perm,nspec,nglob, &
+ nb_colors_outer_elements,nb_colors_inner_elements, &
+ nspec_outer,first_elem_number_in_this_color,myrank)
+
+ ! for the last color, the next color is fictitious and its first (fictitious) element number is nspec + 1
+ first_elem_number_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements + 1) = nspec + 1
+
+ allocate(num_of_elems_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements))
+
+ ! save mesh coloring
+ open(unit=99,file=prname(1:len_trim(prname))//'num_of_elems_in_this_color.dat',status='unknown')
+
+ ! number of colors for outer elements
+ write(99,*) nb_colors_outer_elements
+
+ ! number of colors for inner elements
+ write(99,*) nb_colors_inner_elements
+
+ ! number of elements in each color
+ do icolor = 1, nb_colors_outer_elements + nb_colors_inner_elements
+ num_of_elems_in_this_color(icolor) = first_elem_number_in_this_color(icolor+1) - first_elem_number_in_this_color(icolor)
+ write(99,*) num_of_elems_in_this_color(icolor)
+ enddo
+ close(99)
+
+ ! check that the sum of all the numbers of elements found in each color is equal
+ ! to the total number of elements in the mesh
+ if(sum(num_of_elems_in_this_color) /= nspec) then
+ print *,'nspec = ',nspec
+ print *,'total number of elements in all the colors of the mesh = ',sum(num_of_elems_in_this_color)
+ call exit_MPI(myrank, 'incorrect total number of elements in all the colors of the mesh')
+ endif
+
+ ! check that the sum of all the numbers of elements found in each color for the outer elements is equal
+ ! to the total number of outer elements found in the mesh
+ if(sum(num_of_elems_in_this_color(1:nb_colors_outer_elements)) /= nspec_outer) then
+ print *,'nspec_outer = ',nspec_outer
+ print *,'total number of elements in all the colors of the mesh for outer elements = ',sum(num_of_elems_in_this_color)
+ call exit_MPI(myrank, 'incorrect total number of elements in all the colors of the mesh for outer elements')
+ endif
+
+ deallocate(first_elem_number_in_this_color)
+ deallocate(num_of_elems_in_this_color)
+
+ else
+
+!! DK DK for regular C + MPI version for CPUs: do not use colors but nonetheless put all the outer elements
+!! DK DK first in order to be able to overlap non-blocking MPI communications with calculations
+
+!! DK DK nov 2010, for Rosa Badia / StarSs:
+!! no need for mesh coloring, but need to implement inner/outer subsets for non blocking MPI for StarSs
+ ispec_counter = 0
+ perm(:) = 0
+
+ ! first generate all the outer elements
+ do ispec = 1,nspec
+ if( ispec_is_inner(ispec) .eqv. .false.) then
+ ispec_counter = ispec_counter + 1
+ perm(ispec) = ispec_counter
+ endif
+ enddo
+
+ ! make sure we have detected some outer elements
+ !if(ispec_counter <= 0) call exit_MPI(myrank, 'fatal error: no outer elements detected!')
+
+ ! store total number of outer elements
+ nspec_outer = ispec_counter
+
+ ! then generate all the inner elements
+ do ispec = 1,nspec
+ if( ispec_is_inner(ispec) .eqv. .true.) then
+ ispec_counter = ispec_counter + 1
+ perm(ispec) = ispec_counter - nspec_outer ! starts again at 1
+ endif
+ enddo
+
+ ! test that all the elements have been used once and only once
+ if(ispec_counter /= nspec) call exit_MPI(myrank, 'fatal error: ispec_counter not equal to nspec')
+
+ ! do basic checks
+ if(minval(perm) /= 1) call exit_MPI(myrank, 'minval(perm) should be 1')
+ !if(maxval(perm) /= nspec) call exit_MPI(myrank, 'maxval(perm) should be nspec')
+
+
+ endif
+
+ ! sets up elements for loops in acoustic simulations
+ if( ACOUSTIC_SIMULATION ) then
+ ispec_inner_acoustic = 0
+ ispec_outer_acoustic = 0
+ do ispec = 1, NSPEC_AB
+ if( ispec_is_acoustic(ispec) ) then
+ if( ispec_is_inner(ispec) .eqv. .true. ) then
+ !ispec_inner_acoustic = ispec_inner_acoustic + 1
+ ispec_inner_acoustic = perm(ispec)
+ if( ispec_inner_acoustic < 1 .or. ispec_inner_acoustic > num_phase_ispec_acoustic ) &
+ call exit_MPI(myrank,'error inner acoustic permutation')
+
+ phase_ispec_inner_acoustic(ispec_inner_acoustic,2) = ispec
+
+ else
+ !ispec_outer_acoustic = ispec_outer_acoustic + 1
+ ispec_outer_acoustic = perm(ispec)
+ if( ispec_outer_acoustic < 1 .or. ispec_outer_acoustic > num_phase_ispec_acoustic ) &
+ call exit_MPI(myrank,'error outer acoustic permutation')
+
+ phase_ispec_inner_acoustic(ispec_outer_acoustic,1) = ispec
+
+ endif
+ endif
+ enddo
+ endif
+
+ ! sets up elements for loops in elastic simulations
+ if( ELASTIC_SIMULATION ) then
+ ispec_inner_elastic = 0
+ ispec_outer_elastic = 0
+ do ispec = 1, NSPEC_AB
+ if( ispec_is_elastic(ispec) ) then
+ if( ispec_is_inner(ispec) .eqv. .true. ) then
+ !ispec_inner_elastic = ispec_inner_elastic + 1
+ ispec_inner_elastic = perm(ispec)
+ if( ispec_inner_elastic < 1 .or. ispec_inner_elastic > num_phase_ispec_elastic ) then
+ print*,'error: inner elastic permutation',ispec_inner_elastic,num_phase_ispec_elastic,nspec_outer
+ call exit_MPI(myrank,'error inner elastic permutation')
+ endif
+ phase_ispec_inner_elastic(ispec_inner_elastic,2) = ispec
+
+ else
+ !ispec_outer_elastic = ispec_outer_elastic + 1
+ ispec_outer_elastic = perm(ispec)
+ if( ispec_outer_elastic < 1 .or. ispec_outer_elastic > num_phase_ispec_elastic ) then
+ print*,'error: outer elastic permutation',ispec_outer_elastic,num_phase_ispec_elastic,nspec_outer
+ call exit_MPI(myrank,'error outer elastic permutation')
+ endif
+ phase_ispec_inner_elastic(ispec_outer_elastic,1) = ispec
+
+ endif
+ endif
+ enddo
+ endif
+
+ ! sorts array according to permutation
+ ! SORT_MESH_INNER_OUTER
+!daniel
+ if( .false. ) then
+
+ ! permutation of ibool
+ allocate(temp_array_int(NGLLX,NGLLY,NGLLZ,nspec))
+ call permute_elements_integer(ibool,temp_array_int,perm,nspec)
+ deallocate(temp_array_int)
+
+ ! element domain flags
+ allocate(temp_array_logical_1D(nglob))
+ temp_array_logical_1D(:) = ispec_is_acoustic
+ do ispec = 1, nspec
+ ispec_is_acoustic(perm(ispec)) = temp_array_logical_1D(ispec)
+ enddo
+ temp_array_logical_1D(:) = ispec_is_elastic
+ do ispec = 1, nspec
+ ispec_is_elastic(perm(ispec)) = temp_array_logical_1D(ispec)
+ enddo
+ !temp_array_logical_1D(:) = ispec_is_poroelastic
+ !do ispec = 1, nspec
+ ! ispec_is_poroelastic(perm(ispec)) = temp_array_logical_1D(ispec)
+ !enddo
+ deallocate(temp_array_logical_1D)
+
+ ! mesh arrays
+ allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec))
+ call permute_elements_real(xix,temp_array_real,perm,nspec)
+ call permute_elements_real(xiy,temp_array_real,perm,nspec)
+ call permute_elements_real(xiz,temp_array_real,perm,nspec)
+ call permute_elements_real(etax,temp_array_real,perm,nspec)
+ call permute_elements_real(etay,temp_array_real,perm,nspec)
+ call permute_elements_real(etaz,temp_array_real,perm,nspec)
+ call permute_elements_real(gammax,temp_array_real,perm,nspec)
+ call permute_elements_real(gammay,temp_array_real,perm,nspec)
+ call permute_elements_real(gammaz,temp_array_real,perm,nspec)
+ call permute_elements_real(jacobian,temp_array_real,perm,nspec)
+
+ call permute_elements_real(kappastore,temp_array_real,perm,nspec)
+ call permute_elements_real(mustore,temp_array_real,perm,nspec)
+
+ ! acoustic arrays
+ if( ACOUSTIC_SIMULATION ) then
+ call permute_elements_real(rhostore,temp_array_real,perm,nspec)
+ endif
+
+ ! elastic arrays
+ if( ELASTIC_SIMULATION ) then
+ call permute_elements_real(rho_vp,temp_array_real,perm,nspec)
+ call permute_elements_real(rho_vs,temp_array_real,perm,nspec)
+ if( ANISOTROPY ) then
+ call permute_elements_real(c11store,temp_array_real,perm,nspec)
+ call permute_elements_real(c12store,temp_array_real,perm,nspec)
+ call permute_elements_real(c13store,temp_array_real,perm,nspec)
+ call permute_elements_real(c14store,temp_array_real,perm,nspec)
+ call permute_elements_real(c15store,temp_array_real,perm,nspec)
+ call permute_elements_real(c16store,temp_array_real,perm,nspec)
+ call permute_elements_real(c22store,temp_array_real,perm,nspec)
+ call permute_elements_real(c23store,temp_array_real,perm,nspec)
+ call permute_elements_real(c24store,temp_array_real,perm,nspec)
+ call permute_elements_real(c25store,temp_array_real,perm,nspec)
+ call permute_elements_real(c33store,temp_array_real,perm,nspec)
+ call permute_elements_real(c34store,temp_array_real,perm,nspec)
+ call permute_elements_real(c35store,temp_array_real,perm,nspec)
+ call permute_elements_real(c36store,temp_array_real,perm,nspec)
+ call permute_elements_real(c44store,temp_array_real,perm,nspec)
+ call permute_elements_real(c45store,temp_array_real,perm,nspec)
+ call permute_elements_real(c46store,temp_array_real,perm,nspec)
+ call permute_elements_real(c55store,temp_array_real,perm,nspec)
+ call permute_elements_real(c56store,temp_array_real,perm,nspec)
+ call permute_elements_real(c66store,temp_array_real,perm,nspec)
+ endif
+ endif
+
+ deallocate(temp_array_real)
+ endif
+
+ ! user output
+ !call MPI_ALLREDUCE(nspec_outer,nspec_outer_min_global,1,MPI_INTEGER,MPI_MIN,MPI_COMM_WORLD,ier)
+ !call MPI_ALLREDUCE(nspec_outer,nspec_outer_max_global,1,MPI_INTEGER,MPI_MAX,MPI_COMM_WORLD,ier)
+ call min_all_i(nspec_outer,nspec_outer_min_global)
+ call max_all_i(nspec_outer,nspec_outer_max_global)
+ call min_all_i(nspec_outer,nspec_outer_min_global)
+ call max_all_i(nspec_outer,nspec_outer_max_global)
+
+ if(myrank == 0) then
+ write(IMAIN,*) 'mesh coloring:'
+ write(IMAIN,*) ' permutation : use coloring = ',USE_MESH_COLORING_GPU
+ write(IMAIN,*) ' outer elements: min = ',nspec_outer_min_global
+ write(IMAIN,*) ' max = ',nspec_outer_max_global
+ write(IMAIN,*)
+ endif
+
+ deallocate(perm)
+
+ end subroutine rmd_setup_color_perm
+
!
!-------------------------------------------------------------------------------------------------
!
@@ -513,7 +852,11 @@
! preconditioner
if ( APPROXIMATE_HESS_KL ) then
allocate(hess_kl(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT),stat=ier)
- if( ier /= 0 ) stop 'error allocating array hess_kl'
+ if( ier /= 0 ) stop 'error allocating array hess_kl'
+ else
+ ! dummy allocation
+ allocate(hess_kl(0,0,0,0),stat=ier)
+ if( ier /= 0 ) stop 'error allocating dummy array hess_kl'
endif
! MPI handling
@@ -569,13 +912,17 @@
kappa_ac_kl(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT), &
alpha_ac_kl(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT),stat=ier)
if( ier /= 0 ) stop 'error allocating array rho_ac_kl etc.'
-
+
! preconditioner
if ( APPROXIMATE_HESS_KL ) then
allocate(hess_ac_kl(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT),stat=ier)
- if( ier /= 0 ) stop 'error allocating array hess_ac_kl'
+ if( ier /= 0 ) stop 'error allocating array hess_ac_kl'
+ else
+ ! dummy allocation
+ allocate(hess_ac_kl(0,0,0,0),stat=ier)
+ if( ier /= 0 ) stop 'error allocating dummy array hess_ac_kl'
endif
-
+
! MPI handling
allocate(b_request_send_scalar_ext_mesh(num_interfaces_ext_mesh), &
b_request_recv_scalar_ext_mesh(num_interfaces_ext_mesh), &
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/save_adjoint_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/save_adjoint_kernels.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/save_adjoint_kernels.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -196,9 +196,9 @@
if ( APPROXIMATE_HESS_KL ) then
call save_kernels_hessian()
endif
-
+
end subroutine save_adjoint_kernels
-
+
!
!-------------------------------------------------------------------------------------------------
!
@@ -211,7 +211,7 @@
implicit none
integer :: ier
-
+
! acoustic domains
if( ACOUSTIC_SIMULATION ) then
! scales approximate hessian
@@ -223,7 +223,7 @@
if( ier /= 0 ) stop 'error opening file hess_acoustic_kernel.bin'
write(27) hess_ac_kl
close(27)
- endif
+ endif
! elastic domains
if( ELASTIC_SIMULATION ) then
@@ -236,7 +236,7 @@
if( ier /= 0 ) stop 'error opening file hess_kernel.bin'
write(27) hess_kl
close(27)
- endif
-
+ endif
+
end subroutine save_kernels_hessian
-
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_GLL_points.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_GLL_points.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_GLL_points.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -41,9 +41,9 @@
! set up GLL points, weights and derivation matrices for reference element (between -1,1)
call define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz)
+ hprime_xx,hprime_yy,hprime_zz, &
+ hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz)
! define transpose of derivation matrix
do j = 1,NGLLY
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -206,6 +206,12 @@
call exit_mpi(myrank,'error negative USER_T0 parameter in constants.h')
endif
+ ! count number of sources located in this slice
+ nsources_local = 0
+ do isource = 1, NSOURCES
+ if(myrank == islice_selected_source(isource)) nsources_local = nsources_local + 1
+ enddo
+
! checks if source is in an acoustic element and exactly on the free surface because pressure is zero there
call setup_sources_check_acoustic()
@@ -514,7 +520,7 @@
real(kind=CUSTOM_REAL) :: junk
integer :: isource,ispec
integer :: irec !,irec_local
- integer :: i,j,k,iglob
+ integer :: i,j,k,iglob
integer :: icomp,itime,nadj_files_found,nadj_files_found_tot,ier
character(len=3),dimension(NDIM) :: comp ! = (/ "BHE", "BHN", "BHZ" /)
character(len=256) :: filename
@@ -569,9 +575,9 @@
iglob = ibool(nint(xi_source(isource)), &
nint(eta_source(isource)), &
nint(gamma_source(isource)), &
- ispec)
+ ispec)
! sets sourcearrays
- sourcearray(:,:,:,:) = 0.0
+ sourcearray(:,:,:,:) = 0.0
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -589,13 +595,18 @@
enddo
enddo
enddo
- endif
+ endif
! stores source excitations
sourcearrays(isource,:,:,:,:) = sourcearray(:,:,:,:)
endif
enddo
+ else
+ ! SIMULATION_TYPE == 2
+ ! allocate dummy array (needed for subroutine calls)
+ allocate(sourcearrays(0,0,0,0,0),stat=ier)
+ if( ier /= 0 ) stop 'error allocating dummy sourcearrays'
endif
! ADJOINT simulations
@@ -693,17 +704,19 @@
integer :: irec,irec_local,isource,ier
-! stores local receivers interpolation factors
+ ! needs to be allocate for subroutine calls (even if nrec_local == 0)
+ allocate(number_receiver_global(nrec_local),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array number_receiver_global'
+
+ ! stores local receivers interpolation factors
if (nrec_local > 0) then
- ! allocate Lagrange interpolators for receivers
+ ! allocate Lagrange interpolators for receivers
allocate(hxir_store(nrec_local,NGLLX), &
hetar_store(nrec_local,NGLLY), &
hgammar_store(nrec_local,NGLLZ),stat=ier)
if( ier /= 0 ) stop 'error allocating array hxir_store etc.'
- ! define local to global receiver numbering mapping
- allocate(number_receiver_global(nrec_local),stat=ier)
- if( ier /= 0 ) stop 'error allocating array number_reciever_global'
+ ! define local to global receiver numbering mapping
irec_local = 0
if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
do irec = 1,nrec
@@ -776,7 +789,11 @@
double precision :: xmesh,ymesh,zmesh
real(kind=CUSTOM_REAL),dimension(NGNOD) :: xelm,yelm,zelm
integer :: ia,ispec,isource,irec,ier,totalpoints
- INTEGER(kind=4) :: system_command_status
+
+ !INTEGER(kind=4) :: system_command_status
+ !integer :: ret
+ !integer,external :: system
+
character(len=256) :: filename,filename_new,system_command,system_command1,system_command2
! determines number of points for vtk file
@@ -800,7 +817,7 @@
endif
! sources
- if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
+ if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
do isource=1,NSOURCES
! spectral element id
ispec = ispec_selected_source(isource)
@@ -855,7 +872,7 @@
endif
enddo ! NSOURCES
endif
-
+
! receivers
do irec=1,nrec
ispec = ispec_selected_rec(irec)
@@ -906,38 +923,49 @@
close(IOVTK)
! creates additional receiver and source files
- if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
+ if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
! extracts receiver locations
filename = trim(OUTPUT_FILES)//'/sr.vtk'
filename_new = trim(OUTPUT_FILES)//'/receiver.vtk'
+
+ ! vtk file for receivers only
write(system_command, &
"('awk ',a1,'{if(NR<5) print $0;if(NR==5)print ',a1,'POINTS',i6,' float',a1,';if(NR>5+',i6,')print $0}',a1,' < ',a,' > ',a)")&
"'",'"',nrec,'"',NSOURCES,"'",trim(filename),trim(filename_new)
- call system(system_command,system_command_status)
+!daniel:
+! gfortran
+! call system(trim(system_command),system_command_status)
+! ifort
+! ret = system(trim(system_command))
+
! extracts source locations
!"('awk ',a1,'{if(NR< 6 + ',i6,') print $0}END{print}',a1,' < ',a,' > ',a)")&
filename_new = trim(OUTPUT_FILES)//'/source.vtk'
-
+
write(system_command1, &
"('awk ',a1,'{if(NR<5) print $0;if(NR==5)print ',a1,'POINTS',i6,' float',a1,';')") &
"'",'"',NSOURCES,'"'
!daniel
- !print*,'command 1:',trim(system_command1)
+ !print*,'command 1:',trim(system_command1)
write(system_command2, &
"('if(NR>5 && NR <6+',i6,')print $0}END{print ',a,'}',a1,' < ',a,' > ',a)") &
NSOURCES,'" "',"'",trim(filename),trim(filename_new)
-
- !print*,'command 2:',trim(system_command2)
-
+
+ !print*,'command 2:',trim(system_command2)
+
system_command = trim(system_command1)//trim(system_command2)
- !print*,'command:',trim(system_command)
+ !print*,'command:',trim(system_command)
+!daniel:
+! gfortran
+! call system(trim(system_command),system_command_status)
+! ifort
+! ret = system(trim(system_command))
- call system(trim(system_command),system_command_status)
- endif
+ endif
endif
end subroutine setup_sources_receivers_VTKfile
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -1,339 +1,339 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-!
-! United States and French Government Sponsorship Acknowledged.
-
- subroutine specfem3D()
-
- use specfem_par
-
-
-!=============================================================================!
-! !
-! specfem3D is a 3-D spectral-element solver for a local or regional model. !
-! It uses a mesh generated by program generate_databases !
-! !
-!=============================================================================!
-!
-! If you use this code for your own research, please cite at least one article
-! written by the developers of the package, for instance:
-!
-! @ARTICLE{TrKoLi08,
-! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
-! title = {Spectral-Element and Adjoint Methods in Seismology},
-! journal = {Communications in Computational Physics},
-! year = {2008},
-! volume = {3},
-! pages = {1-32},
-! number = {1}}
-!
-! or
-!
-! @ARTICLE{VaCaSaKoVi99,
-! author = {R. Vai and J. M. Castillo-Covarrubias and F. J. S\'anchez-Sesma and
-! D. Komatitsch and J. P. Vilotte},
-! title = {Elastic wave propagation in an irregularly layered medium},
-! journal = {Soil Dynamics and Earthquake Engineering},
-! year = {1999},
-! volume = {18},
-! pages = {11-18},
-! number = {1},
-! doi = {10.1016/S0267-7261(98)00027-X}}
-!
-! @ARTICLE{LeChKoHuTr09,
-! author = {Shiann Jong Lee and Yu Chang Chan and Dimitri Komatitsch and Bor
-! Shouh Huang and Jeroen Tromp},
-! title = {Effects of realistic surface topography on seismic ground motion
-! in the {Y}angminshan region of {T}aiwan based upon the spectral-element
-! method and {LiDAR DTM}},
-! journal = {Bull. Seismol. Soc. Am.},
-! year = {2009},
-! volume = {99},
-! pages = {681-693},
-! number = {2A},
-! doi = {10.1785/0120080264}}
-!
-! @ARTICLE{LeChLiKoHuTr08,
-! author = {Shiann Jong Lee and How Wei Chen and Qinya Liu and Dimitri Komatitsch
-! and Bor Shouh Huang and Jeroen Tromp},
-! title = {Three-Dimensional Simulations of Seismic Wave Propagation in the
-! {T}aipei Basin with Realistic Topography Based upon the Spectral-Element Method},
-! journal = {Bull. Seismol. Soc. Am.},
-! year = {2008},
-! volume = {98},
-! pages = {253-264},
-! number = {1},
-! doi = {10.1785/0120070033}}
-!
-! @ARTICLE{LeKoHuTr09,
-! author = {S. J. Lee and Dimitri Komatitsch and B. S. Huang and J. Tromp},
-! title = {Effects of topography on seismic wave propagation: An example from
-! northern {T}aiwan},
-! journal = {Bull. Seismol. Soc. Am.},
-! year = {2009},
-! volume = {99},
-! pages = {314-325},
-! number = {1},
-! doi = {10.1785/0120080020}}
-!
-! @ARTICLE{KoErGoMi10,
-! author = {Dimitri Komatitsch and Gordon Erlebacher and Dominik G\"oddeke and
-! David Mich\'ea},
-! title = {High-order finite-element seismic wave propagation modeling with
-! {MPI} on a large {GPU} cluster},
-! journal = {J. Comput. Phys.},
-! year = {2010},
-! volume = {229},
-! pages = {7692-7714},
-! number = {20},
-! doi = {10.1016/j.jcp.2010.06.024}}
-!
-! @ARTICLE{KoGoErMi10,
-! author = {Dimitri Komatitsch and Dominik G\"oddeke and Gordon Erlebacher and
-! David Mich\'ea},
-! title = {Modeling the propagation of elastic waves using spectral elements
-! on a cluster of 192 {GPU}s},
-! journal = {Computer Science Research and Development},
-! year = {2010},
-! volume = {25},
-! pages = {75-82},
-! number = {1-2},
-! doi = {10.1007/s00450-010-0109-1}}
-!
-! @ARTICLE{KoMiEr09,
-! author = {Dimitri Komatitsch and David Mich\'ea and Gordon Erlebacher},
-! title = {Porting a high-order finite-element earthquake modeling application
-! to {NVIDIA} graphics cards using {CUDA}},
-! journal = {Journal of Parallel and Distributed Computing},
-! year = {2009},
-! volume = {69},
-! pages = {451-460},
-! number = {5},
-! doi = {10.1016/j.jpdc.2009.01.006}}
-!
-! @ARTICLE{LiPoKoTr04,
-! author = {Qinya Liu and Jascha Polet and Dimitri Komatitsch and Jeroen Tromp},
-! title = {Spectral-element moment tensor inversions for earthquakes in {S}outhern {C}alifornia},
-! journal={Bull. Seismol. Soc. Am.},
-! year = {2004},
-! volume = {94},
-! pages = {1748-1761},
-! number = {5},
-! doi = {10.1785/012004038}}
-!
-! @INCOLLECTION{ChKoViCaVaFe07,
-! author = {Emmanuel Chaljub and Dimitri Komatitsch and Jean-Pierre Vilotte and
-! Yann Capdeville and Bernard Valette and Gaetano Festa},
-! title = {Spectral Element Analysis in Seismology},
-! booktitle = {Advances in Wave Propagation in Heterogeneous Media},
-! publisher = {Elsevier - Academic Press},
-! year = {2007},
-! editor = {Ru-Shan Wu and Val\'erie Maupin},
-! volume = {48},
-! series = {Advances in Geophysics},
-! pages = {365-419}}
-!
-! @ARTICLE{KoVi98,
-! author={D. Komatitsch and J. P. Vilotte},
-! title={The spectral-element method: an efficient tool to simulate the seismic response of 2{D} and 3{D} geological structures},
-! journal={Bull. Seismol. Soc. Am.},
-! year=1998,
-! volume=88,
-! number=2,
-! pages={368-392}}
-!
-! @ARTICLE{KoTr99,
-! author={D. Komatitsch and J. Tromp},
-! year=1999,
-! title={Introduction to the spectral-element method for 3-{D} seismic wave propagation},
-! journal={Geophys. J. Int.},
-! volume=139,
-! number=3,
-! pages={806-822},
-! doi={10.1046/j.1365-246x.1999.00967.x}}
-!
-! @ARTICLE{KoLiTrSuStSh04,
-! author={Dimitri Komatitsch and Qinya Liu and Jeroen Tromp and Peter S\"{u}ss
-! and Christiane Stidham and John H. Shaw},
-! year=2004,
-! title={Simulations of Ground Motion in the {L}os {A}ngeles {B}asin
-! based upon the Spectral-Element Method},
-! journal={Bull. Seism. Soc. Am.},
-! volume=94,
-! number=1,
-! pages={187-206}}
-!
-! and/or another article from http://web.univ-pau.fr/~dkomati1/publications.html
-!
-!
-! If you use the kernel capabilities of the code, please cite at least one article
-! written by the developers of the package, for instance:
-!
-! @ARTICLE{TrKoLi08,
-! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
-! title = {Spectral-Element and Adjoint Methods in Seismology},
-! journal = {Communications in Computational Physics},
-! year = {2008},
-! volume = {3},
-! pages = {1-32},
-! number = {1}}
-!
-! or
-!
-! @ARTICLE{LiTr06,
-! author={Qinya Liu and Jeroen Tromp},
-! title={Finite-frequency kernels based on adjoint methods},
-! journal={Bull. Seismol. Soc. Am.},
-! year=2006,
-! volume=96,
-! number=6,
-! pages={2383-2397},
-! doi={10.1785/0120060041}}
-!
-!
-! Reference frame - convention:
-! ----------------------------
-!
-! The code uses the following convention for the reference frame:
-!
-! - X axis is East
-! - Y axis is North
-! - Z axis is up
-!
-! Note that this convention is different from both the Aki-Richards convention
-! and the Harvard CMT convention.
-!
-! Let us recall that the Aki-Richards convention is:
-!
-! - X axis is North
-! - Y axis is East
-! - Z axis is down
-!
-! and that the Harvard CMT convention is:
-!
-! - X axis is South
-! - Y axis is East
-! - Z axis is up
-!
-! To report bugs or suggest improvements to the code, please send an email
-! to Jeroen Tromp <jtromp AT princeton.edu> and/or use our online
-! bug tracking system at http://www.geodynamics.org/roundup .
-!
-! Evolution of the code:
-! ---------------------
-!
-! MPI v. 2.0 "Sesame" (Spectral ElementS on Any MEsh), November 2010:
-! Dimitri Komatitsch, Nicolas Le Goff, Roland Martin and Pieyre Le Loher, University of Pau, France,
-! Jeroen Tromp and the Princeton group of developers, Princeton University, USA,
-! and Emanuele Casarotti, INGV Roma, Italy:
-! support for CUBIT meshes decomposed by SCOTCH, METIS or ZOLTAN;
-! much faster solver using Michel Deville's inlined matrix products.
-!
-! MPI v. 1.4 Dimitri Komatitsch, University of Pau, Qinya Liu and others, Caltech, September 2006:
-! better adjoint and kernel calculations, faster and better I/Os
-! on very large systems, many small improvements and bug fixes
-!
-! MPI v. 1.3 Dimitri Komatitsch, University of Pau, and Qinya Liu, Caltech, July 2005:
-! serial version, regular mesh, adjoint and kernel calculations, ParaView support
-!
-! MPI v. 1.2 Min Chen and Dimitri Komatitsch, Caltech, July 2004:
-! full anisotropy, volume movie
-!
-! MPI v. 1.1 Dimitri Komatitsch, Caltech, October 2002: Zhu's Moho map, scaling
-! of Vs with depth, Hauksson's regional model, attenuation, oceans, movies
-!
-! MPI v. 1.0 Dimitri Komatitsch, Caltech, USA, May 2002: first MPI version based on global code
-!
-! Dimitri Komatitsch, IPG Paris, France, December 1996: first 3-D solver for the CM-5 Connection Machine,
-! parallelized on 128 processors using Connection Machine Fortran
-!
-
-! in case of an acoustic medium, a potential Chi of (density * displacement) is used as in Chaljub and Valette,
-! Geophysical Journal International, vol. 158, p. 131-141 (2004) and *NOT* a velocity potential
-! as in Komatitsch and Tromp, Geophysical Journal International, vol. 150, p. 303-318 (2002).
-!
-! This permits acoustic-elastic coupling based on a non-iterative time scheme.
-! Displacement is then:
-! u = grad(Chi) / rho
-! Velocity is then:
-! v = grad(Chi_dot) / rho
-! (Chi_dot being the time derivative of Chi)
-! and pressure is:
-! p = - Chi_dot_dot
-! (Chi_dot_dot being the time second derivative of Chi).
-!
-! The source in an acoustic element is a pressure source.
-!
-! First-order acoustic-acoustic discontinuities are also handled automatically
-! because pressure is continuous at such an interface, therefore Chi_dot_dot
-! is continuous, therefore Chi is also continuous, which is consistent with
-! the spectral-element basis functions and with the assembling process.
-! This is the reason why a simple displacement potential u = grad(Chi) would
-! not work because it would be discontinuous at such an interface and would
-! therefore not be consistent with the basis functions.
-
-! ************** PROGRAM STARTS HERE **************
-
- ! force Flush-To-Zero if available to avoid very slow Gradual Underflow trapping
- call force_ftz()
-
-! reads in parameters
- call initialize_simulation()
-
-
-! reads in external mesh
- call read_mesh_databases()
-
-
-! sets up reference element GLL points/weights/derivatives
- call setup_GLL_points()
-
-
-! detects surfaces
- call detect_mesh_surfaces()
-
-
-! reads topography & bathymetry
- call read_topography_bathymetry()
-
-
-! prepares sources and receivers
- call setup_sources_receivers()
-
-
-! sets up and precomputes simulation arrays
- call prepare_timerun()
-
-
-! steps through time iterations
- call iterate_time()
-
-
-! saves last time frame and finishes kernel calculations
- call finalize_simulation()
-
- end subroutine specfem3D
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+!
+! United States and French Government Sponsorship Acknowledged.
+
+ subroutine specfem3D()
+
+ use specfem_par
+
+
+!=============================================================================!
+! !
+! specfem3D is a 3-D spectral-element solver for a local or regional model. !
+! It uses a mesh generated by program generate_databases !
+! !
+!=============================================================================!
+!
+! If you use this code for your own research, please cite at least one article
+! written by the developers of the package, for instance:
+!
+! @ARTICLE{TrKoLi08,
+! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
+! title = {Spectral-Element and Adjoint Methods in Seismology},
+! journal = {Communications in Computational Physics},
+! year = {2008},
+! volume = {3},
+! pages = {1-32},
+! number = {1}}
+!
+! or
+!
+! @ARTICLE{VaCaSaKoVi99,
+! author = {R. Vai and J. M. Castillo-Covarrubias and F. J. S\'anchez-Sesma and
+! D. Komatitsch and J. P. Vilotte},
+! title = {Elastic wave propagation in an irregularly layered medium},
+! journal = {Soil Dynamics and Earthquake Engineering},
+! year = {1999},
+! volume = {18},
+! pages = {11-18},
+! number = {1},
+! doi = {10.1016/S0267-7261(98)00027-X}}
+!
+! @ARTICLE{LeChKoHuTr09,
+! author = {Shiann Jong Lee and Yu Chang Chan and Dimitri Komatitsch and Bor
+! Shouh Huang and Jeroen Tromp},
+! title = {Effects of realistic surface topography on seismic ground motion
+! in the {Y}angminshan region of {T}aiwan based upon the spectral-element
+! method and {LiDAR DTM}},
+! journal = {Bull. Seismol. Soc. Am.},
+! year = {2009},
+! volume = {99},
+! pages = {681-693},
+! number = {2A},
+! doi = {10.1785/0120080264}}
+!
+! @ARTICLE{LeChLiKoHuTr08,
+! author = {Shiann Jong Lee and How Wei Chen and Qinya Liu and Dimitri Komatitsch
+! and Bor Shouh Huang and Jeroen Tromp},
+! title = {Three-Dimensional Simulations of Seismic Wave Propagation in the
+! {T}aipei Basin with Realistic Topography Based upon the Spectral-Element Method},
+! journal = {Bull. Seismol. Soc. Am.},
+! year = {2008},
+! volume = {98},
+! pages = {253-264},
+! number = {1},
+! doi = {10.1785/0120070033}}
+!
+! @ARTICLE{LeKoHuTr09,
+! author = {S. J. Lee and Dimitri Komatitsch and B. S. Huang and J. Tromp},
+! title = {Effects of topography on seismic wave propagation: An example from
+! northern {T}aiwan},
+! journal = {Bull. Seismol. Soc. Am.},
+! year = {2009},
+! volume = {99},
+! pages = {314-325},
+! number = {1},
+! doi = {10.1785/0120080020}}
+!
+! @ARTICLE{KoErGoMi10,
+! author = {Dimitri Komatitsch and Gordon Erlebacher and Dominik G\"oddeke and
+! David Mich\'ea},
+! title = {High-order finite-element seismic wave propagation modeling with
+! {MPI} on a large {GPU} cluster},
+! journal = {J. Comput. Phys.},
+! year = {2010},
+! volume = {229},
+! pages = {7692-7714},
+! number = {20},
+! doi = {10.1016/j.jcp.2010.06.024}}
+!
+! @ARTICLE{KoGoErMi10,
+! author = {Dimitri Komatitsch and Dominik G\"oddeke and Gordon Erlebacher and
+! David Mich\'ea},
+! title = {Modeling the propagation of elastic waves using spectral elements
+! on a cluster of 192 {GPU}s},
+! journal = {Computer Science Research and Development},
+! year = {2010},
+! volume = {25},
+! pages = {75-82},
+! number = {1-2},
+! doi = {10.1007/s00450-010-0109-1}}
+!
+! @ARTICLE{KoMiEr09,
+! author = {Dimitri Komatitsch and David Mich\'ea and Gordon Erlebacher},
+! title = {Porting a high-order finite-element earthquake modeling application
+! to {NVIDIA} graphics cards using {CUDA}},
+! journal = {Journal of Parallel and Distributed Computing},
+! year = {2009},
+! volume = {69},
+! pages = {451-460},
+! number = {5},
+! doi = {10.1016/j.jpdc.2009.01.006}}
+!
+! @ARTICLE{LiPoKoTr04,
+! author = {Qinya Liu and Jascha Polet and Dimitri Komatitsch and Jeroen Tromp},
+! title = {Spectral-element moment tensor inversions for earthquakes in {S}outhern {C}alifornia},
+! journal={Bull. Seismol. Soc. Am.},
+! year = {2004},
+! volume = {94},
+! pages = {1748-1761},
+! number = {5},
+! doi = {10.1785/012004038}}
+!
+! @INCOLLECTION{ChKoViCaVaFe07,
+! author = {Emmanuel Chaljub and Dimitri Komatitsch and Jean-Pierre Vilotte and
+! Yann Capdeville and Bernard Valette and Gaetano Festa},
+! title = {Spectral Element Analysis in Seismology},
+! booktitle = {Advances in Wave Propagation in Heterogeneous Media},
+! publisher = {Elsevier - Academic Press},
+! year = {2007},
+! editor = {Ru-Shan Wu and Val\'erie Maupin},
+! volume = {48},
+! series = {Advances in Geophysics},
+! pages = {365-419}}
+!
+! @ARTICLE{KoVi98,
+! author={D. Komatitsch and J. P. Vilotte},
+! title={The spectral-element method: an efficient tool to simulate the seismic response of 2{D} and 3{D} geological structures},
+! journal={Bull. Seismol. Soc. Am.},
+! year=1998,
+! volume=88,
+! number=2,
+! pages={368-392}}
+!
+! @ARTICLE{KoTr99,
+! author={D. Komatitsch and J. Tromp},
+! year=1999,
+! title={Introduction to the spectral-element method for 3-{D} seismic wave propagation},
+! journal={Geophys. J. Int.},
+! volume=139,
+! number=3,
+! pages={806-822},
+! doi={10.1046/j.1365-246x.1999.00967.x}}
+!
+! @ARTICLE{KoLiTrSuStSh04,
+! author={Dimitri Komatitsch and Qinya Liu and Jeroen Tromp and Peter S\"{u}ss
+! and Christiane Stidham and John H. Shaw},
+! year=2004,
+! title={Simulations of Ground Motion in the {L}os {A}ngeles {B}asin
+! based upon the Spectral-Element Method},
+! journal={Bull. Seism. Soc. Am.},
+! volume=94,
+! number=1,
+! pages={187-206}}
+!
+! and/or another article from http://web.univ-pau.fr/~dkomati1/publications.html
+!
+!
+! If you use the kernel capabilities of the code, please cite at least one article
+! written by the developers of the package, for instance:
+!
+! @ARTICLE{TrKoLi08,
+! author = {Jeroen Tromp and Dimitri Komatitsch and Qinya Liu},
+! title = {Spectral-Element and Adjoint Methods in Seismology},
+! journal = {Communications in Computational Physics},
+! year = {2008},
+! volume = {3},
+! pages = {1-32},
+! number = {1}}
+!
+! or
+!
+! @ARTICLE{LiTr06,
+! author={Qinya Liu and Jeroen Tromp},
+! title={Finite-frequency kernels based on adjoint methods},
+! journal={Bull. Seismol. Soc. Am.},
+! year=2006,
+! volume=96,
+! number=6,
+! pages={2383-2397},
+! doi={10.1785/0120060041}}
+!
+!
+! Reference frame - convention:
+! ----------------------------
+!
+! The code uses the following convention for the reference frame:
+!
+! - X axis is East
+! - Y axis is North
+! - Z axis is up
+!
+! Note that this convention is different from both the Aki-Richards convention
+! and the Harvard CMT convention.
+!
+! Let us recall that the Aki-Richards convention is:
+!
+! - X axis is North
+! - Y axis is East
+! - Z axis is down
+!
+! and that the Harvard CMT convention is:
+!
+! - X axis is South
+! - Y axis is East
+! - Z axis is up
+!
+! To report bugs or suggest improvements to the code, please send an email
+! to Jeroen Tromp <jtromp AT princeton.edu> and/or use our online
+! bug tracking system at http://www.geodynamics.org/roundup .
+!
+! Evolution of the code:
+! ---------------------
+!
+! MPI v. 2.0 "Sesame" (Spectral ElementS on Any MEsh), November 2010:
+! Dimitri Komatitsch, Nicolas Le Goff, Roland Martin and Pieyre Le Loher, University of Pau, France,
+! Jeroen Tromp and the Princeton group of developers, Princeton University, USA,
+! and Emanuele Casarotti, INGV Roma, Italy:
+! support for CUBIT meshes decomposed by SCOTCH, METIS or ZOLTAN;
+! much faster solver using Michel Deville's inlined matrix products.
+!
+! MPI v. 1.4 Dimitri Komatitsch, University of Pau, Qinya Liu and others, Caltech, September 2006:
+! better adjoint and kernel calculations, faster and better I/Os
+! on very large systems, many small improvements and bug fixes
+!
+! MPI v. 1.3 Dimitri Komatitsch, University of Pau, and Qinya Liu, Caltech, July 2005:
+! serial version, regular mesh, adjoint and kernel calculations, ParaView support
+!
+! MPI v. 1.2 Min Chen and Dimitri Komatitsch, Caltech, July 2004:
+! full anisotropy, volume movie
+!
+! MPI v. 1.1 Dimitri Komatitsch, Caltech, October 2002: Zhu's Moho map, scaling
+! of Vs with depth, Hauksson's regional model, attenuation, oceans, movies
+!
+! MPI v. 1.0 Dimitri Komatitsch, Caltech, USA, May 2002: first MPI version based on global code
+!
+! Dimitri Komatitsch, IPG Paris, France, December 1996: first 3-D solver for the CM-5 Connection Machine,
+! parallelized on 128 processors using Connection Machine Fortran
+!
+
+! in case of an acoustic medium, a potential Chi of (density * displacement) is used as in Chaljub and Valette,
+! Geophysical Journal International, vol. 158, p. 131-141 (2004) and *NOT* a velocity potential
+! as in Komatitsch and Tromp, Geophysical Journal International, vol. 150, p. 303-318 (2002).
+!
+! This permits acoustic-elastic coupling based on a non-iterative time scheme.
+! Displacement is then:
+! u = grad(Chi) / rho
+! Velocity is then:
+! v = grad(Chi_dot) / rho
+! (Chi_dot being the time derivative of Chi)
+! and pressure is:
+! p = - Chi_dot_dot
+! (Chi_dot_dot being the time second derivative of Chi).
+!
+! The source in an acoustic element is a pressure source.
+!
+! First-order acoustic-acoustic discontinuities are also handled automatically
+! because pressure is continuous at such an interface, therefore Chi_dot_dot
+! is continuous, therefore Chi is also continuous, which is consistent with
+! the spectral-element basis functions and with the assembling process.
+! This is the reason why a simple displacement potential u = grad(Chi) would
+! not work because it would be discontinuous at such an interface and would
+! therefore not be consistent with the basis functions.
+
+! ************** PROGRAM STARTS HERE **************
+
+ ! force Flush-To-Zero if available to avoid very slow Gradual Underflow trapping
+ call force_ftz()
+
+! reads in parameters
+ call initialize_simulation()
+
+
+! reads in external mesh
+ call read_mesh_databases()
+
+
+! sets up reference element GLL points/weights/derivatives
+ call setup_GLL_points()
+
+
+! detects surfaces
+ call detect_mesh_surfaces()
+
+
+! reads topography & bathymetry
+ call read_topography_bathymetry()
+
+
+! prepares sources and receivers
+ call setup_sources_receivers()
+
+
+! sets up and precomputes simulation arrays
+ call prepare_timerun()
+
+
+! steps through time iterations
+ call iterate_time()
+
+
+! saves last time frame and finishes kernel calculations
+ call finalize_simulation()
+
+ end subroutine specfem3D
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -42,14 +42,27 @@
implicit none
-! attenuation
- integer :: NSPEC_ATTENUATION_AB
+! parameters deduced from parameters read from file
+ integer :: NPROC
+ integer :: NSPEC_AB, NGLOB_AB
+! mesh parameters
+ integer, dimension(:,:,:,:), allocatable :: ibool
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: xstore,ystore,zstore
+
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz,jacobian
+
+! material properties
+ ! isotropic
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: kappastore,mustore
+
! CUDA mesh pointer<->integer wrapper
integer(kind=8) :: Mesh_pointer
+
! Global GPU toggle. Set in Par_file
logical :: GPU_MODE
-
+
! use integer array to store topography values
integer :: NX_TOPO,NY_TOPO
double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
@@ -70,17 +83,11 @@
integer, dimension(:), allocatable :: free_surface_ispec
integer :: num_free_surface_faces
-! mesh parameters
- integer, dimension(:,:,:,:), allocatable :: ibool
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: xstore,ystore,zstore
+! attenuation
+ integer :: NSPEC_ATTENUATION_AB
+ character(len=256) prname_Q
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz,jacobian
-! material properties
- ! isotropic
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: kappastore,mustore
-
! additional mass matrix for ocean load
real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_ocean_load
@@ -102,14 +109,15 @@
double precision, external :: comp_source_time_function
double precision :: t0
real(kind=CUSTOM_REAL) :: stf_used_total
- integer :: NSOURCES
+ integer :: NSOURCES,nsources_local
! receiver information
character(len=256) :: rec_filename,filtered_rec_filename,dummystring
integer :: nrec,nrec_local,nrec_tot_found
integer :: nrec_simulation
- integer, allocatable, dimension(:) :: islice_selected_rec,ispec_selected_rec,number_receiver_global
- double precision, allocatable, dimension(:) :: xi_receiver,eta_receiver,gamma_receiver
+ integer, dimension(:), allocatable :: islice_selected_rec,ispec_selected_rec
+ integer, dimension(:), allocatable :: number_receiver_global
+ double precision, dimension(:), allocatable :: xi_receiver,eta_receiver,gamma_receiver
double precision, dimension(:,:), allocatable :: hpxir_store,hpetar_store,hpgammar_store
! timing information for the stations
@@ -144,13 +152,11 @@
double precision, external :: wtime
double precision :: time_start
-! parameters read from parameter file
- integer :: NPROC_XI,NPROC_ETA
+! parameters
+ integer :: SIMULATION_TYPE
integer :: NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,UTM_PROJECTION_ZONE
- integer :: SIMULATION_TYPE
double precision :: DT
- double precision :: LATITUDE_MIN,LATITUDE_MAX,LONGITUDE_MIN,LONGITUDE_MAX
logical :: ATTENUATION,USE_OLSEN_ATTENUATION, &
OCEANS,TOPOGRAPHY,ABSORBING_CONDITIONS,ANISOTROPY
@@ -161,11 +167,11 @@
integer :: NTSTEP_BETWEEN_OUTPUT_INFO
- character(len=256) OUTPUT_FILES,LOCAL_PATH,prname,prname_Q
+! parameters read from mesh parameter file
+ integer :: NPROC_XI,NPROC_ETA
+ double precision :: LATITUDE_MIN,LATITUDE_MAX,LONGITUDE_MIN,LONGITUDE_MAX
-! parameters deduced from parameters read from file
- integer :: NPROC
- integer :: NSPEC_AB, NGLOB_AB
+ character(len=256) OUTPUT_FILES,LOCAL_PATH,prname
! names of the data files for all the processors in MPI
character(len=256) outputname
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -31,7 +31,7 @@
use specfem_par
use specfem_par_movie
use specfem_par_elastic
- use specfem_par_acoustic
+ use specfem_par_acoustic
implicit none
! gets resulting array values onto CPU
@@ -42,17 +42,17 @@
( MOVIE_SURFACE .and. mod(it,NTSTEP_BETWEEN_FRAMES) == 0) .or. &
( MOVIE_VOLUME .and. mod(it,NTSTEP_BETWEEN_FRAMES) == 0) .or. &
( PNM_GIF_IMAGE .and. mod(it,NTSTEP_BETWEEN_FRAMES) == 0) &
- ) ) then
+ ) ) then
! acoustic domains
if( ACOUSTIC_SIMULATION ) then
- ! transfers whole fields
- call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
+ ! transfers whole fields
+ call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
endif
! elastic domains
if( ELASTIC_SIMULATION ) then
- ! transfers whole fields
- call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ ! transfers whole fields
+ call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
endif
endif
@@ -1224,7 +1224,7 @@
open(unit=27,file=trim(LOCAL_PATH)//trim(outputname),status='unknown',form='unformatted',iostat=ier)
if( ier /= 0 ) stop 'error opening file movie output velocity z'
write(27) tmpdata
- close(27)
+ close(27)
endif
! norm of velocity
@@ -1235,9 +1235,9 @@
if( ier /= 0 ) stop 'error opening file movie output velocity z'
write(27) tmpdata
close(27)
-
+
deallocate(tmpdata)
-
+
endif
end subroutine wmo_movie_volume_output
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90 2011-10-29 00:36:16 UTC (rev 19128)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90 2011-10-30 02:25:28 UTC (rev 19129)
@@ -46,37 +46,39 @@
double precision :: stf
! gets resulting array values onto CPU
- if(GPU_MODE) then
- ! this transfers fields only in elements with stations for efficiency
- if( ACOUSTIC_SIMULATION ) then
- ! only copy corresponding elements to CPU host
- ! timing: Elapsed time: 5.230904e-04
- call transfer_station_fields_acoustic_from_device( &
- potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
- b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
- Mesh_pointer,number_receiver_global, &
- ispec_selected_rec,ispec_selected_source,ibool,SIMULATION_TYPE)
+ if(GPU_MODE) then
+ if( nrec_local > 0 ) then
+ ! this transfers fields only in elements with stations for efficiency
+ if( ACOUSTIC_SIMULATION ) then
+ ! only copy corresponding elements to CPU host
+ ! timing: Elapsed time: 5.230904e-04
+ call transfer_station_ac_from_device( &
+ potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
+ b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
+ Mesh_pointer,number_receiver_global, &
+ ispec_selected_rec,ispec_selected_source,ibool,SIMULATION_TYPE)
- ! alternative: transfers whole fields
- ! timing: Elapsed time: 4.138947e-03
- !call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
- ! potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
- endif
-
- ! this transfers fields only in elements with stations for efficiency
- if( ELASTIC_SIMULATION ) then
- call transfer_station_fields_from_device( &
- displ,veloc,accel, &
- b_displ,b_veloc,b_accel, &
- Mesh_pointer,number_receiver_global, &
- ispec_selected_rec,ispec_selected_source, &
- ibool,SIMULATION_TYPE)
+ ! alternative: transfers whole fields
+ ! timing: Elapsed time: 4.138947e-03
+ !call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+ ! potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
+ endif
- ! alternative: transfers whole fields
- ! call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ ! this transfers fields only in elements with stations for efficiency
+ if( ELASTIC_SIMULATION ) then
+ call transfer_station_el_from_device( &
+ displ,veloc,accel, &
+ b_displ,b_veloc,b_accel, &
+ Mesh_pointer,number_receiver_global, &
+ ispec_selected_rec,ispec_selected_source, &
+ ibool,SIMULATION_TYPE)
+
+ ! alternative: transfers whole fields
+ ! call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ endif
endif
endif
-
+
do irec_local = 1,nrec_local
! gets global number of that receiver
@@ -282,7 +284,7 @@
! write ONE binary file for all receivers (nrec_local) within one proc
! SU format, with 240-byte-header for each trace
if ((mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0 .or. it==NSTEP) .and. SU_FORMAT) &
- call write_seismograms_su()
+ call write_seismograms_su()
end subroutine write_seismograms
@@ -788,7 +790,7 @@
!=====================================================================
subroutine write_seismograms_su()
-
+
use specfem_par
use specfem_par_acoustic
use specfem_par_elastic
@@ -799,7 +801,7 @@
character(len=256) procname,final_LOCAL_PATH
integer :: irec_local,irec,ios
real :: x_station,y_station
-
+
! headers
integer,parameter :: nheader=240 ! 240 bytes
integer(kind=2) :: i2head(nheader/2) ! 2-byte-integer
@@ -846,7 +848,7 @@
i2head(58) =NSTEP
i2head(59) =DT*1.0d6
write(IOUT_SU,rec=irec_local) r4head, seismograms_d(1,irec_local,:)
- enddo
+ enddo
close(IOUT_SU)
! write seismograms (dy)
open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dy_SU' ,&
@@ -863,7 +865,7 @@
i2head(58) =NSTEP
i2head(59) =DT*1.0d6
write(IOUT_SU,rec=irec_local) r4head, seismograms_d(2,irec_local,:)
- enddo
+ enddo
close(IOUT_SU)
! write seismograms (dz)
@@ -881,7 +883,7 @@
i2head(58) =NSTEP
i2head(59) =DT*1.0d6
write(IOUT_SU,rec=irec_local) r4head, seismograms_d(3,irec_local,:)
- enddo
+ enddo
close(IOUT_SU)
end subroutine write_seismograms_su
More information about the CIG-COMMITS
mailing list