[cig-commits] r19145 - in seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER: examples/waterlayered_halfspace/in_data_files src/cuda src/generate_databases src/meshfem3D src/shared src/specfem3D
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Wed Nov 2 20:11:45 PDT 2011
Author: danielpeter
Date: 2011-11-02 20:11:45 -0700 (Wed, 02 Nov 2011)
New Revision: 19145
Added:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
Modified:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/CMTSOLUTION
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/STATIONS
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/Makefile.in
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/meshfem3D/save_databases.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/write_VTK_data.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
Log:
adds mesh coloring option
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/CMTSOLUTION
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/CMTSOLUTION 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/CMTSOLUTION 2011-11-03 03:11:45 UTC (rev 19145)
@@ -3,7 +3,7 @@
time shift: 0.0000
half duration: 2.0
latitude: 67000.0
-longitude: 67000.0
+longitude: 66999.9
depth: 1.0
Mrr: 1.000000e+23
Mtt: 1.000000e+23
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file 2011-11-03 03:11:45 UTC (rev 19145)
@@ -13,8 +13,8 @@
NPROC = 4
# time step parameters
-NSTEP = 4500
-DT = 0.005
+NSTEP = 4000
+DT = 0.01
# parameters describing the model
OCEANS = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/STATIONS
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/STATIONS 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/STATIONS 2011-11-03 03:11:45 UTC (rev 19145)
@@ -1,5 +1,13 @@
X10 DB 67000.00 10767.86 0.0 100.0
X20 DB 67000.00 22732.14 0.0 100.0
X30 DB 67000.00 34696.43 0.0 100.0
+X40 DB 67000.00 46660.71 0.0 100.0
+X50 DB 67000.00 58625.00 0.0 100.0
+X50 DB 67000.00 67000.00 0.0 100.0
+X60 DB 67000.00 75375.00 0.0 100.0
+X70 DB 67000.00 87339.29 0.0 100.0
+X80 DB 67000.00 99303.57 0.0 100.0
+X90 DB 67000.00 111269.86 0.0 100.0
X1 DB 67000.00 0.0 0.0 100.0
-X2 DB 67000.00 67000.0 0.0 25000.0
\ No newline at end of file
+X2 DB 67000.00 67000.0 0.0 25000.0
+X3 DB 67000.00 134000.0 0.0 100.0
\ No newline at end of file
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu 2011-11-03 03:11:45 UTC (rev 19145)
@@ -239,159 +239,43 @@
/* ----------------------------------------------------------------------------------------------- */
-void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, int SIMULATION_TYPE);
+//void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, int SIMULATION_TYPE);
-__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_acoustic,
- int num_phase_ispec_acoustic, int d_iphase,
- float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
- float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
- float* d_gammax, float* d_gammay, float* d_gammaz,
- float* hprime_xx, float* hprime_yy, float* hprime_zz,
- float* hprimewgll_xx, float* hprimewgll_yy, float* hprimewgll_zz,
- float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
- float* d_rhostore);
+//__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_acoustic,
+// int num_phase_ispec_acoustic, int d_iphase,
+// float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
+// float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
+// float* d_gammax, float* d_gammay, float* d_gammaz,
+// float* hprime_xx, float* hprime_yy, float* hprime_zz,
+// float* hprimewgll_xx, float* hprimewgll_yy, float* hprimewgll_zz,
+// float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
+// float* d_rhostore);
-/* ----------------------------------------------------------------------------------------------- */
-// main compute_forces_acoustic CUDA routine
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(compute_forces_acoustic_cuda,
- COMPUTE_FORCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
- int* iphase,
- int* nspec_outer_acoustic,
- int* nspec_inner_acoustic,
- int* SIMULATION_TYPE) {
-
-TRACE("compute_forces_acoustic_cuda");
- //double start_time = get_time();
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-
- int num_elements;
-
- if( *iphase == 1 )
- num_elements = *nspec_outer_acoustic;
- else
- num_elements = *nspec_inner_acoustic;
-
- if( num_elements == 0 ) return;
-
- //int myrank;
- /* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
- /* if(myrank==0) { */
-
- Kernel_2_acoustic(num_elements, mp, *iphase, *SIMULATION_TYPE);
-
- //cudaThreadSynchronize();
-
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- /* MPI_Barrier(MPI_COMM_WORLD); */
- //double end_time = get_time();
- //printf("Elapsed time: %e\n",end_time-start_time);
-//#endif
-}
-
-
-/* ----------------------------------------------------------------------------------------------- */
-
/* KERNEL 2 */
/* ----------------------------------------------------------------------------------------------- */
-void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, int SIMULATION_TYPE)
-{
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("before acoustic kernel Kernel 2");
-#endif
+__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,
+ int NGLOB, int* d_ibool,
+ int* d_phase_ispec_inner_acoustic,
+ int num_phase_ispec_acoustic,
+ int d_iphase,
+ int use_mesh_coloring_gpu,
+ float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
+ float* d_xix, float* d_xiy, float* d_xiz,
+ float* d_etax, float* d_etay, float* d_etaz,
+ float* d_gammax, float* d_gammay, float* d_gammaz,
+ float* hprime_xx, float* hprime_yy, float* hprime_zz,
+ float* hprimewgll_xx, float* hprimewgll_yy, float* hprimewgll_zz,
+ float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
+ float* d_rhostore){
- /* if the grid can handle the number of blocks, we let it be 1D */
- /* grid_2_x = nb_elem_color; */
- /* nb_elem_color is just how many blocks we are computing now */
-
- int num_blocks_x = nb_blocks_to_compute;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- int threads_2 = 128;//BLOCK_SIZE_K2;
- dim3 grid_2(num_blocks_x,num_blocks_y);
-
-
- // Cuda timing
- // cudaEvent_t start, stop;
- // float time;
- // cudaEventCreate(&start);
- // cudaEventCreate(&stop);
- // cudaEventRecord( start, 0 );
-
- Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
- mp->NGLOB_AB, mp->d_ibool,
- mp->d_phase_ispec_inner_acoustic,
- mp->num_phase_ispec_acoustic, d_iphase,
- mp->d_potential_acoustic, mp->d_potential_dot_dot_acoustic,
- mp->d_xix, mp->d_xiy, mp->d_xiz,
- mp->d_etax, mp->d_etay, mp->d_etaz,
- mp->d_gammax, mp->d_gammay, mp->d_gammaz,
- mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
- mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
- mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
- mp->d_rhostore);
-
- if(SIMULATION_TYPE == 3) {
- Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
- mp->NGLOB_AB, mp->d_ibool,
- mp->d_phase_ispec_inner_acoustic,
- mp->num_phase_ispec_acoustic, d_iphase,
- mp->d_b_potential_acoustic, mp->d_b_potential_dot_dot_acoustic,
- mp->d_xix, mp->d_xiy, mp->d_xiz,
- mp->d_etax, mp->d_etay, mp->d_etaz,
- mp->d_gammax, mp->d_gammay, mp->d_gammaz,
- mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
- mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
- mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
- mp->d_rhostore);
- }
-
- // cudaEventRecord( stop, 0 );
- // cudaEventSynchronize( stop );
- // cudaEventElapsedTime( &time, start, stop );
- // cudaEventDestroy( start );
- // cudaEventDestroy( stop );
- // printf("Kernel2 Execution Time: %f ms\n",time);
-
- /* cudaThreadSynchronize(); */
- /* TRACE("Kernel 2 finished"); */
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //printf("Tried to start with %dx1 blocks\n",nb_blocks_to_compute);
- exit_on_cuda_error("kernel Kernel_2");
-#endif
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-/* KERNEL 2 on device*/
-
-/* ----------------------------------------------------------------------------------------------- */
-
-
-__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
- int* d_phase_ispec_inner_acoustic,
- int num_phase_ispec_acoustic, int d_iphase,
- float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
- float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
- float* d_gammax, float* d_gammay, float* d_gammaz,
- float* hprime_xx, float* hprime_yy, float* hprime_zz,
- float* hprimewgll_xx, float* hprimewgll_yy, float* hprimewgll_zz,
- float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
- float* d_rhostore){
-
int bx = blockIdx.y*gridDim.x+blockIdx.x;
int tx = threadIdx.x;
@@ -428,16 +312,27 @@
// copy from global memory to shared memory
// each thread writes one of the NGLL^3 = 125 data points
if (active) {
- // iphase-1 and working_element-1 for Fortran->C array conventions
- working_element = d_phase_ispec_inner_acoustic[bx + num_phase_ispec_acoustic*(d_iphase-1)]-1;
+
+#ifdef USE_MESH_COLORING_GPU
+ working_element = bx;
+#else
+ //mesh coloring
+ if( use_mesh_coloring_gpu ){
+ working_element = bx;
+ }else{
+ // iphase-1 and working_element-1 for Fortran->C array conventions
+ working_element = d_phase_ispec_inner_acoustic[bx + num_phase_ispec_acoustic*(d_iphase-1)]-1;
+ }
+#endif
+
// iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
iglob = d_ibool[working_element*125 + tx]-1;
#ifdef USE_TEXTURES
- s_dummy_loc[tx] = tex1Dfetch(tex_potential_acoustic, iglob);
+ s_dummy_loc[tx] = tex1Dfetch(tex_potential_acoustic, iglob);
#else
// changing iglob indexing to match fortran row changes fast style
- s_dummy_loc[tx] = d_potential_acoustic[iglob];
+ s_dummy_loc[tx] = d_potential_acoustic[iglob];
#endif
}
@@ -589,14 +484,25 @@
d_potential_dot_dot_acoustic[iglob] = tex1Dfetch(tex_potential_dot_dot_acoustic, iglob)
- (fac1*temp1l + fac2*temp2l + fac3*temp3l);
#else
- /* OLD version that uses coloring to get around race condition. About 1.6x faster */
- // d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
- // d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
- // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+#ifdef USE_MESH_COLORING_GPU
+ // no atomic operation needed, colors don't share global points between elements
+ d_potential_dot_dot_acoustic[iglob] -= (fac1*temp1l + fac2*temp2l + fac3*temp3l);
+#else
+ //mesh coloring
+ if( use_mesh_coloring_gpu ){
+
+ // no atomic operation needed, colors don't share global points between elements
+ d_potential_dot_dot_acoustic[iglob] -= (fac1*temp1l + fac2*temp2l + fac3*temp3l);
+
+ }else{
+
atomicAdd(&d_potential_dot_dot_acoustic[iglob],-(fac1*temp1l + fac2*temp2l + fac3*temp3l));
+ }
#endif
+
+#endif
}
#else // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
@@ -607,6 +513,220 @@
/* ----------------------------------------------------------------------------------------------- */
+void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
+ int SIMULATION_TYPE,
+ int* d_ibool,
+ float* d_xix,
+ float* d_xiy,
+ float* d_xiz,
+ float* d_etax,
+ float* d_etay,
+ float* d_etaz,
+ float* d_gammax,
+ float* d_gammay,
+ float* d_gammaz,
+ float* d_rhostore)
+{
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("before acoustic kernel Kernel 2");
+#endif
+
+ /* if the grid can handle the number of blocks, we let it be 1D */
+ /* grid_2_x = nb_elem_color; */
+ /* nb_elem_color is just how many blocks we are computing now */
+
+ int num_blocks_x = nb_blocks_to_compute;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ int threads_2 = 128;//BLOCK_SIZE_K2;
+ dim3 grid_2(num_blocks_x,num_blocks_y);
+
+
+ // Cuda timing
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+
+ Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
+ mp->NGLOB_AB,
+ d_ibool,
+ mp->d_phase_ispec_inner_acoustic,
+ mp->num_phase_ispec_acoustic,
+ d_iphase,
+ mp->use_mesh_coloring_gpu,
+ mp->d_potential_acoustic, mp->d_potential_dot_dot_acoustic,
+ d_xix, d_xiy, d_xiz,
+ d_etax, d_etay, d_etaz,
+ d_gammax, d_gammay, d_gammaz,
+ mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
+ mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
+ mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+ d_rhostore);
+
+ if(SIMULATION_TYPE == 3) {
+ Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
+ mp->NGLOB_AB,
+ d_ibool,
+ mp->d_phase_ispec_inner_acoustic,
+ mp->num_phase_ispec_acoustic,
+ d_iphase,
+ mp->use_mesh_coloring_gpu,
+ mp->d_b_potential_acoustic, mp->d_b_potential_dot_dot_acoustic,
+ d_xix, d_xiy, d_xiz,
+ d_etax, d_etay, d_etaz,
+ d_gammax, d_gammay, d_gammaz,
+ mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
+ mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
+ mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+ d_rhostore);
+ }
+
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("Kernel2 Execution Time: %f ms\n",time);
+
+ /* cudaThreadSynchronize(); */
+ /* TRACE("Kernel 2 finished"); */
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("Tried to start with %dx1 blocks\n",nb_blocks_to_compute);
+ exit_on_cuda_error("kernel Kernel_2");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// main compute_forces_acoustic CUDA routine
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_forces_acoustic_cuda,
+ COMPUTE_FORCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+ int* iphase,
+ int* nspec_outer_acoustic,
+ int* nspec_inner_acoustic,
+ int* SIMULATION_TYPE) {
+
+ TRACE("compute_forces_acoustic_cuda");
+ //double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ int num_elements;
+
+ if( *iphase == 1 )
+ num_elements = *nspec_outer_acoustic;
+ else
+ num_elements = *nspec_inner_acoustic;
+
+ if( num_elements == 0 ) return;
+
+ //int myrank;
+ /* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
+ /* if(myrank==0) { */
+
+ // mesh coloring
+ if( mp->use_mesh_coloring_gpu ){
+
+ // note: array offsets require sorted arrays, such that e.g. ibool starts with elastic elements
+ // and followed by acoustic ones.
+ // acoustic elements also start with outer than inner element ordering
+
+ int nb_colors,nb_blocks_to_compute;
+ int istart;
+ int color_offset,color_offset_nonpadded;
+
+ // sets up color loop
+ if( *iphase == 1 ){
+ // outer elements
+ nb_colors = mp->num_colors_outer_acoustic;
+ istart = 0;
+
+ // array offsets (acoustic elements start after elastic ones)
+ color_offset = mp->nspec_elastic * NGLL3_PADDED;
+ color_offset_nonpadded = mp->nspec_elastic * NGLL3_NONPADDED;
+ }else{
+ // inner element colors (start after outer elements)
+ nb_colors = mp->num_colors_outer_acoustic + mp->num_colors_inner_acoustic;
+ istart = mp->num_colors_outer_acoustic;
+
+ // array offsets (inner elements start after outer ones)
+ color_offset = ( mp->nspec_elastic + (*nspec_outer_acoustic) ) * NGLL3_PADDED;
+ color_offset_nonpadded = ( mp->nspec_elastic + (*nspec_outer_acoustic) ) * NGLL3_NONPADDED;
+ }
+
+ // loops over colors
+ for(int icolor = istart; icolor < nb_colors; icolor++){
+
+ nb_blocks_to_compute = mp->h_num_elem_colors_acoustic[icolor];
+
+ // checks
+ //if( nb_blocks_to_compute <= 0 ){
+ // printf("error number of acoustic color blocks: %d -- color = %d \n",nb_blocks_to_compute,icolor);
+ // exit(EXIT_FAILURE);
+ //}
+
+ Kernel_2_acoustic(nb_blocks_to_compute,mp,*iphase,
+ *SIMULATION_TYPE,
+ mp->d_ibool + color_offset_nonpadded,
+ mp->d_xix + color_offset,
+ mp->d_xiy + color_offset,
+ mp->d_xiz + color_offset,
+ mp->d_etax + color_offset,
+ mp->d_etay + color_offset,
+ mp->d_etaz + color_offset,
+ mp->d_gammax + color_offset,
+ mp->d_gammay + color_offset,
+ mp->d_gammaz + color_offset,
+ mp->d_rhostore + color_offset);
+
+ // for padded and aligned arrays
+ color_offset += nb_blocks_to_compute * NGLL3_PADDED;
+ // for no-aligned arrays
+ color_offset_nonpadded += nb_blocks_to_compute * NGLL3_NONPADDED;
+ }
+
+ }else{
+
+ // no mesh coloring: uses atomic updates
+
+ Kernel_2_acoustic(num_elements, mp, *iphase,
+ *SIMULATION_TYPE,
+ mp->d_ibool,
+ mp->d_xix,
+ mp->d_xiy,
+ mp->d_xiz,
+ mp->d_etax,
+ mp->d_etay,
+ mp->d_etaz,
+ mp->d_gammax,
+ mp->d_gammay,
+ mp->d_gammaz,
+ mp->d_rhostore);
+
+ }
+
+ //cudaThreadSynchronize();
+
+ //#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ /* MPI_Barrier(MPI_COMM_WORLD); */
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ //#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
/* KERNEL 3 */
/* ----------------------------------------------------------------------------------------------- */
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu 2011-11-03 03:11:45 UTC (rev 19145)
@@ -51,30 +51,28 @@
__constant__ float d_wgllwgll_yz[NGLL2];
-void Kernel_2(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
- int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE,int ATTENUATION);
-
+//void Kernel_2(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
+// int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE,int ATTENUATION);
//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic,
// int num_phase_ispec_elastic, int d_iphase, int* d_ibool);
+//__global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
+// int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,
+// float* d_displ, float* d_accel,
+// float* d_xix, float* d_xiy, float* d_xiz,
+// float* d_etax, float* d_etay, float* d_etaz,
+// float* d_gammax, float* d_gammay, float* d_gammaz,
+// float* d_kappav, float* d_muv,
+// //float* d_debug,
+// int COMPUTE_AND_STORE_STRAIN,
+// float* epsilondev_xx,float* epsilondev_yy,float* epsilondev_xy,
+// float* epsilondev_xz,float* epsilondev_yz,float* epsilon_trace_over_3,
+// int SIMULATION_TYPE,
+// int ATTENUATION,int NSPEC,
+// float* one_minus_sum_beta,float* factor_common,
+// float* R_xx,float* R_yy,float* R_xy,float* R_xz,float* R_yz,
+// float* alphaval,float* betaval,float* gammaval);
-__global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
- int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,
- float* d_displ, float* d_accel,
- float* d_xix, float* d_xiy, float* d_xiz,
- float* d_etax, float* d_etay, float* d_etaz,
- float* d_gammax, float* d_gammay, float* d_gammaz,
- float* d_kappav, float* d_muv,
- //float* d_debug,
- int COMPUTE_AND_STORE_STRAIN,
- float* epsilondev_xx,float* epsilondev_yy,float* epsilondev_xy,
- float* epsilondev_xz,float* epsilondev_yz,float* epsilon_trace_over_3,
- int SIMULATION_TYPE,
- int ATTENUATION,int NSPEC,
- float* one_minus_sum_beta,float* factor_common,
- float* R_xx,float* R_yy,float* R_xy,float* R_xz,float* R_yz,
- float* alphaval,float* betaval,float* gammaval);
-
/* ----------------------------------------------------------------------------------------------- */
// prepares a device array with with all inter-element edge-nodes -- this
@@ -173,9 +171,10 @@
/* ----------------------------------------------------------------------------------------------- */
__global__ void assemble_boundary_accel_on_device(float* d_accel, float* d_send_accel_buffer,
- int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
- int* d_nibool_interfaces_ext_mesh,
- int* d_ibool_interfaces_ext_mesh) {
+ int num_interfaces_ext_mesh,
+ int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
//int bx = blockIdx.y*gridDim.x+blockIdx.x;
@@ -183,7 +182,7 @@
int iinterface=0;
for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
- if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
+ if(id < d_nibool_interfaces_ext_mesh[iinterface]) {
// for testing atomic operations against not atomic operations (0.1ms vs. 0.04 ms)
// d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)] +=
@@ -228,7 +227,7 @@
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
cudaMemcpy(mp->d_send_accel_buffer, buffer_recv_vector_ext_mesh,
- 3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(realw), cudaMemcpyHostToDevice);
+ 3*(*max_nibool_interfaces_ext_mesh)*(*num_interfaces_ext_mesh)*sizeof(realw), cudaMemcpyHostToDevice);
int blocksize = 256;
int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
@@ -278,49 +277,34 @@
/* ----------------------------------------------------------------------------------------------- */
+// KERNEL 2
-extern "C"
-void FC_FUNC_(compute_forces_elastic_cuda,
- COMPUTE_FORCES_ELASTIC_CUDA)(long* Mesh_pointer_f,
- int* iphase,
- int* nspec_outer_elastic,
- int* nspec_inner_elastic,
- int* SIMULATION_TYPE,
- int* COMPUTE_AND_STORE_STRAIN,
- int* ATTENUATION) {
+/* ----------------------------------------------------------------------------------------------- */
-TRACE("compute_forces_elastic_cuda");
-// EPIK_TRACER("compute_forces_elastic_cuda");
-//printf("Running compute_forces\n");
- //double start_time = get_time();
+/* ----------------------------------------------------------------------------------------------- */
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic,
+// int num_phase_ispec_elastic, int d_iphase, int* d_ibool) {
+// int bx = blockIdx.x;
+// int tx = threadIdx.x;
+// int working_element;
+// //int ispec;
+// //int NGLL3_ALIGN = 128;
+// if(tx==0 && bx==0) {
+//
+// d_debug_output[0] = 420.0;
+//
+// d_debug_output[2] = num_phase_ispec_elastic;
+// d_debug_output[3] = d_iphase;
+// working_element = d_phase_ispec_inner_elastic[bx + num_phase_ispec_elastic*(d_iphase-1)]-1;
+// d_debug_output[4] = working_element;
+// d_debug_output[5] = d_phase_ispec_inner_elastic[0];
+// /* d_debug_output[1] = d_ibool[working_element*NGLL3_ALIGN + tx]-1; */
+// }
+// /* d_debug_output[1+tx+128*bx] = 69.0; */
+//
+//}
- int num_elements;
-
- if( *iphase == 1 )
- num_elements = *nspec_outer_elastic;
- else
- num_elements = *nspec_inner_elastic;
-
- if( num_elements == 0 ) return;
-
- //int myrank;
- /* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
- /* if(myrank==0) { */
-
- Kernel_2(num_elements,mp,*iphase,*COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE,*ATTENUATION);
-
- //cudaThreadSynchronize();
-
-//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-/* MPI_Barrier(MPI_COMM_WORLD); */
- //double end_time = get_time();
- //printf("Elapsed time: %e\n",end_time-start_time);
-//#endif
-}
-
-
/* ----------------------------------------------------------------------------------------------- */
// updates stress
@@ -434,168 +418,9 @@
return;
}
-/* ----------------------------------------------------------------------------------------------- */
-// KERNEL 2
-
/* ----------------------------------------------------------------------------------------------- */
-void Kernel_2(int nb_blocks_to_compute,
- Mesh* mp,
- int d_iphase,
- int COMPUTE_AND_STORE_STRAIN,
- int SIMULATION_TYPE,
- int ATTENUATION){
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("before kernel Kernel 2");
-#endif
-
- /* if the grid can handle the number of blocks, we let it be 1D */
- /* grid_2_x = nb_elem_color; */
- /* nb_elem_color is just how many blocks we are computing now */
-
- int num_blocks_x = nb_blocks_to_compute;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- //int threads_2 = 128;//BLOCK_SIZE_K2;
- //dim3 grid_2(num_blocks_x,num_blocks_y);
-
- int blocksize = 128;
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- // debugging
- //printf("Starting with grid %dx%d for %d blocks\n",num_blocks_x,num_blocks_y,nb_blocks_to_compute);
-// float* d_debug;
-// float* h_debug;
-// h_debug = (float*)calloc(128,sizeof(float));
-// cudaMalloc((void**)&d_debug,128*sizeof(float));
-// cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-
- // Cuda timing
- // cudaEvent_t start, stop;
- // float time;
- // cudaEventCreate(&start);
- // cudaEventCreate(&stop);
- // cudaEventRecord( start, 0 );
-
- //Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
- Kernel_2_impl<<<grid,threads>>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
- mp->d_phase_ispec_inner_elastic,
- mp->num_phase_ispec_elastic,
- d_iphase,
- mp->d_displ, mp->d_accel,
- mp->d_xix, mp->d_xiy, mp->d_xiz,
- mp->d_etax, mp->d_etay, mp->d_etaz,
- mp->d_gammax, mp->d_gammay, mp->d_gammaz,
- mp->d_kappav, mp->d_muv,
- //d_debug,
- COMPUTE_AND_STORE_STRAIN,
- mp->d_epsilondev_xx,
- mp->d_epsilondev_yy,
- mp->d_epsilondev_xy,
- mp->d_epsilondev_xz,
- mp->d_epsilondev_yz,
- mp->d_epsilon_trace_over_3,
- SIMULATION_TYPE,
- ATTENUATION,mp->NSPEC_AB,
- mp->d_one_minus_sum_beta,mp->d_factor_common,
- mp->d_R_xx,mp->d_R_yy,mp->d_R_xy,mp->d_R_xz,mp->d_R_yz,
- mp->d_alphaval,mp->d_betaval,mp->d_gammaval
- );
-
-
- // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- // int procid;
- // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- // if(procid==0) {
- // for(int i=0;i<17;i++) {
- // printf("cudadebug[%d] = %e\n",i,h_debug[i]);
- // }
- // }
-// free(h_debug);
-// cudaFree(d_debug);
-// #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-// exit_on_cuda_error("Kernel_2_impl");
-// #endif
-
- if(SIMULATION_TYPE == 3) {
- //Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
- Kernel_2_impl<<< grid,threads>>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
- mp->d_phase_ispec_inner_elastic,
- mp->num_phase_ispec_elastic,
- d_iphase,
- mp->d_b_displ, mp->d_b_accel,
- mp->d_xix, mp->d_xiy, mp->d_xiz,
- mp->d_etax, mp->d_etay, mp->d_etaz,
- mp->d_gammax, mp->d_gammay, mp->d_gammaz,
- mp->d_kappav, mp->d_muv,
- //d_debug,
- COMPUTE_AND_STORE_STRAIN,
- mp->d_b_epsilondev_xx,
- mp->d_b_epsilondev_yy,
- mp->d_b_epsilondev_xy,
- mp->d_b_epsilondev_xz,
- mp->d_b_epsilondev_yz,
- mp->d_b_epsilon_trace_over_3,
- SIMULATION_TYPE,
- ATTENUATION,mp->NSPEC_AB,
- mp->d_one_minus_sum_beta,mp->d_factor_common,
- mp->d_b_R_xx,mp->d_b_R_yy,mp->d_b_R_xy,mp->d_b_R_xz,mp->d_b_R_yz,
- mp->d_b_alphaval,mp->d_b_betaval,mp->d_b_gammaval
- );
- }
-
- // cudaEventRecord( stop, 0 );
- // cudaEventSynchronize( stop );
- // cudaEventElapsedTime( &time, start, stop );
- // cudaEventDestroy( start );
- // cudaEventDestroy( stop );
- // printf("Kernel2 Execution Time: %f ms\n",time);
-
- // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- // for(int i=0;i<10;i++) {
- // printf("debug[%d]=%e\n",i,h_debug[i]);
- // }
-
- /* cudaThreadSynchronize(); */
- /* LOG("Kernel 2 finished"); */
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("Kernel_2_impl ");
-#endif
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-//__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic,
-// int num_phase_ispec_elastic, int d_iphase, int* d_ibool) {
-// int bx = blockIdx.x;
-// int tx = threadIdx.x;
-// int working_element;
-// //int ispec;
-// //int NGLL3_ALIGN = 128;
-// if(tx==0 && bx==0) {
-//
-// d_debug_output[0] = 420.0;
-//
-// d_debug_output[2] = num_phase_ispec_elastic;
-// d_debug_output[3] = d_iphase;
-// working_element = d_phase_ispec_inner_elastic[bx + num_phase_ispec_elastic*(d_iphase-1)]-1;
-// d_debug_output[4] = working_element;
-// d_debug_output[5] = d_phase_ispec_inner_elastic[0];
-// /* d_debug_output[1] = d_ibool[working_element*NGLL3_ALIGN + tx]-1; */
-// }
-// /* d_debug_output[1+tx+128*bx] = 69.0; */
-//
-//}
-
-/* ----------------------------------------------------------------------------------------------- */
-
// double precision temporary variables leads to 10% performance
// decrease in Kernel_2_impl (not very much..)
//typedef float reald;
@@ -604,8 +429,12 @@
// #define MANUALLY_UNROLLED_LOOPS
-__global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
- int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,
+__global__ void Kernel_2_impl(int nb_blocks_to_compute,
+ int NGLOB,
+ int* d_ibool,
+ int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic,
+ int d_iphase,
+ int use_mesh_coloring_gpu,
float* d_displ, float* d_accel,
float* d_xix, float* d_xiy, float* d_xiz,
float* d_etax, float* d_etay, float* d_etaz,
@@ -617,7 +446,8 @@
float* epsilondev_xz,float* epsilondev_yz,
float* epsilon_trace_over_3,
int SIMULATION_TYPE,
- int ATTENUATION, int NSPEC,
+ int ATTENUATION,
+ int NSPEC,
float* one_minus_sum_beta,float* factor_common,
float* R_xx, float* R_yy, float* R_xy, float* R_xz, float* R_yz,
float* alphaval,float* betaval,float* gammaval
@@ -676,8 +506,19 @@
// copy from global memory to shared memory
// each thread writes one of the NGLL^3 = 125 data points
if (active) {
- // iphase-1 and working_element-1 for Fortran->C array conventions
- working_element = d_phase_ispec_inner_elastic[bx + num_phase_ispec_elastic*(d_iphase-1)]-1;
+
+#ifdef USE_MESH_COLORING_GPU
+ working_element = bx;
+#else
+ //mesh coloring
+ if( use_mesh_coloring_gpu ){
+ working_element = bx;
+ }else{
+ // iphase-1 and working_element-1 for Fortran->C array conventions
+ working_element = d_phase_ispec_inner_elastic[bx + num_phase_ispec_elastic*(d_iphase-1)]-1;
+ }
+#endif
+
// iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
iglob = d_ibool[working_element*125 + tx]-1;
@@ -826,8 +667,6 @@
duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l;
duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l;
-
-
duxdxl_plus_duydyl = duxdxl + duydyl;
duxdxl_plus_duzdzl = duxdxl + duzdzl;
duydyl_plus_duzdzl = duydyl + duzdzl;
@@ -945,14 +784,14 @@
tempy3l += s_tempy3[offset]*fac3;
tempz3l += s_tempz3[offset]*fac3;
- if(working_element == 169)
- if(l==0)
- if(I+J+K == 0) {
+ //if(working_element == 169)
+ // if(l==0)
+ // if(I+J+K == 0) {
// atomicAdd(&d_debug[0],1.0);
// d_debug[0] = fac3;
// d_debug[1] = offset;
// d_debug[2] = s_tempz3[offset];
- }
+ // }
}
#else
@@ -1022,30 +861,47 @@
d_accel[iglob + 2*NGLOB] = tex1Dfetch(tex_accel, iglob + 2*NGLOB) - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
#else
/* OLD/To be implemented version that uses coloring to get around race condition. About 1.6x faster */
- // d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
- // d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
- // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
- //if(iglob*3+2 == 41153) {
- // int ot = d_debug[5];
- // d_debug[0+1+ot] = d_accel[iglob*3+2];
- // // d_debug[1+1+ot] = fac1*tempz1l;
- // // d_debug[2+1+ot] = fac2*tempz2l;
- // // d_debug[3+1+ot] = fac3*tempz3l;
- // d_debug[1+1+ot] = fac1;
- // d_debug[2+1+ot] = fac2;
- // d_debug[3+1+ot] = fac3;
- // d_debug[4+1+ot] = d_accel[iglob*3+2]-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
- // atomicAdd(&d_debug[0],1.0);
- // d_debug[6+ot] = d_displ[iglob*3+2];
- //}
- atomicAdd(&d_accel[iglob*3],-(fac1*tempx1l + fac2*tempx2l + fac3*tempx3l));
- atomicAdd(&d_accel[iglob*3+1],-(fac1*tempy1l + fac2*tempy2l + fac3*tempy3l));
- atomicAdd(&d_accel[iglob*3+2],-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l));
+#ifdef USE_MESH_COLORING_GPU
+ // no atomic operation needed, colors don't share global points between elements
+ d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
+ d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
+ d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+#else
+ //mesh coloring
+ if( use_mesh_coloring_gpu ){
+ // no atomic operation needed, colors don't share global points between elements
+ d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
+ d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
+ d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+
+
+ }else{
+
+ //if(iglob*3+2 == 41153) {
+ // int ot = d_debug[5];
+ // d_debug[0+1+ot] = d_accel[iglob*3+2];
+ // // d_debug[1+1+ot] = fac1*tempz1l;
+ // // d_debug[2+1+ot] = fac2*tempz2l;
+ // // d_debug[3+1+ot] = fac3*tempz3l;
+ // d_debug[1+1+ot] = fac1;
+ // d_debug[2+1+ot] = fac2;
+ // d_debug[3+1+ot] = fac3;
+ // d_debug[4+1+ot] = d_accel[iglob*3+2]-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+ // atomicAdd(&d_debug[0],1.0);
+ // d_debug[6+ot] = d_displ[iglob*3+2];
+ //}
+
+ atomicAdd(&d_accel[iglob*3],-(fac1*tempx1l + fac2*tempx2l + fac3*tempx3l));
+ atomicAdd(&d_accel[iglob*3+1],-(fac1*tempy1l + fac2*tempy2l + fac3*tempy3l));
+ atomicAdd(&d_accel[iglob*3+2],-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l));
+ }
#endif
+#endif
+
// update memory variables based upon the Runge-Kutta scheme
if( ATTENUATION ){
compute_element_att_memory(tx,working_element,NSPEC,
@@ -1079,6 +935,359 @@
/* ----------------------------------------------------------------------------------------------- */
+void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,
+ int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE,int ATTENUATION,
+ int* d_ibool,
+ float* d_xix,
+ float* d_xiy,
+ float* d_xiz,
+ float* d_etax,
+ float* d_etay,
+ float* d_etaz,
+ float* d_gammax,
+ float* d_gammay,
+ float* d_gammaz,
+ float* d_kappav,
+ float* d_muv,
+ float* d_epsilondev_xx,
+ float* d_epsilondev_yy,
+ float* d_epsilondev_xy,
+ float* d_epsilondev_xz,
+ float* d_epsilondev_yz,
+ float* d_epsilon_trace_over_3,
+ float* d_one_minus_sum_beta,
+ float* d_factor_common,
+ float* d_R_xx,
+ float* d_R_yy,
+ float* d_R_xy,
+ float* d_R_xz,
+ float* d_R_yz,
+ float* d_b_epsilondev_xx,
+ float* d_b_epsilondev_yy,
+ float* d_b_epsilondev_xy,
+ float* d_b_epsilondev_xz,
+ float* d_b_epsilondev_yz,
+ float* d_b_epsilon_trace_over_3,
+ float* d_b_R_xx,
+ float* d_b_R_yy,
+ float* d_b_R_xy,
+ float* d_b_R_xz,
+ float* d_b_R_yz){
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("before kernel Kernel 2");
+#endif
+
+ /* if the grid can handle the number of blocks, we let it be 1D */
+ /* grid_2_x = nb_elem_color; */
+ /* nb_elem_color is just how many blocks we are computing now */
+
+ int num_blocks_x = nb_blocks_to_compute;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ //int threads_2 = 128;//BLOCK_SIZE_K2;
+ //dim3 grid_2(num_blocks_x,num_blocks_y);
+
+ int blocksize = 128;
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // debugging
+ //printf("Starting with grid %dx%d for %d blocks\n",num_blocks_x,num_blocks_y,nb_blocks_to_compute);
+ // float* d_debug;
+ // float* h_debug;
+ // h_debug = (float*)calloc(128,sizeof(float));
+ // cudaMalloc((void**)&d_debug,128*sizeof(float));
+ // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
+
+ // Cuda timing
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+
+ //Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+ Kernel_2_impl<<<grid,threads>>>(nb_blocks_to_compute,
+ mp->NGLOB_AB,
+ d_ibool,
+ mp->d_phase_ispec_inner_elastic,
+ mp->num_phase_ispec_elastic,
+ d_iphase,
+ mp->use_mesh_coloring_gpu,
+ mp->d_displ, mp->d_accel,
+ d_xix, d_xiy, d_xiz,
+ d_etax, d_etay, d_etaz,
+ d_gammax, d_gammay, d_gammaz,
+ d_kappav, d_muv,
+ //d_debug,
+ COMPUTE_AND_STORE_STRAIN,
+ d_epsilondev_xx,
+ d_epsilondev_yy,
+ d_epsilondev_xy,
+ d_epsilondev_xz,
+ d_epsilondev_yz,
+ d_epsilon_trace_over_3,
+ SIMULATION_TYPE,
+ ATTENUATION,mp->NSPEC_AB,
+ d_one_minus_sum_beta,
+ d_factor_common,
+ d_R_xx,d_R_yy,d_R_xy,d_R_xz,d_R_yz,
+ mp->d_alphaval,mp->d_betaval,mp->d_gammaval
+ );
+
+
+ // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ // int procid;
+ // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ // if(procid==0) {
+ // for(int i=0;i<17;i++) {
+ // printf("cudadebug[%d] = %e\n",i,h_debug[i]);
+ // }
+ // }
+ // free(h_debug);
+ // cudaFree(d_debug);
+ // #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ // exit_on_cuda_error("Kernel_2_impl");
+ // #endif
+
+ if(SIMULATION_TYPE == 3) {
+ //Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+ Kernel_2_impl<<< grid,threads>>>(nb_blocks_to_compute,
+ mp->NGLOB_AB,
+ d_ibool,
+ mp->d_phase_ispec_inner_elastic,
+ mp->num_phase_ispec_elastic,
+ d_iphase,
+ mp->use_mesh_coloring_gpu,
+ mp->d_b_displ, mp->d_b_accel,
+ d_xix, d_xiy, d_xiz,
+ d_etax, d_etay, d_etaz,
+ d_gammax, d_gammay, d_gammaz,
+ d_kappav, d_muv,
+ //d_debug,
+ COMPUTE_AND_STORE_STRAIN,
+ d_b_epsilondev_xx,
+ d_b_epsilondev_yy,
+ d_b_epsilondev_xy,
+ d_b_epsilondev_xz,
+ d_b_epsilondev_yz,
+ d_b_epsilon_trace_over_3,
+ SIMULATION_TYPE,
+ ATTENUATION,mp->NSPEC_AB,
+ d_one_minus_sum_beta,
+ d_factor_common,
+ d_b_R_xx,d_b_R_yy,d_b_R_xy,d_b_R_xz,d_b_R_yz,
+ mp->d_b_alphaval,mp->d_b_betaval,mp->d_b_gammaval
+ );
+ }
+
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("Kernel2 Execution Time: %f ms\n",time);
+
+ // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ // for(int i=0;i<10;i++) {
+ // printf("debug[%d]=%e\n",i,h_debug[i]);
+ // }
+
+ /* cudaThreadSynchronize(); */
+ /* LOG("Kernel 2 finished"); */
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("Kernel_2_impl ");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+extern "C"
+void FC_FUNC_(compute_forces_elastic_cuda,
+ COMPUTE_FORCES_ELASTIC_CUDA)(long* Mesh_pointer_f,
+ int* iphase,
+ int* nspec_outer_elastic,
+ int* nspec_inner_elastic,
+ int* SIMULATION_TYPE,
+ int* COMPUTE_AND_STORE_STRAIN,
+ int* ATTENUATION) {
+
+ TRACE("compute_forces_elastic_cuda");
+ // EPIK_TRACER("compute_forces_elastic_cuda");
+ //printf("Running compute_forces\n");
+ //double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ int num_elements;
+
+ if( *iphase == 1 )
+ num_elements = *nspec_outer_elastic;
+ else
+ num_elements = *nspec_inner_elastic;
+
+ // checks if anything to do
+ if( num_elements == 0 ) return;
+
+ //int myrank;
+ /* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
+ /* if(myrank==0) { */
+
+ // mesh coloring
+ if( mp->use_mesh_coloring_gpu ){
+
+ // note: array offsets require sorted arrays, such that e.g. ibool starts with elastic elements
+ // and followed by acoustic ones.
+ // elastic elements also start with outer than inner element ordering
+
+ int nb_colors,nb_blocks_to_compute;
+ int istart;
+ int color_offset,color_offset_nonpadded,color_offset_nonpadded_att2;
+
+ // sets up color loop
+ if( *iphase == 1 ){
+ // outer elements
+ nb_colors = mp->num_colors_outer_elastic;
+ istart = 0;
+
+ // array offsets
+ color_offset = 0;
+ color_offset_nonpadded = 0;
+ color_offset_nonpadded_att2 = 0;
+ }else{
+ // inner elements (start after outer elements)
+ nb_colors = mp->num_colors_outer_elastic + mp->num_colors_inner_elastic;
+ istart = mp->num_colors_outer_elastic;
+
+ // array offsets
+ color_offset = (*nspec_outer_elastic) * NGLL3_PADDED;
+ color_offset_nonpadded = (*nspec_outer_elastic) * NGLL3_NONPADDED;
+ color_offset_nonpadded_att2 = (*nspec_outer_elastic) * NGLL3_NONPADDED * N_SLS;
+ }
+
+ // loops over colors
+ for(int icolor = istart; icolor < nb_colors; icolor++){
+
+ nb_blocks_to_compute = mp->h_num_elem_colors_elastic[icolor];
+
+ // checks
+ //if( nb_blocks_to_compute <= 0 ){
+ // printf("error number of elastic color blocks: %d -- color = %d \n",nb_blocks_to_compute,icolor);
+ // exit(EXIT_FAILURE);
+ //}
+
+ Kernel_2(nb_blocks_to_compute,mp,*iphase,
+ *COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE,*ATTENUATION,
+ mp->d_ibool + color_offset_nonpadded,
+ mp->d_xix + color_offset,
+ mp->d_xiy + color_offset,
+ mp->d_xiz + color_offset,
+ mp->d_etax + color_offset,
+ mp->d_etay + color_offset,
+ mp->d_etaz + color_offset,
+ mp->d_gammax + color_offset,
+ mp->d_gammay + color_offset,
+ mp->d_gammaz + color_offset,
+ mp->d_kappav + color_offset,
+ mp->d_muv + color_offset,
+ mp->d_epsilondev_xx + color_offset_nonpadded,
+ mp->d_epsilondev_yy + color_offset_nonpadded,
+ mp->d_epsilondev_xy + color_offset_nonpadded,
+ mp->d_epsilondev_xz + color_offset_nonpadded,
+ mp->d_epsilondev_yz + color_offset_nonpadded,
+ mp->d_epsilon_trace_over_3 + color_offset_nonpadded,
+ mp->d_one_minus_sum_beta + color_offset_nonpadded,
+ mp->d_factor_common + color_offset_nonpadded_att2,
+ mp->d_R_xx + color_offset_nonpadded,
+ mp->d_R_yy + color_offset_nonpadded,
+ mp->d_R_xy + color_offset_nonpadded,
+ mp->d_R_xz + color_offset_nonpadded,
+ mp->d_R_yz + color_offset_nonpadded,
+ mp->d_b_epsilondev_xx + color_offset_nonpadded,
+ mp->d_b_epsilondev_yy + color_offset_nonpadded,
+ mp->d_b_epsilondev_xy + color_offset_nonpadded,
+ mp->d_b_epsilondev_xz + color_offset_nonpadded,
+ mp->d_b_epsilondev_yz + color_offset_nonpadded,
+ mp->d_b_epsilon_trace_over_3 + color_offset_nonpadded,
+ mp->d_b_R_xx + color_offset_nonpadded,
+ mp->d_b_R_yy + color_offset_nonpadded,
+ mp->d_b_R_xy + color_offset_nonpadded,
+ mp->d_b_R_xz + color_offset_nonpadded,
+ mp->d_b_R_yz + color_offset_nonpadded);
+
+ // for padded and aligned arrays
+ color_offset += nb_blocks_to_compute * NGLL3_PADDED;
+ // for no-aligned arrays
+ color_offset_nonpadded += nb_blocks_to_compute * NGLL3_NONPADDED;
+ // for factor_common array
+ color_offset_nonpadded_att2 += nb_blocks_to_compute * NGLL3_NONPADDED * N_SLS;
+ }
+
+ }else{
+
+ // no mesh coloring: uses atomic updates
+
+ Kernel_2(num_elements,mp,*iphase,
+ *COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE,*ATTENUATION,
+ mp->d_ibool,
+ mp->d_xix,
+ mp->d_xiy,
+ mp->d_xiz,
+ mp->d_etax,
+ mp->d_etay,
+ mp->d_etaz,
+ mp->d_gammax,
+ mp->d_gammay,
+ mp->d_gammaz,
+ mp->d_kappav,
+ mp->d_muv,
+ mp->d_epsilondev_xx,
+ mp->d_epsilondev_yy,
+ mp->d_epsilondev_xy,
+ mp->d_epsilondev_xz,
+ mp->d_epsilondev_yz,
+ mp->d_epsilon_trace_over_3,
+ mp->d_one_minus_sum_beta,
+ mp->d_factor_common,
+ mp->d_R_xx,
+ mp->d_R_yy,
+ mp->d_R_xy,
+ mp->d_R_xz,
+ mp->d_R_yz,
+ mp->d_b_epsilondev_xx,
+ mp->d_b_epsilondev_yy,
+ mp->d_b_epsilondev_xy,
+ mp->d_b_epsilondev_xz,
+ mp->d_b_epsilondev_yz,
+ mp->d_b_epsilon_trace_over_3,
+ mp->d_b_R_xx,
+ mp->d_b_R_yy,
+ mp->d_b_R_xy,
+ mp->d_b_R_xz,
+ mp->d_b_R_yz);
+ }
+
+
+
+ //cudaThreadSynchronize();
+
+ //#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ /* MPI_Barrier(MPI_COMM_WORLD); */
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ //#endif
+}
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
// KERNEL 3
/* ----------------------------------------------------------------------------------------------- */
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2011-11-03 03:11:45 UTC (rev 19145)
@@ -42,13 +42,11 @@
*/
-
#ifndef GPU_MESH_
#define GPU_MESH_
#include <sys/types.h>
#include <unistd.h>
-
/* ----------------------------------------------------------------------------------------------- */
// for debugging and benchmarking
@@ -73,27 +71,22 @@
#define PRINT5(var,offset) // for(i=0;i<10;i++) printf("var(%d)=%f\n",i,var[offset+i]);
#endif
-
-//#undef ENABLE_VERY_SLOW_ERROR_CHECKING
+// error checking after cuda function calls
#define ENABLE_VERY_SLOW_ERROR_CHECKING
+
/* ----------------------------------------------------------------------------------------------- */
// indexing
#define INDEX2(xsize,x,y) x + (y)*xsize
-#define INDEX3(xsize,ysize,x,y,z) x + (y)*xsize + (z)*xsize*ysize
-#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize
-#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize + (j)*xsize*ysize*zsize*isize
+#define INDEX3(xsize,ysize,x,y,z) x + xsize*(y + ysize*z)
+#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + xsize*(y + ysize*(z + zsize*i))
+#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + xsize*(y + ysize*(z + zsize*(i + isize*j)))
#define INDEX6(xsize,ysize,zsize,isize,jsize,x,y,z,i,j,k) x + xsize*(y + ysize*(z + zsize*(i + isize*(j + jsize*k))))
-#define INDEX4_PADDED(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*128
+#define INDEX4_PADDED(xsize,ysize,zsize,x,y,z,i) x + xsize*(y + ysize*z) + (i)*128
-//daniel: TODO -- check speed of these alternative definitions
-//#define INDEX2(xsize,x,y) x + (y)*xsize
-//#define INDEX3(xsize,ysize,x,y,z) x + xsize*(y + ysize*z)
-//#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + xsize*(y + ysize*(z + zsize*i))
-//#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + xsize*(y + ysize*(z + zsize*(i + isize*j)))
/* ----------------------------------------------------------------------------------------------- */
@@ -120,6 +113,9 @@
#define NGLL2 25
#define N_SLS 3
+#define NGLL3_NONPADDED 125
+#define NGLL3_PADDED 128
+
//typedef float real; // type of variables passed into function
typedef float realw; // type of "working" variables
@@ -127,6 +123,10 @@
// decrease in Kernel_2_impl (not very much..)
typedef float reald;
+// (optional) pre-processing directive used in kernels: if defined check that it is also set in src/shared/constants.h:
+// leads up to ~ 5% performance increase
+//#define USE_MESH_COLORING_GPU
+
/* ----------------------------------------------------------------------------------------------- */
// mesh pointer wrapper structure
@@ -153,6 +153,9 @@
// inner / outer elements
int* d_ispec_is_inner;
+ // mesh coloring
+ int use_mesh_coloring_gpu;
+
// pointers to constant memory arrays
float* d_hprime_xx; float* d_hprime_yy; float* d_hprime_zz;
float* d_hprimewgll_xx; float* d_hprimewgll_yy; float* d_hprimewgll_zz;
@@ -174,6 +177,11 @@
int* d_phase_ispec_inner_elastic;
int num_phase_ispec_elastic;
+ // mesh coloring
+ int* h_num_elem_colors_elastic;
+ int num_colors_outer_elastic,num_colors_inner_elastic;
+ int nspec_elastic;
+
float* d_rmass;
float* d_send_accel_buffer;
@@ -301,6 +309,11 @@
int* d_phase_ispec_inner_acoustic;
int num_phase_ispec_acoustic;
+ // mesh coloring
+ int* h_num_elem_colors_acoustic;
+ int num_colors_outer_acoustic,num_colors_inner_acoustic;
+ int nspec_acoustic;
+
float* d_rhostore;
float* d_kappastore;
float* d_rmass_acoustic;
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2011-11-03 03:11:45 UTC (rev 19145)
@@ -452,6 +452,8 @@
int* nrec_f,
int* nrec_local_f,
int* SIMULATION_TYPE,
+ int* USE_MESH_COLORING_GPU_f,
+ int* nspec_acoustic,int* nspec_elastic,
int* ncuda_devices) {
TRACE("prepare_constants_device");
@@ -648,6 +650,20 @@
print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_rec,h_ispec_selected_rec,
nrec*sizeof(int),cudaMemcpyHostToDevice),1514);
+#ifdef USE_MESH_COLORING_GPU
+ mp->use_mesh_coloring_gpu = 1;
+ if( ! *USE_MESH_COLORING_GPU_f ) exit_on_error("error with USE_MESH_COLORING_GPU constant; please re-compile\n");
+#else
+ // mesh coloring
+ // note: this here passes the coloring as an option to the kernel routines
+ // the performance seems to be the same if one uses the pre-processing directives above or not
+ mp->use_mesh_coloring_gpu = *USE_MESH_COLORING_GPU_f;
+#endif
+
+ // number of elements per domain
+ mp->nspec_acoustic = *nspec_acoustic;
+ mp->nspec_elastic = *nspec_elastic;
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("prepare_constants_device");
#endif
@@ -746,8 +762,10 @@
int* coupling_ac_el_ispec,
int* coupling_ac_el_ijk,
float* coupling_ac_el_normal,
- float* coupling_ac_el_jacobian2Dw
- ) {
+ float* coupling_ac_el_jacobian2Dw,
+ int* num_colors_outer_acoustic,
+ int* num_colors_inner_acoustic,
+ int* num_elem_colors_acoustic) {
TRACE("prepare_fields_acoustic_device");
@@ -850,6 +868,13 @@
}
+ // mesh coloring
+ if( mp->use_mesh_coloring_gpu ){
+ mp->num_colors_outer_acoustic = *num_colors_outer_acoustic;
+ mp->num_colors_inner_acoustic = *num_colors_inner_acoustic;
+ mp->h_num_elem_colors_acoustic = (int*) num_elem_colors_acoustic;
+ }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("prepare_fields_acoustic_device");
#endif
@@ -937,7 +962,10 @@
int* free_surface_ispec,
int* free_surface_ijk,
int* num_free_surface_faces,
- int* ACOUSTIC_SIMULATION){
+ int* ACOUSTIC_SIMULATION,
+ int* num_colors_outer_elastic,
+ int* num_colors_inner_elastic,
+ int* num_elem_colors_elastic){
TRACE("prepare_fields_elastic_device");
@@ -1125,6 +1153,13 @@
}
}
+ // mesh coloring
+ if( mp->use_mesh_coloring_gpu ){
+ mp->num_colors_outer_elastic = *num_colors_outer_elastic;
+ mp->num_colors_inner_elastic = *num_colors_inner_elastic;
+ mp->h_num_elem_colors_elastic = (int*) num_elem_colors_elastic;
+ }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("prepare_fields_elastic_device");
#endif
@@ -1467,6 +1502,7 @@
cudaFree(mp->d_station_seismo_potential);
free(mp->h_station_seismo_potential);
}
+
} // ACOUSTIC_SIMULATION
// ELASTIC arrays
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2011-11-03 03:11:45 UTC (rev 19145)
@@ -356,6 +356,8 @@
int* nrec_f,
int* nrec_local_f,
int* SIMULATION_TYPE,
+ int* USE_MESH_COLORING_GPU,
+ int* nspec_acoustic,int* nspec_elastic,
int* ncuda_devices)
{
fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
@@ -391,8 +393,10 @@
int* coupling_ac_el_ispec,
int* coupling_ac_el_ijk,
float* coupling_ac_el_normal,
- float* coupling_ac_el_jacobian2Dw
- ){}
+ float* coupling_ac_el_jacobian2Dw,
+ int* num_colors_outer_acoustic,
+ int* num_colors_inner_acoustic,
+ int* num_elem_colors_acoustic){}
void FC_FUNC_(prepare_fields_acoustic_adj_dev,
PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
@@ -426,7 +430,10 @@
int* free_surface_ispec,
int* free_surface_ijk,
int* num_free_surface_faces,
- int* ACOUSTIC_SIMULATION){}
+ int* ACOUSTIC_SIMULATION,
+ int* num_colors_outer_elastic,
+ int* num_colors_inner_elastic,
+ int* num_elem_colors_elastic){}
void FC_FUNC_(prepare_fields_elastic_adj_dev,
PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/Makefile.in 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/Makefile.in 2011-11-03 03:11:45 UTC (rev 19145)
@@ -78,14 +78,15 @@
$O/detect_surface.o \
$O/exit_mpi.o \
$O/get_absorbing_boundary.o \
- $O/get_coupling_surfaces.o \
- $O/get_model.o \
- $O/get_MPI.o \
$O/get_attenuation_model.o \
$O/get_cmt.o \
+ $O/get_coupling_surfaces.o \
$O/get_element_face.o \
$O/get_global.o \
$O/get_jacobian_boundaries.o \
+ $O/get_model.o \
+ $O/get_MPI.o \
+ $O/get_perm_color.o \
$O/get_shape2D.o \
$O/get_shape3D.o \
$O/get_value_parameters.o \
@@ -319,6 +320,9 @@
$O/get_MPI.o: ${SHARED}/constants.h get_MPI.f90
${FCCOMPILE_CHECK} -c -o $O/get_MPI.o get_MPI.f90
+$O/get_perm_color.o: ${SHARED}/constants.h get_perm_color.f90
+ ${FCCOMPILE_CHECK} -c -o $O/get_perm_color.o get_perm_color.f90
+
$O/create_name_database.o: ${SHARED}/constants.h ${SHARED}/create_name_database.f90
${FCCOMPILE_CHECK} -c -o $O/create_name_database.o ${SHARED}/create_name_database.f90
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -86,6 +86,14 @@
integer, dimension(:), allocatable :: coupling_ac_el_ispec
integer :: num_coupling_ac_el_faces
+ ! Moho mesh
+ real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_top
+ real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_bot
+ integer,dimension(:,:,:),allocatable :: ijk_moho_top, ijk_moho_bot
+ integer,dimension(:),allocatable :: ibelm_moho_top, ibelm_moho_bot
+ integer :: NSPEC2D_MOHO
+ logical, dimension(:),allocatable :: is_moho_top, is_moho_bot
+
! for stacey
real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rho_vp,rho_vs
@@ -103,6 +111,26 @@
! name of the database file
character(len=256) prname
+! inner/outer elements
+ logical,dimension(:),allocatable :: ispec_is_inner
+ integer :: nspec_inner_acoustic,nspec_outer_acoustic
+ integer :: nspec_inner_elastic,nspec_outer_elastic
+
+ integer :: num_phase_ispec_acoustic
+ integer,dimension(:,:),allocatable :: phase_ispec_inner_acoustic
+
+ integer :: num_phase_ispec_elastic
+ integer,dimension(:,:),allocatable :: phase_ispec_inner_elastic
+
+ logical :: ACOUSTIC_SIMULATION,ELASTIC_SIMULATION
+
+ ! mesh coloring
+ integer :: num_colors_outer_acoustic,num_colors_inner_acoustic
+ integer, dimension(:), allocatable :: num_elem_colors_acoustic
+
+ integer :: num_colors_outer_elastic,num_colors_inner_elastic
+ integer, dimension(:), allocatable :: num_elem_colors_elastic
+
end module create_regions_mesh_ext_par
!
@@ -112,7 +140,8 @@
! main routine
subroutine create_regions_mesh_ext(ibool, &
- xstore,ystore,zstore,nspec,npointot,myrank,LOCAL_PATH, &
+ xstore,ystore,zstore,nspec, &
+ npointot,myrank,LOCAL_PATH, &
nnodes_ext_mesh,nelmnts_ext_mesh, &
nodes_coords_ext_mesh, elmnts_ext_mesh, &
max_static_memory_size, mat_ext_mesh, materials_ext_mesh, &
@@ -126,15 +155,16 @@
ibelm_xmin, ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top, &
nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax,&
nodes_ibelm_bottom,nodes_ibelm_top, &
- SAVE_MESH_FILES,nglob, &
+ SAVE_MESH_FILES, &
+ nglob, &
ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
ATTENUATION,USE_OLSEN_ATTENUATION, &
UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
- itopo_bathy)
+ itopo_bathy, &
+ nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho)
! create the different regions of the mesh
-
use create_regions_mesh_ext_par
implicit none
!include "constants.h"
@@ -209,6 +239,11 @@
double precision :: ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO
integer, dimension(NX_TOPO,NY_TOPO) :: itopo_bathy
+! moho (optional)
+ integer :: nspec2D_moho_ext
+ integer, dimension(nspec2D_moho_ext) :: ibelm_moho
+ integer, dimension(4,nspec2D_moho_ext) :: nodes_ibelm_moho
+
! local parameters
! static memory size needed by the solver
double precision :: static_memory_size
@@ -289,16 +324,12 @@
if( myrank == 0) then
write(IMAIN,*) ' ...determining velocity model'
endif
- ! Default model. Using PREM model instead
- ! call get_model(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
- ! materials_ext_mesh,nmat_ext_mesh, &
- ! undef_mat_prop,nundefMat_ext_mesh, &
- ! ANISOTROPY,LOCAL_PATH)
+ ! calls wrapper function
+ call get_model(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
+ materials_ext_mesh,nmat_ext_mesh, &
+ undef_mat_prop,nundefMat_ext_mesh, &
+ ANISOTROPY,LOCAL_PATH)
- call get_model_PREM(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
- materials_ext_mesh,nmat_ext_mesh, &
- undef_mat_prop,nundefMat_ext_mesh, &
- ANISOTROPY,LOCAL_PATH)
! sets up absorbing/free surface boundaries
call sync_all()
@@ -324,6 +355,18 @@
num_interfaces_ext_mesh,max_interface_size_ext_mesh, &
my_neighbours_ext_mesh)
+! sets up up Moho surface
+ NSPEC2D_MOHO = 0
+ if( SAVE_MOHO_MESH ) then
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...setting up Moho surface'
+ endif
+ call crm_setup_moho(myrank,nglob,nspec, &
+ nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
+ nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
+ endif
+
! creates mass matrix
call sync_all()
if( myrank == 0) then
@@ -341,6 +384,24 @@
ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
itopo_bathy)
+! locates inner and outer elements
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...element inner/outer separation '
+ endif
+ call crm_setup_inner_outer_elemnts(myrank,nspec,nglob, &
+ num_interfaces_ext_mesh,max_interface_size_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ ibool,SAVE_MESH_FILES)
+
+! colors mesh if requested
+ call sync_all()
+ if( myrank == 0) then
+ write(IMAIN,*) ' ...element mesh coloring '
+ endif
+ call crm_setup_color_perm(myrank,nspec,nglob,ibool,ANISOTROPY,SAVE_MESH_FILES)
+
+
! saves the binary mesh files
call sync_all()
if( myrank == 0) then
@@ -370,9 +431,23 @@
c22store,c23store,c24store,c25store,c26store,c33store, &
c34store,c35store,c36store,c44store,c45store,c46store, &
c55store,c56store,c66store, &
- ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
+ ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic, &
+ ispec_is_inner,nspec_inner_acoustic,nspec_inner_elastic, &
+ nspec_outer_acoustic,nspec_outer_elastic, &
+ num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
+ num_phase_ispec_elastic,phase_ispec_inner_elastic, &
+ num_colors_outer_acoustic,num_colors_inner_acoustic, &
+ num_elem_colors_acoustic, &
+ num_colors_outer_elastic,num_colors_inner_elastic, &
+ num_elem_colors_elastic)
+! saves moho surface
+ if( SAVE_MOHO_MESH ) then
+ call crm_save_moho()
+ endif
+
! computes the approximate amount of static memory needed to run the solver
+ call sync_all()
call memory_eval(nspec,nglob,maxval(nibool_interfaces_ext_mesh),num_interfaces_ext_mesh, &
OCEANS,static_memory_size)
call max_all_dp(static_memory_size, max_static_memory_size)
@@ -782,7 +857,7 @@
!
- subroutine create_regions_mesh_save_moho( myrank,nglob,nspec, &
+ subroutine crm_setup_moho( myrank,nglob,nspec, &
nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
@@ -802,14 +877,6 @@
integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
! local parameters
- ! Moho mesh
- real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_top
- real(CUSTOM_REAL), dimension(:,:,:),allocatable :: normal_moho_bot
- integer,dimension(:,:,:),allocatable :: ijk_moho_top, ijk_moho_bot
- integer,dimension(:),allocatable :: ibelm_moho_top, ibelm_moho_bot
- integer :: NSPEC2D_MOHO
- logical, dimension(:),allocatable :: is_moho_top, is_moho_bot
-
real(kind=CUSTOM_REAL),dimension(NGNOD2D) :: xcoord,ycoord,zcoord
real(kind=CUSTOM_REAL) :: jacobian2Dw_face(NGLLX,NGLLY)
real(kind=CUSTOM_REAL) :: normal_face(NDIM,NGLLX,NGLLY)
@@ -1074,21 +1141,863 @@
write(IMAIN,*) '********'
endif
+ deallocate(iglob_is_surface)
+ deallocate(iglob_normals)
+
+ end subroutine crm_setup_moho
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine crm_save_moho()
+
+ use create_regions_mesh_ext_par
+ implicit none
+ ! local parameters
+ integer :: ier
+
! saves moho files: total number of elements, corner points, all points
- open(unit=27,file=prname(1:len_trim(prname))//'ibelm_moho.bin',status='unknown',form='unformatted')
+ open(unit=27,file=prname(1:len_trim(prname))//'ibelm_moho.bin', &
+ status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening ibelm_moho.bin file'
write(27) NSPEC2D_MOHO
write(27) ibelm_moho_top
write(27) ibelm_moho_bot
write(27) ijk_moho_top
write(27) ijk_moho_bot
close(27)
- open(unit=27,file=prname(1:len_trim(prname))//'normal_moho.bin',status='unknown',form='unformatted')
+ open(unit=27,file=prname(1:len_trim(prname))//'normal_moho.bin', &
+ status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening normal_moho.bin file'
write(27) normal_moho_top
write(27) normal_moho_bot
close(27)
- open(unit=27,file=prname(1:len_trim(prname))//'is_moho.bin',status='unknown',form='unformatted')
+ open(unit=27,file=prname(1:len_trim(prname))//'is_moho.bin', &
+ status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening is_moho.bin file'
write(27) is_moho_top
write(27) is_moho_bot
close(27)
- end subroutine create_regions_mesh_save_moho
+ end subroutine crm_save_moho
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine crm_setup_inner_outer_elemnts(myrank,nspec,nglob, &
+ num_interfaces_ext_mesh,max_interface_size_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ ibool,SAVE_MESH_FILES)
+
+! locates inner and outer elements
+
+ use create_regions_mesh_ext_par
+ implicit none
+
+ integer :: myrank,nspec,nglob
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+ ! MPI interfaces
+ integer :: num_interfaces_ext_mesh,max_interface_size_ext_mesh
+ integer, dimension(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: &
+ ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+
+ logical :: SAVE_MESH_FILES
+
+ ! local parameters
+ integer :: i,j,k,ispec,iglob
+ integer :: iinterface,ier
+ integer :: ispec_inner,ispec_outer
+ real :: percentage_edge
+ character(len=256) :: filename
+ logical,dimension(:),allocatable :: iglob_is_inner
+
+ ! allocates arrays
+ allocate(ispec_is_inner(nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ispec_is_inner'
+
+ ! temporary array
+ allocate(iglob_is_inner(nglob),stat=ier)
+ if( ier /= 0 ) stop 'error allocating temporary array iglob_is_inner'
+
+ ! initialize flags
+ ispec_is_inner(:) = .true.
+ iglob_is_inner(:) = .true.
+ do iinterface = 1, num_interfaces_ext_mesh
+ do i = 1, nibool_interfaces_ext_mesh(iinterface)
+ iglob = ibool_interfaces_ext_mesh(i,iinterface)
+ iglob_is_inner(iglob) = .false.
+ enddo
+ enddo
+
+ ! determines flags for inner elements (purely inside the partition)
+ do ispec = 1, nspec
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool(i,j,k,ispec)
+ ispec_is_inner(ispec) = ( iglob_is_inner(iglob) .and. ispec_is_inner(ispec) )
+ enddo
+ enddo
+ enddo
+ enddo
+
+ ! frees temporary array
+ deallocate( iglob_is_inner )
+
+ if( SAVE_MESH_FILES ) then
+ filename = prname(1:len_trim(prname))//'ispec_is_inner'
+ call write_VTK_data_elem_l(nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+ ispec_is_inner,filename)
+ endif
+
+
+ ! sets up elements for loops in acoustic simulations
+ nspec_inner_acoustic = 0
+ nspec_outer_acoustic = 0
+ call any_all_l( ANY(ispec_is_acoustic), ACOUSTIC_SIMULATION )
+ if( ACOUSTIC_SIMULATION ) then
+ ! counts inner and outer elements
+ do ispec = 1, nspec
+ if( ispec_is_acoustic(ispec) ) then
+ if( ispec_is_inner(ispec) .eqv. .true. ) then
+ nspec_inner_acoustic = nspec_inner_acoustic + 1
+ else
+ nspec_outer_acoustic = nspec_outer_acoustic + 1
+ endif
+ endif
+ enddo
+
+ ! stores indices of inner and outer elements for faster(?) computation
+ num_phase_ispec_acoustic = max(nspec_inner_acoustic,nspec_outer_acoustic)
+ if( num_phase_ispec_acoustic < 0 ) stop 'error acoustic simulation: num_phase_ispec_acoustic is < zero'
+
+ allocate( phase_ispec_inner_acoustic(num_phase_ispec_acoustic,2),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array phase_ispec_inner_acoustic'
+ phase_ispec_inner_acoustic(:,:) = 0
+
+ ispec_inner = 0
+ ispec_outer = 0
+ do ispec = 1, nspec
+ if( ispec_is_acoustic(ispec) ) then
+ if( ispec_is_inner(ispec) .eqv. .true. ) then
+ ispec_inner = ispec_inner + 1
+ phase_ispec_inner_acoustic(ispec_inner,2) = ispec
+ else
+ ispec_outer = ispec_outer + 1
+ phase_ispec_inner_acoustic(ispec_outer,1) = ispec
+ endif
+ endif
+ enddo
+ else
+ ! allocates dummy array
+ num_phase_ispec_acoustic = 0
+ allocate( phase_ispec_inner_acoustic(num_phase_ispec_acoustic,2),stat=ier)
+ if( ier /= 0 ) stop 'error allocating dummy array phase_ispec_inner_acoustic'
+ phase_ispec_inner_acoustic(:,:) = 0
+ endif
+
+ ! sets up elements for loops in acoustic simulations
+ nspec_inner_elastic = 0
+ nspec_outer_elastic = 0
+ call any_all_l( ANY(ispec_is_elastic), ELASTIC_SIMULATION )
+ if( ELASTIC_SIMULATION ) then
+ ! counts inner and outer elements
+ do ispec = 1, nspec
+ if( ispec_is_elastic(ispec) ) then
+ if( ispec_is_inner(ispec) .eqv. .true. ) then
+ nspec_inner_elastic = nspec_inner_elastic + 1
+ else
+ nspec_outer_elastic = nspec_outer_elastic + 1
+ endif
+ endif
+ enddo
+
+ ! stores indices of inner and outer elements for faster(?) computation
+ num_phase_ispec_elastic = max(nspec_inner_elastic,nspec_outer_elastic)
+ if( num_phase_ispec_elastic < 0 ) stop 'error elastic simulation: num_phase_ispec_elastic is < zero'
+
+ allocate( phase_ispec_inner_elastic(num_phase_ispec_elastic,2),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array phase_ispec_inner_elastic'
+ phase_ispec_inner_elastic(:,:) = 0
+
+ ispec_inner = 0
+ ispec_outer = 0
+ do ispec = 1, nspec
+ if( ispec_is_elastic(ispec) ) then
+ if( ispec_is_inner(ispec) .eqv. .true. ) then
+ ispec_inner = ispec_inner + 1
+ phase_ispec_inner_elastic(ispec_inner,2) = ispec
+ else
+ ispec_outer = ispec_outer + 1
+ phase_ispec_inner_elastic(ispec_outer,1) = ispec
+ endif
+ endif
+ enddo
+ else
+ ! allocates dummy array
+ num_phase_ispec_elastic = 0
+ allocate( phase_ispec_inner_elastic(num_phase_ispec_elastic,2),stat=ier)
+ if( ier /= 0 ) stop 'error allocating dummy array phase_ispec_inner_elastic'
+ phase_ispec_inner_elastic(:,:) = 0
+ endif
+
+ ! user output
+ if(myrank == 0) then
+ percentage_edge = 100.*count(ispec_is_inner(:))/real(nspec)
+ write(IMAIN,*) ' for overlapping of communications with calculations:'
+ write(IMAIN,*) ' percentage of edge elements ',100. -percentage_edge,'%'
+ write(IMAIN,*) ' percentage of volume elements ',percentage_edge,'%'
+ endif
+
+ end subroutine crm_setup_inner_outer_elemnts
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine crm_setup_color_perm(myrank,nspec,nglob,ibool,ANISOTROPY,SAVE_MESH_FILES)
+
+! sets up mesh coloring and permutes elements
+
+ use create_regions_mesh_ext_par
+ implicit none
+
+ integer :: myrank,nspec,nglob
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+ logical :: ANISOTROPY,SAVE_MESH_FILES
+
+ ! local parameters
+ integer, dimension(:), allocatable :: perm
+ integer :: ier
+
+ ! user output
+ if(myrank == 0) then
+ write(IMAIN,*) ' use coloring = ',USE_MESH_COLORING_GPU
+ endif
+
+ ! initializes
+ num_colors_outer_acoustic = 0
+ num_colors_inner_acoustic = 0
+ num_colors_outer_elastic = 0
+ num_colors_inner_elastic = 0
+
+ ! mesh coloring
+ if( USE_MESH_COLORING_GPU ) then
+
+ ! creates coloring of elements
+ allocate(perm(nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating temporary perm array'
+ perm(:) = 0
+
+ ! acoustic domains
+ if( ACOUSTIC_SIMULATION ) then
+ if( myrank == 0) then
+ write(IMAIN,*) ' acoustic domains: '
+ endif
+ call crm_setup_color(myrank,nspec,nglob,ibool,perm, &
+ ispec_is_acoustic,1, &
+ num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
+ SAVE_MESH_FILES)
+ else
+ ! allocates dummy arrays
+ allocate(num_elem_colors_acoustic(num_colors_outer_acoustic + num_colors_inner_acoustic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_acoustic array'
+ endif
+
+ ! elastic domains
+ if( ELASTIC_SIMULATION ) then
+ if( myrank == 0) then
+ write(IMAIN,*) ' elastic domains: '
+ endif
+ call crm_setup_color(myrank,nspec,nglob,ibool,perm, &
+ ispec_is_elastic,2, &
+ num_phase_ispec_elastic,phase_ispec_inner_elastic, &
+ SAVE_MESH_FILES)
+ else
+ allocate(num_elem_colors_elastic(num_colors_outer_elastic + num_colors_inner_elastic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_elastic array'
+ endif
+
+ ! checks: after all domains are done
+ if(minval(perm) /= 1) &
+ call exit_MPI(myrank, 'minval(perm) should be 1')
+ if(maxval(perm) /= max(num_phase_ispec_acoustic,num_phase_ispec_elastic)) &
+ call exit_MPI(myrank, 'maxval(perm) should be max(num_phase_ispec_..)')
+
+ ! sorts array according to permutation
+ call sync_all()
+ if(myrank == 0) then
+ write(IMAIN,*) ' mesh permutation:'
+ endif
+ call crm_setup_permutation(myrank,nspec,nglob,ibool,ANISOTROPY,perm, &
+ SAVE_MESH_FILES)
+
+ deallocate(perm)
+
+ else
+
+ ! allocates dummy arrays
+ allocate(num_elem_colors_acoustic(num_colors_outer_acoustic + num_colors_inner_acoustic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_acoustic array'
+ allocate(num_elem_colors_elastic(num_colors_outer_elastic + num_colors_inner_elastic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_elastic array'
+
+ endif ! USE_MESH_COLORING_GPU
+
+ end subroutine crm_setup_color_perm
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine crm_setup_color(myrank,nspec,nglob,ibool,perm, &
+ ispec_is_d,idomain, &
+ num_phase_ispec_d,phase_ispec_inner_d, &
+ SAVE_MESH_FILES)
+
+! sets up mesh coloring
+
+ use create_regions_mesh_ext_par
+ implicit none
+
+ integer :: myrank,nspec,nglob
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+ integer, dimension(nspec) :: perm
+
+ ! wrapper array for ispec is in domain:
+ ! idomain acoustic == 1 or elastic == 2
+ integer :: idomain
+ logical, dimension(nspec) :: ispec_is_d
+ integer :: num_phase_ispec_d
+ integer, dimension(num_phase_ispec_d,2) :: phase_ispec_inner_d
+
+ logical :: SAVE_MESH_FILES
+
+ ! local parameters
+ ! added for color permutation
+ integer :: nb_colors_outer_elements,nb_colors_inner_elements
+ integer, dimension(:), allocatable :: num_of_elems_in_this_color
+ integer, dimension(:), allocatable :: color
+ integer, dimension(:), allocatable :: first_elem_number_in_this_color
+ logical, dimension(:), allocatable :: is_on_a_slice_edge
+
+ integer :: nspec_outer,nspec_inner,nspec_domain
+ integer :: nspec_outer_min_global,nspec_outer_max_global
+ integer :: nb_colors,nb_colors_min,nb_colors_max
+
+ integer :: icolor,ispec,ispec_counter
+ integer :: ispec_inner,ispec_outer
+ integer :: ier
+
+ character(len=2),dimension(2) :: str_domain = (/ "ac", "el" /)
+ character(len=256) :: filename
+
+ !!!! David Michea: detection of the edges, coloring and permutation separately
+
+ ! implement mesh coloring for GPUs if needed, to create subsets of disconnected elements
+ ! to remove dependencies and the need for atomic operations in the sum of
+ ! elemental contributions in the solver
+
+ ! allocates temporary array with colors
+ allocate(color(nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating temporary color array'
+ allocate(first_elem_number_in_this_color(MAX_NUMBER_OF_COLORS + 1),stat=ier)
+ if( ier /= 0 ) stop 'error allocating first_elem_number_in_this_color array'
+
+ ! flags for elements on outer rims
+ ! opposite to what is stored in ispec_is_inner
+ allocate(is_on_a_slice_edge(nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating is_on_a_slice_edge array'
+ do ispec = 1,nspec
+ is_on_a_slice_edge(ispec) = .not. ispec_is_inner(ispec)
+ enddo
+
+ ! fast element coloring scheme
+ call get_perm_color_faster(is_on_a_slice_edge,ispec_is_d, &
+ ibool,perm,color, &
+ nspec,nglob, &
+ nb_colors_outer_elements,nb_colors_inner_elements, &
+ nspec_outer,nspec_inner,nspec_domain, &
+ first_elem_number_in_this_color, &
+ myrank)
+
+ ! for the last color, the next color is fictitious and its first (fictitious) element number is nspec + 1
+ first_elem_number_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements + 1) &
+ = nspec_domain + 1
+
+ allocate(num_of_elems_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_of_elems_in_this_color array'
+
+ num_of_elems_in_this_color(:) = 0
+ do icolor = 1, nb_colors_outer_elements + nb_colors_inner_elements
+ num_of_elems_in_this_color(icolor) = first_elem_number_in_this_color(icolor+1) - first_elem_number_in_this_color(icolor)
+ enddo
+
+ ! check that the sum of all the numbers of elements found in each color is equal
+ ! to the total number of elements in the mesh
+ if(sum(num_of_elems_in_this_color) /= nspec_domain) then
+ print *,'error number of elements in this color:',idomain
+ print *,'rank: ',myrank,' nspec = ',nspec_domain
+ print *,' total number of elements in all the colors of the mesh = ', &
+ sum(num_of_elems_in_this_color)
+ call exit_MPI(myrank, 'incorrect total number of elements in all the colors of the mesh')
+ endif
+
+ ! check that the sum of all the numbers of elements found in each color for the outer elements is equal
+ ! to the total number of outer elements found in the mesh
+ if(sum(num_of_elems_in_this_color(1:nb_colors_outer_elements)) /= nspec_outer) then
+ print *,'error number of outer elements in this color:',idomain
+ print *,'rank: ',myrank,' nspec_outer = ',nspec_outer
+ print*,'nb_colors_outer_elements = ',nb_colors_outer_elements
+ print *,'total number of elements in all the colors of the mesh for outer elements = ', &
+ sum(num_of_elems_in_this_color(1:nb_colors_outer_elements))
+ call exit_MPI(myrank, 'incorrect total number of elements in all the colors of the mesh for outer elements')
+ endif
+
+ ! debug: file output
+ if( SAVE_MESH_FILES ) then
+ filename = prname(1:len_trim(prname))//'color_'//str_domain(idomain)
+ call write_VTK_data_elem_i(nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+ color,filename)
+ endif
+
+ deallocate(first_elem_number_in_this_color)
+ deallocate(is_on_a_slice_edge)
+ deallocate(color)
+
+ ! debug: no mesh coloring, only creates dummy coloring arrays
+ if( .false. ) then
+ nb_colors_outer_elements = 0
+ nb_colors_inner_elements = 0
+ ispec_counter = 0
+
+ ! first generate all the outer elements
+ do ispec = 1,nspec
+ if( ispec_is_d(ispec) ) then
+ if( ispec_is_inner(ispec) .eqv. .false. ) then
+ ispec_counter = ispec_counter + 1
+ perm(ispec) = ispec_counter
+ endif
+ endif
+ enddo
+
+ ! store total number of outer elements
+ nspec_outer = ispec_counter
+
+ ! only single color
+ if(nspec_outer > 0 ) nb_colors_outer_elements = 1
+
+ ! then generate all the inner elements
+ do ispec = 1,nspec
+ if( ispec_is_d(ispec) ) then
+ if( ispec_is_inner(ispec) .eqv. .true. ) then
+ ispec_counter = ispec_counter + 1
+ perm(ispec) = ispec_counter - nspec_outer ! starts again at 1
+ endif
+ endif
+ enddo
+ nspec_inner = ispec_counter - nspec_outer
+
+ ! only single color
+ if(nspec_inner > 0 ) nb_colors_inner_elements = 1
+
+ allocate(num_of_elems_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_of_elems_in_this_color array'
+
+ if( nspec_outer > 0 ) num_of_elems_in_this_color(1) = nspec_outer
+ if( nspec_inner > 0 ) num_of_elems_in_this_color(2) = nspec_inner
+ endif ! debug
+
+ ! debug: saves mesh coloring numbers into files
+ if( SAVE_MESH_FILES ) then
+ ! debug file output
+ filename = prname(1:len_trim(prname))//'num_of_elems_in_this_color_'//str_domain(idomain)//'.dat'
+ open(unit=99,file=trim(filename),status='unknown',iostat=ier)
+ if( ier /= 0 ) stop 'error opening num_of_elems_in_this_color file'
+ ! number of colors for outer elements
+ write(99,*) nb_colors_outer_elements
+ ! number of colors for inner elements
+ write(99,*) nb_colors_inner_elements
+ ! number of elements in each color
+ ! outer elements
+ do icolor = 1, nb_colors_outer_elements + nb_colors_inner_elements
+ write(99,*) num_of_elems_in_this_color(icolor)
+ enddo
+ close(99)
+ endif
+
+ ! sets up domain coloring arrays
+ select case(idomain)
+ case( 1 )
+ ! acoustic domains
+ num_colors_outer_acoustic = nb_colors_outer_elements
+ num_colors_inner_acoustic = nb_colors_inner_elements
+
+ allocate(num_elem_colors_acoustic(num_colors_outer_acoustic + num_colors_inner_acoustic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_acoustic array'
+
+ num_elem_colors_acoustic(:) = num_of_elems_in_this_color(:)
+
+ case( 2 )
+ ! elastic domains
+ num_colors_outer_elastic = nb_colors_outer_elements
+ num_colors_inner_elastic = nb_colors_inner_elements
+
+ allocate(num_elem_colors_elastic(num_colors_outer_elastic + num_colors_inner_elastic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_elastic array'
+
+ num_elem_colors_elastic(:) = num_of_elems_in_this_color(:)
+
+ case default
+ stop 'error idomain not recognized'
+ end select
+
+ ! sets up elements for loops in simulations
+ ispec_inner = 0
+ ispec_outer = 0
+ do ispec = 1, nspec
+ ! only elements in this domain
+ if( ispec_is_d(ispec) ) then
+
+ ! sets phase_ispec arrays with ordering of elements
+ if( ispec_is_inner(ispec) .eqv. .false. ) then
+ ! outer elements
+ ispec_outer = perm(ispec)
+
+ ! checks
+ if( ispec_outer < 1 .or. ispec_outer > num_phase_ispec_d ) then
+ print*,'error outer permutation:',idomain
+ print*,'rank:',myrank,' ispec_inner = ',ispec_outer
+ print*,'num_phase_ispec_d = ',num_phase_ispec_d
+ call exit_MPI(myrank,'error outer acoustic permutation')
+ endif
+
+ phase_ispec_inner_d(ispec_outer,1) = ispec
+
+ else
+ ! inner elements
+ ispec_inner = perm(ispec)
+
+ ! checks
+ if( ispec_inner < 1 .or. ispec_inner > num_phase_ispec_d ) then
+ print*,'error inner permutation:',idomain
+ print*,'rank:',myrank,' ispec_inner = ',ispec_inner
+ print*,'num_phase_ispec_d = ',num_phase_ispec_d
+ call exit_MPI(myrank,'error inner acoustic permutation')
+ endif
+
+ phase_ispec_inner_d(ispec_inner,2) = ispec
+
+ endif
+ endif
+ enddo
+
+ ! total number of colors
+ nb_colors = nb_colors_inner_elements + nb_colors_outer_elements
+ call min_all_i(nb_colors,nb_colors_min)
+ call max_all_i(nb_colors,nb_colors_max)
+
+ ! user output
+ call min_all_i(nspec_outer,nspec_outer_min_global)
+ call max_all_i(nspec_outer,nspec_outer_max_global)
+ call min_all_i(nspec_outer,nspec_outer_min_global)
+ call max_all_i(nspec_outer,nspec_outer_max_global)
+ if(myrank == 0) then
+ write(IMAIN,*) ' colors min = ',nb_colors_min
+ write(IMAIN,*) ' colors max = ',nb_colors_max
+ write(IMAIN,*) ' outer elements: min = ',nspec_outer_min_global
+ write(IMAIN,*) ' outer elements: max = ',nspec_outer_max_global
+ endif
+
+ ! debug: outputs permutation array as vtk file
+ !if( SAVE_MESH_FILES ) then
+ ! filename = prname(1:len_trim(prname))//'perm_'//str_domain(idomain)
+ ! call write_VTK_data_elem_i(nspec,nglob, &
+ ! xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+ ! perm,filename)
+ !endif
+
+ deallocate(num_of_elems_in_this_color)
+
+ end subroutine crm_setup_color
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine crm_setup_permutation(myrank,nspec,nglob,ibool,ANISOTROPY,perm, &
+ SAVE_MESH_FILES)
+
+ use create_regions_mesh_ext_par
+ implicit none
+
+ integer :: myrank,nspec,nglob
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+ logical :: ANISOTROPY
+
+ integer, dimension(nspec),intent(inout) :: perm
+
+ logical :: SAVE_MESH_FILES
+
+ ! local parameters
+ ! added for sorting
+ integer, dimension(:,:,:,:), allocatable :: temp_array_int
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: temp_array_real
+ logical, dimension(:), allocatable :: temp_array_logical_1D
+
+ integer, dimension(:), allocatable :: temp_perm_global
+ logical, dimension(:), allocatable :: mask_global
+
+ integer :: icolor,icounter,ispec,ielem,ier,i
+ integer :: iface,old_ispec,new_ispec
+
+ character(len=256) :: filename
+
+ ! sorts array according to permutation
+ allocate(temp_perm_global(nspec),stat=ier)
+ if( ier /= 0 ) stop 'error temp_perm_global array'
+
+ ! global ordering
+ temp_perm_global(:) = 0
+ icounter = 0
+
+ ! fills global permutation array
+ ! starts with elastic elements
+ if( ELASTIC_SIMULATION ) then
+ ! first outer elements coloring
+ ! phase element counter
+ ielem = 0
+ do icolor = 1,num_colors_outer_elastic
+ ! loops through elements
+ do i = 1,num_elem_colors_elastic(icolor)
+ ielem = ielem + 1
+ ispec = phase_ispec_inner_elastic(ielem,1) ! 1 <-- first phase, outer elements
+ ! reorders elements
+ icounter = icounter + 1
+ temp_perm_global(ispec) = icounter
+ ! resets to new order
+ phase_ispec_inner_elastic(ielem,1) = icounter
+ enddo
+ enddo
+ ! inner elements coloring
+ ielem = 0
+ do icolor = num_colors_outer_elastic+1,num_colors_outer_elastic+num_colors_inner_elastic
+ ! loops through elements
+ do i = 1,num_elem_colors_elastic(icolor)
+ ielem = ielem + 1
+ ispec = phase_ispec_inner_elastic(ielem,2) ! 2 <-- second phase, inner elements
+ ! reorders elements
+ icounter = icounter + 1
+ temp_perm_global(ispec) = icounter
+ ! resets to new order
+ phase_ispec_inner_elastic(ielem,2) = icounter
+ enddo
+ enddo
+ endif
+
+ ! continues with acoustic elements
+ if( ACOUSTIC_SIMULATION ) then
+ ! first outer elements coloring
+ ! phase element counter
+ ielem = 0
+ do icolor = 1,num_colors_outer_acoustic
+ ! loops through elements
+ do i = 1,num_elem_colors_acoustic(icolor)
+ ielem = ielem + 1
+ ispec = phase_ispec_inner_acoustic(ielem,1) ! 1 <-- first phase, outer elements
+ ! reorders elements
+ icounter = icounter + 1
+ temp_perm_global(ispec) = icounter
+ ! resets to new order
+ phase_ispec_inner_acoustic(ielem,1) = icounter
+ enddo
+ enddo
+ ! inner elements coloring
+ ielem = 0
+ do icolor = num_colors_outer_acoustic+1,num_colors_outer_acoustic+num_colors_inner_acoustic
+ ! loops through elements
+ do i = 1,num_elem_colors_acoustic(icolor)
+ ielem = ielem + 1
+ ispec = phase_ispec_inner_acoustic(ielem,2) ! 2 <-- second phase, inner elements
+ ! reorders elements
+ icounter = icounter + 1
+ temp_perm_global(ispec) = icounter
+ ! resets to new order
+ phase_ispec_inner_acoustic(ielem,2) = icounter
+ enddo
+ enddo
+ endif
+
+ ! checks
+ if( icounter /= nspec ) then
+ print*,'error temp perm: ',icounter,nspec
+ stop 'error temporary global permutation incomplete'
+ endif
+
+ if(minval(temp_perm_global) /= 1) call exit_MPI(myrank, 'minval(temp_perm_global) should be 1')
+ if(maxval(temp_perm_global) /= nspec) call exit_MPI(myrank, 'maxval(temp_perm_global) should be nspec')
+
+ ! checks if every element was set
+ allocate(mask_global(nspec),stat=ier)
+ if( ier /= 0 ) stop 'error allocating temporary mask_global'
+ mask_global(:) = .false.
+ icounter = 0 ! counts permutations
+ do ispec = 1, nspec
+ new_ispec = temp_perm_global(ispec)
+ ! checks bounds
+ if( new_ispec < 1 .or. new_ispec > nspec ) call exit_MPI(myrank,'error temp_perm_global ispec bounds')
+ ! checks if already set
+ if( mask_global(new_ispec) ) then
+ print*,'error temp_perm_global:',ispec,new_ispec,'element already set'
+ call exit_MPI(myrank,'error global permutation')
+ else
+ mask_global(new_ispec) = .true.
+ endif
+ ! counts permutations
+ if( new_ispec /= ispec ) icounter = icounter + 1
+ enddo
+
+ ! checks number of set elements
+ if( count(mask_global(:)) /= nspec ) then
+ print*,'error temp_perm_global:',count(mask_global(:)),nspec,'permutation incomplete'
+ call exit_MPI(myrank,'error global permutation incomplete')
+ endif
+ deallocate(mask_global)
+
+ ! user output
+ if(myrank == 0) then
+ write(IMAIN,*) ' number of permutations = ',icounter
+ endif
+
+ ! outputs permutation array as vtk file
+ if( SAVE_MESH_FILES ) then
+ filename = prname(1:len_trim(prname))//'perm_global'
+ call write_VTK_data_elem_i(nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+ temp_perm_global,filename)
+ endif
+
+ ! store as new permutation
+ perm(:) = temp_perm_global(:)
+ deallocate(temp_perm_global)
+
+ ! permutes all required mesh arrays according to new ordering
+
+ ! permutation of ibool
+ allocate(temp_array_int(NGLLX,NGLLY,NGLLZ,nspec))
+ call permute_elements_integer(ibool,temp_array_int,perm,nspec)
+ deallocate(temp_array_int)
+
+ ! element domain flags
+ allocate(temp_array_logical_1D(nspec))
+ call permute_elements_logical1D(ispec_is_acoustic,temp_array_logical_1D,perm,nspec)
+ call permute_elements_logical1D(ispec_is_elastic,temp_array_logical_1D,perm,nspec)
+ call permute_elements_logical1D(ispec_is_poroelastic,temp_array_logical_1D,perm,nspec)
+ call permute_elements_logical1D(ispec_is_inner,temp_array_logical_1D,perm,nspec)
+ deallocate(temp_array_logical_1D)
+
+ ! mesh arrays
+ allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec))
+ call permute_elements_real(xixstore,temp_array_real,perm,nspec)
+ call permute_elements_real(xiystore,temp_array_real,perm,nspec)
+ call permute_elements_real(xizstore,temp_array_real,perm,nspec)
+ call permute_elements_real(etaxstore,temp_array_real,perm,nspec)
+ call permute_elements_real(etaystore,temp_array_real,perm,nspec)
+ call permute_elements_real(etazstore,temp_array_real,perm,nspec)
+ call permute_elements_real(gammaxstore,temp_array_real,perm,nspec)
+ call permute_elements_real(gammaystore,temp_array_real,perm,nspec)
+ call permute_elements_real(gammazstore,temp_array_real,perm,nspec)
+ call permute_elements_real(jacobianstore,temp_array_real,perm,nspec)
+
+ call permute_elements_real(kappastore,temp_array_real,perm,nspec)
+ call permute_elements_real(mustore,temp_array_real,perm,nspec)
+
+ ! acoustic arrays
+ if( ACOUSTIC_SIMULATION ) then
+ call permute_elements_real(rhostore,temp_array_real,perm,nspec)
+ endif
+
+ ! elastic arrays
+ if( ELASTIC_SIMULATION ) then
+ call permute_elements_real(rho_vp,temp_array_real,perm,nspec)
+ call permute_elements_real(rho_vs,temp_array_real,perm,nspec)
+ if( ANISOTROPY ) then
+ call permute_elements_real(c11store,temp_array_real,perm,nspec)
+ call permute_elements_real(c12store,temp_array_real,perm,nspec)
+ call permute_elements_real(c13store,temp_array_real,perm,nspec)
+ call permute_elements_real(c14store,temp_array_real,perm,nspec)
+ call permute_elements_real(c15store,temp_array_real,perm,nspec)
+ call permute_elements_real(c16store,temp_array_real,perm,nspec)
+ call permute_elements_real(c22store,temp_array_real,perm,nspec)
+ call permute_elements_real(c23store,temp_array_real,perm,nspec)
+ call permute_elements_real(c24store,temp_array_real,perm,nspec)
+ call permute_elements_real(c25store,temp_array_real,perm,nspec)
+ call permute_elements_real(c33store,temp_array_real,perm,nspec)
+ call permute_elements_real(c34store,temp_array_real,perm,nspec)
+ call permute_elements_real(c35store,temp_array_real,perm,nspec)
+ call permute_elements_real(c36store,temp_array_real,perm,nspec)
+ call permute_elements_real(c44store,temp_array_real,perm,nspec)
+ call permute_elements_real(c45store,temp_array_real,perm,nspec)
+ call permute_elements_real(c46store,temp_array_real,perm,nspec)
+ call permute_elements_real(c55store,temp_array_real,perm,nspec)
+ call permute_elements_real(c56store,temp_array_real,perm,nspec)
+ call permute_elements_real(c66store,temp_array_real,perm,nspec)
+ endif
+ endif
+ deallocate(temp_array_real)
+
+ ! boundary surface
+ if( num_abs_boundary_faces > 0 ) then
+ do iface = 1,num_abs_boundary_faces
+ old_ispec = abs_boundary_ispec(iface)
+ new_ispec = perm(old_ispec)
+ abs_boundary_ispec(iface) = new_ispec
+ enddo
+ endif
+
+ ! free surface
+ if( num_free_surface_faces > 0 ) then
+ do iface = 1,num_free_surface_faces
+ old_ispec = free_surface_ispec(iface)
+ new_ispec = perm(old_ispec)
+ free_surface_ispec(iface) = new_ispec
+ enddo
+ endif
+
+ ! coupling surface
+ if( num_coupling_ac_el_faces > 0 ) then
+ do iface = 1,num_coupling_ac_el_faces
+ old_ispec = coupling_ac_el_ispec(iface)
+ new_ispec = perm(old_ispec)
+ coupling_ac_el_ispec(iface) = new_ispec
+ enddo
+ endif
+
+ ! moho surface
+ if( NSPEC2D_MOHO > 0 ) then
+ allocate(temp_array_logical_1D(nspec))
+ call permute_elements_logical1D(is_moho_top,temp_array_logical_1D,perm,nspec)
+ call permute_elements_logical1D(is_moho_bot,temp_array_logical_1D,perm,nspec)
+ deallocate(temp_array_logical_1D)
+ do iface = 1,NSPEC2D_MOHO
+ ! top
+ old_ispec = ibelm_moho_top(iface)
+ new_ispec = perm(old_ispec)
+ ibelm_moho_top(iface) = new_ispec
+ ! bottom
+ old_ispec = ibelm_moho_bot(iface)
+ new_ispec = perm(old_ispec)
+ ibelm_moho_bot(iface) = new_ispec
+ enddo
+ endif
+
+ end subroutine crm_setup_permutation
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/generate_databases.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -848,6 +848,11 @@
write(IMAIN,*) ' moho surfaces: ',num_moho
endif
call sync_all()
+ else
+ ! allocate dummy array
+ nspec2D_moho_ext = 0
+ allocate(ibelm_moho(nspec2D_moho_ext),nodes_ibelm_moho(4,nspec2D_moho_ext),stat=ier)
+ if( ier /= 0 ) stop 'error allocating dumy array ibelm_moho etc.'
endif
close(IIN)
@@ -898,7 +903,8 @@
write(IMAIN,*) 'create regions: '
endif
call create_regions_mesh_ext(ibool, &
- xstore, ystore, zstore, nspec, npointot, myrank, LOCAL_PATH, &
+ xstore, ystore, zstore, nspec, &
+ npointot, myrank, LOCAL_PATH, &
nnodes_ext_mesh, nelmnts_ext_mesh, &
nodes_coords_ext_mesh, elmnts_ext_mesh, &
max_static_memory_size, mat_ext_mesh, materials_ext_mesh, &
@@ -912,19 +918,22 @@
ibelm_xmin, ibelm_xmax, ibelm_ymin, ibelm_ymax, ibelm_bottom, ibelm_top, &
nodes_ibelm_xmin,nodes_ibelm_xmax,nodes_ibelm_ymin,nodes_ibelm_ymax, &
nodes_ibelm_bottom,nodes_ibelm_top, &
- SAVE_MESH_FILES,nglob, &
+ SAVE_MESH_FILES,&
+ nglob, &
ANISOTROPY,NPROC,OCEANS,TOPOGRAPHY, &
ATTENUATION,USE_OLSEN_ATTENUATION, &
UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION,NX_TOPO,NY_TOPO, &
ORIG_LAT_TOPO,ORIG_LONG_TOPO,DEGREES_PER_CELL_TOPO, &
- itopo_bathy)
+ itopo_bathy, &
+ nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho)
+! now done inside create_regions_mesh_ext routine...
! Moho boundary parameters, 2-D jacobians and normals
- if( SAVE_MOHO_MESH ) then
- call create_regions_mesh_save_moho(myrank,nglob,nspec, &
- nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
- nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
- endif
+! if( SAVE_MOHO_MESH ) then
+! call create_regions_mesh_save_moho(myrank,nglob,nspec, &
+! nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
+! nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
+! endif
! defines global number of nodes in model
NGLOB_AB = nglob
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_model.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -24,6 +24,9 @@
!
!=====================================================================
+
+! wrapper function
+
subroutine get_model(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
materials_ext_mesh,nmat_ext_mesh, &
undef_mat_prop,nundefMat_ext_mesh, &
@@ -34,7 +37,59 @@
! number of spectral elements in each block
integer :: myrank,nspec
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+ ! external mesh
+ integer :: nelmnts_ext_mesh
+ integer :: nmat_ext_mesh,nundefMat_ext_mesh
+ integer, dimension(2,nelmnts_ext_mesh) :: mat_ext_mesh
+ double precision, dimension(6,nmat_ext_mesh) :: materials_ext_mesh
+ character (len=30), dimension(6,nundefMat_ext_mesh):: undef_mat_prop
+ ! anisotropy
+ logical :: ANISOTROPY
+ character(len=256) LOCAL_PATH
+ !----------------------------------------------------------
+ ! USER Parameter
+ ! daniel: TODO -- uses Piero's get_model_PREM routine rather than default one
+ logical,parameter :: USE_PIERO_MODEL = .true.
+
+ !----------------------------------------------------------
+
+ if( USE_PIERO_MODEL ) then
+ ! Using PREM model instead
+ call get_model_PREM(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
+ materials_ext_mesh,nmat_ext_mesh, &
+ undef_mat_prop,nundefMat_ext_mesh, &
+ ANISOTROPY,LOCAL_PATH)
+
+ else
+ ! DEFAULT routine
+ call get_model_d(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
+ materials_ext_mesh,nmat_ext_mesh, &
+ undef_mat_prop,nundefMat_ext_mesh, &
+ ANISOTROPY,LOCAL_PATH)
+ endif
+
+ end subroutine get_model
+
+ !
+ !-----------------------------------------------------------------------------------------------
+ !
+
+! default get model routine
+
+ subroutine get_model_d(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
+ materials_ext_mesh,nmat_ext_mesh, &
+ undef_mat_prop,nundefMat_ext_mesh, &
+ ANISOTROPY,LOCAL_PATH)
+
+
+ use create_regions_mesh_ext_par
+ implicit none
+
+ ! number of spectral elements in each block
+ integer :: myrank,nspec
+
integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
! external mesh
@@ -360,8 +415,11 @@
endif ! USE_EXTERNAL_FILES
- end subroutine get_model
+ end subroutine get_model_d
+!
+!-------------------------------------------------------------------------------------------------
+!
subroutine get_model_PREM(myrank,nspec,ibool,mat_ext_mesh,nelmnts_ext_mesh, &
materials_ext_mesh,nmat_ext_mesh, &
undef_mat_prop,nundefMat_ext_mesh, &
Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90 (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -0,0 +1,1048 @@
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+! define sets of colors that contain disconnected elements for the CUDA solver.
+! also split the elements into two subsets: inner and outer elements, in order
+! to be able to compute the outer elements first in the solver and then
+! start non-blocking MPI calls and overlap them with the calculation of the inner elements
+! (which works fine because there are always far more inner elements than outer elements)
+
+!*********************************************************************************************************
+! Mila
+
+! daniel: modified routines to use element domain flags given in ispec_is_d, thus
+! coloring only acoustic or elastic (or..) elements in one run, then repeat run for other domains.
+! also, the permutation re-starts at 1 for outer and for inner elements,
+! making it usable for the phase_ispec_inner_** arrays for acoustic and elastic elements.
+
+ subroutine get_perm_color_faster(is_on_a_slice_edge,ispec_is_d, &
+ ibool,perm,color, &
+ nspec,nglob, &
+ nb_colors_outer_elements,nb_colors_inner_elements, &
+ nspec_outer,nspec_inner,nspec_domain, &
+ first_elem_number_in_this_color, &
+ myrank)
+
+ implicit none
+
+ include "constants.h"
+
+ integer, intent(in) :: nspec, nglob
+ logical, dimension(nspec), intent(in) :: is_on_a_slice_edge
+ logical, dimension(nspec), intent(in) :: ispec_is_d
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec), intent(in) :: ibool
+ integer, dimension(nspec),intent(inout) :: perm
+
+ integer, dimension(nspec),intent(inout) :: color
+ integer, dimension(MAX_NUMBER_OF_COLORS+1),intent(inout) :: first_elem_number_in_this_color
+ integer, intent(out) :: nb_colors_outer_elements,nb_colors_inner_elements
+
+ integer, intent(out) :: nspec_outer,nspec_inner,nspec_domain
+ integer, intent(in) :: myrank
+
+ ! local variables
+ integer :: nb_colors
+
+ ! coloring algorithm
+ call get_color_faster(ibool, is_on_a_slice_edge, ispec_is_d, &
+ myrank, nspec, nglob, &
+ color, nb_colors_outer_elements, nb_colors_inner_elements, &
+ nspec_outer,nspec_inner,nspec_domain)
+
+ !debug
+ !if(myrank == 0) then
+ ! print*, 'rank :',myrank,' - colors:'
+ ! print*, ' number of colors for inner elements = ',nb_colors_inner_elements
+ ! print*, ' number of colors for outer elements = ',nb_colors_outer_elements
+ ! print*, ' total number of colors (sum of both) = ', nb_colors_inner_elements + nb_colors_outer_elements
+ ! print*, 'rank :',myrank,' - elements:'
+ ! print*, ' number of elements for outer elements = ',nspec_outer
+ ! print*, ' number of elements for inner elements = ',nspec_inner
+ ! print*, ' total number of elements for domain elements = ',nspec_domain
+ !endif
+ !if(myrank == 0) print*, ' generating the final colors'
+
+ ! total number of colors used
+ nb_colors = nb_colors_inner_elements+nb_colors_outer_elements
+ first_elem_number_in_this_color(:) = 0
+
+ ! gets element permutation depending on colors
+ call get_final_perm(color,perm,first_elem_number_in_this_color(1:nb_colors), &
+ nspec,nb_colors,nb_colors_outer_elements, &
+ ispec_is_d,nspec_domain)
+
+ !debug
+ !if(myrank == 0) print*, ' done with mesh coloring and inner/outer element splitting'
+
+ end subroutine get_perm_color_faster
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine get_color_faster(ibool, is_on_a_slice_edge, ispec_is_d, &
+ myrank, nspec, nglob, &
+ color, nb_colors_outer_elements, nb_colors_inner_elements, &
+ nspec_outer,nspec_inner,nspec_domain)
+
+ implicit none
+
+ include "constants.h"
+
+ integer nspec,nglob
+ logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+ integer, dimension(nspec) :: color
+ integer :: nb_colors_outer_elements,nb_colors_inner_elements,myrank
+
+ integer :: nspec_outer,nspec_inner,nspec_domain
+
+ ! local variables
+ integer :: ispec
+ !! DK DK for mesh coloring GPU Joseph Fourier
+ logical, dimension(:), allocatable :: mask_ibool
+ integer :: icolor, nb_already_colored
+ integer :: iglob1,iglob2,iglob3,iglob4,iglob5,iglob6,iglob7,iglob8
+ logical :: conflict_found_need_new_color
+
+ ! user output
+ if( myrank == 0 ) then
+ write(IMAIN,*) ' fast coloring mesh algorithm'
+ endif
+
+ ! counts number of elements for inner, outer and total domain
+ nspec_outer = 0
+ nspec_inner = 0
+ nspec_domain = 0
+ do ispec=1,nspec
+ if(ispec_is_d(ispec)) then
+ if(is_on_a_slice_edge(ispec)) then
+ nspec_outer=nspec_outer+1
+ else
+ nspec_inner=nspec_inner+1
+ endif
+ nspec_domain=nspec_domain+1
+ endif
+ enddo
+
+ !! DK DK start mesh coloring (new Apr 2010 version by DK for GPU Joseph Fourier)
+ allocate(mask_ibool(nglob))
+
+ !! DK DK ----------------------------------
+ !! DK DK color the mesh in the crust_mantle
+ !! DK DK ----------------------------------
+
+ ! debug
+ !if(myrank == 0) then
+ ! print *
+ ! print *,'----------------------------------'
+ ! print *,'coloring the mesh'
+ ! print *,'----------------------------------'
+ ! print *
+ !endif
+
+ ! first set color of all elements to 0
+ color(:) = 0
+ icolor = 0
+ nb_already_colored = 0
+
+ ! colors outer elements
+ do while( nb_already_colored < nspec_outer )
+ icolor = icolor + 1
+
+ 333 continue
+
+ ! debug: user output
+ !if(myrank == 0) then
+ ! print *,' analyzing color ',icolor,' - outer elements'
+ !endif
+
+ ! resets flags
+ mask_ibool(:) = .false.
+ conflict_found_need_new_color = .false.
+
+ ! finds un-colored elements
+ do ispec = 1,nspec
+ ! domain elements only
+ if( ispec_is_d(ispec) ) then
+ ! outer elements
+ if( is_on_a_slice_edge(ispec) ) then
+ if(color(ispec) == 0) then
+ ! the eight corners of the current element
+ iglob1=ibool(1,1,1,ispec)
+ iglob2=ibool(NGLLX,1,1,ispec)
+ iglob3=ibool(NGLLX,NGLLY,1,ispec)
+ iglob4=ibool(1,NGLLY,1,ispec)
+ iglob5=ibool(1,1,NGLLZ,ispec)
+ iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+ iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+ iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+
+ if(mask_ibool(iglob1) .or. mask_ibool(iglob2) .or. mask_ibool(iglob3) .or. mask_ibool(iglob4) .or. &
+ mask_ibool(iglob5) .or. mask_ibool(iglob6) .or. mask_ibool(iglob7) .or. mask_ibool(iglob8)) then
+ ! if element of this color has a common point with another element of that same color
+ ! then we need to create a new color, i.e., increment the color of the current element
+ conflict_found_need_new_color = .true.
+ else
+ color(ispec) = icolor
+ nb_already_colored = nb_already_colored + 1
+ mask_ibool(iglob1) = .true.
+ mask_ibool(iglob2) = .true.
+ mask_ibool(iglob3) = .true.
+ mask_ibool(iglob4) = .true.
+ mask_ibool(iglob5) = .true.
+ mask_ibool(iglob6) = .true.
+ mask_ibool(iglob7) = .true.
+ mask_ibool(iglob8) = .true.
+ endif
+ endif
+ endif
+ endif
+ enddo
+
+ ! debug: user output
+ !if(myrank == 0) then
+ ! print *,' done ',(100.0*nb_already_colored)/nspec_domain,'% of ',nspec_domain,'elements'
+ !endif
+
+ if(conflict_found_need_new_color) then
+ icolor = icolor + 1
+ if( icolor >= MAX_NUMBER_OF_COLORS ) stop 'error MAX_NUMBER_OF_COLORS too small'
+ goto 333
+ endif
+ enddo
+
+ nb_colors_outer_elements = icolor
+
+ ! colors inner elements
+ do while(nb_already_colored < nspec_domain)
+ icolor = icolor + 1
+
+ 334 continue
+ !if(myrank == 0) print *,'analyzing color ',icolor,' for all the elements of the mesh'
+
+ ! debug: user output
+ !if(myrank == 0) then
+ ! print *,' analyzing color ',icolor,' - inner elements'
+ !endif
+
+ ! resets flags
+ mask_ibool(:) = .false.
+ conflict_found_need_new_color = .false.
+
+ do ispec = 1,nspec
+ ! domain elements only
+ if(ispec_is_d(ispec)) then
+ ! inner elements
+ if (.not. is_on_a_slice_edge(ispec)) then
+ if(color(ispec) == 0) then
+ ! the eight corners of the current element
+ iglob1=ibool(1,1,1,ispec)
+ iglob2=ibool(NGLLX,1,1,ispec)
+ iglob3=ibool(NGLLX,NGLLY,1,ispec)
+ iglob4=ibool(1,NGLLY,1,ispec)
+ iglob5=ibool(1,1,NGLLZ,ispec)
+ iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+ iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+ iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+
+ if(mask_ibool(iglob1) .or. mask_ibool(iglob2) .or. mask_ibool(iglob3) .or. mask_ibool(iglob4) .or. &
+ mask_ibool(iglob5) .or. mask_ibool(iglob6) .or. mask_ibool(iglob7) .or. mask_ibool(iglob8)) then
+ ! if element of this color has a common point with another element of that same color
+ ! then we need to create a new color, i.e., increment the color of the current element
+ conflict_found_need_new_color = .true.
+ else
+ color(ispec) = icolor
+ nb_already_colored = nb_already_colored + 1
+ mask_ibool(iglob1) = .true.
+ mask_ibool(iglob2) = .true.
+ mask_ibool(iglob3) = .true.
+ mask_ibool(iglob4) = .true.
+ mask_ibool(iglob5) = .true.
+ mask_ibool(iglob6) = .true.
+ mask_ibool(iglob7) = .true.
+ mask_ibool(iglob8) = .true.
+ endif
+ endif
+ endif
+ endif
+ enddo
+
+ ! debug user output
+ !if(myrank == 0) then
+ ! print *,' done ',(100.0*nb_already_colored)/nspec_domain,'% of ',nspec_domain,'elements'
+ !endif
+
+ if(conflict_found_need_new_color) then
+ icolor = icolor + 1
+ if( icolor >= MAX_NUMBER_OF_COLORS ) stop 'error MAX_NUMBER_OF_COLORS too small'
+ goto 334
+ endif
+ enddo
+
+ nb_colors_inner_elements = icolor - nb_colors_outer_elements
+
+ ! debug
+ !if(myrank == 0) then
+ ! print *
+ ! print *,' created a total of ',maxval(color),' colors in this domain' ! 'for all the domain elements of the mesh'
+ ! print *
+ !endif
+
+ !!!!!!!! DK DK now check that all the color sets are independent
+ do icolor = 1,maxval(color)
+ mask_ibool(:) = .false.
+ do ispec = 1,nspec
+ ! domain elements only
+ if(ispec_is_d(ispec)) then
+ if(color(ispec) == icolor ) then
+ ! the eight corners of the current element
+ iglob1=ibool(1,1,1,ispec)
+ iglob2=ibool(NGLLX,1,1,ispec)
+ iglob3=ibool(NGLLX,NGLLY,1,ispec)
+ iglob4=ibool(1,NGLLY,1,ispec)
+ iglob5=ibool(1,1,NGLLZ,ispec)
+ iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+ iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+ iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+
+ if(mask_ibool(iglob1) .or. mask_ibool(iglob2) .or. mask_ibool(iglob3) .or. mask_ibool(iglob4) .or. &
+ mask_ibool(iglob5) .or. mask_ibool(iglob6) .or. mask_ibool(iglob7) .or. mask_ibool(iglob8)) then
+ ! if element of this color has a common point with another element of that same color
+ ! then there is a problem, the color set is not correct
+ print*,'error check color:',icolor
+ stop 'error detected: found a common point inside a color set'
+ else
+ mask_ibool(iglob1) = .true.
+ mask_ibool(iglob2) = .true.
+ mask_ibool(iglob3) = .true.
+ mask_ibool(iglob4) = .true.
+ mask_ibool(iglob5) = .true.
+ mask_ibool(iglob6) = .true.
+ mask_ibool(iglob7) = .true.
+ mask_ibool(iglob8) = .true.
+ endif
+ endif
+ endif
+ enddo
+
+ !debug
+ !if(myrank == 0) print *,' color ',icolor,' has disjoint elements only and is therefore OK'
+ !if(myrank == 0) print *,' color ',icolor,' contains ',count(color == icolor),' elements'
+ enddo
+
+ ! debug
+ !if(myrank == 0) then
+ ! print *,' the ',maxval(color),' color sets are OK'
+ ! print *
+ !endif
+
+ deallocate(mask_ibool)
+
+ end subroutine get_color_faster
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine get_final_perm(color,perm,first_elem_number_in_this_color, &
+ nspec,nb_colors,nb_colors_outer_elements, &
+ ispec_is_d,nspec_domain)
+
+ integer, intent(in) :: nspec,nb_colors
+
+ integer,dimension(nspec), intent(in) :: color
+ integer,dimension(nspec), intent(inout) :: perm
+
+ integer, intent(inout) :: first_elem_number_in_this_color(nb_colors)
+
+ logical,dimension(nspec),intent(in) :: ispec_is_d
+
+ integer,intent(in) :: nb_colors_outer_elements,nspec_domain
+
+ ! local parameters
+ integer :: ispec,icolor,icounter,counter_outer
+
+ ! note: permutations are only valid within each domain
+ ! also, the counters start at 1 for each inner/outer element range
+
+ ! outer elements first ( note: inner / outer order sensitive)
+ icounter = 1
+ do icolor = 1, nb_colors_outer_elements
+ first_elem_number_in_this_color(icolor) = icounter
+ do ispec = 1, nspec
+ ! elements in this domain only
+ if( ispec_is_d(ispec) ) then
+ if(color(ispec) == icolor) then
+ perm(ispec) = icounter
+ icounter = icounter + 1
+ endif
+ endif
+ enddo
+ enddo
+ counter_outer = icounter - 1
+
+ ! inner elements second
+ icounter = 1
+ do icolor = nb_colors_outer_elements+1, nb_colors
+ first_elem_number_in_this_color(icolor) = icounter + counter_outer
+ do ispec = 1, nspec
+ ! elements in this domain only
+ if( ispec_is_d(ispec) ) then
+ ! outer elements
+ if(color(ispec) == icolor) then
+ perm(ispec) = icounter
+ icounter = icounter + 1
+ endif
+ endif
+ enddo
+ enddo
+
+ ! checks
+ if( counter_outer + icounter -1 /= nspec_domain ) then
+ print*,'error: perm: ',nspec_domain,counter_outer,icounter,counter_outer+icounter-1
+ stop 'error get_final_perm: counter incomplete'
+ endif
+
+ end subroutine get_final_perm
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! unused...
+! subroutine get_perm_color(is_on_a_slice_edge,ispec_is_d, &
+! ibool,perm,nspec,nglob, &
+! nb_colors_outer_elements,nb_colors_inner_elements, &
+! nspec_outer,first_elem_number_in_this_color,myrank)
+!
+! implicit none
+!
+! include "constants.h"
+! integer, parameter :: NGNOD_HEXAHEDRA = 8
+!
+! logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
+!
+! integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+! integer, dimension(nspec) :: perm
+! integer, dimension(nspec) :: color
+! integer, dimension(MAX_NUMBER_OF_COLORS+1) :: first_elem_number_in_this_color
+! integer :: nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer,myrank
+!
+! ! local variables
+! integer nspec,nglob_GLL_full
+!
+! ! a neighbor of a hexahedral node is a hexahedron that shares a face with it -> max degree of a node = 6
+! integer, parameter :: MAX_NUMBER_OF_NEIGHBORS = 100
+!
+! ! global corner numbers that need to be created
+! integer, dimension(nglob) :: global_corner_number
+!
+! integer mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1)
+! integer, dimension(:), allocatable :: ne,np,adj
+! integer xadj(nspec+1)
+!
+! !logical maskel(nspec)
+!
+! integer i,istart,istop,number_of_neighbors
+!
+! integer nglob_eight_corners_only,nglob
+!
+! ! only count the total size of the array that will be created, or actually create it
+! logical count_only
+! integer total_size_ne,total_size_adj
+!
+!
+! ! total number of points in the mesh
+! nglob_GLL_full = nglob
+!
+! !---- call Charbel Farhat's routines
+! if(myrank == 0) &
+! write(IMAIN,*) 'calling form_elt_connectivity_foelco to perform mesh coloring and inner/outer element splitting'
+! call form_elt_connectivity_foelco(mn,mp,nspec,global_corner_number,nglob_GLL_full,ibool,nglob_eight_corners_only)
+! do i=1,nspec
+! istart = mp(i)
+! istop = mp(i+1) - 1
+! enddo
+!
+! ! count only, to determine the size needed for the array
+! allocate(np(nglob_eight_corners_only+1))
+! count_only = .true.
+! total_size_ne = 1
+! if(myrank == 0) write(IMAIN,*) 'calling form_node_connectivity_fonoco to determine the size of the table'
+! allocate(ne(total_size_ne))
+! call form_node_connectivity_fonoco(mn,mp,ne,np,nglob_eight_corners_only,nspec,count_only,total_size_ne)
+! deallocate(ne)
+!
+! !print *, 'nglob_eight_corners_only'
+! !print *, nglob_eight_corners_only
+!
+! ! allocate the array with the right size
+! allocate(ne(total_size_ne))
+!
+! ! now actually generate the array
+! count_only = .false.
+! if(myrank == 0) write(IMAIN,*) 'calling form_node_connectivity_fonoco to actually create the table'
+! call form_node_connectivity_fonoco(mn,mp,ne,np,nglob_eight_corners_only,nspec,count_only,total_size_ne)
+! do i=1,nglob_eight_corners_only
+! istart = np(i)
+! istop = np(i+1) - 1
+! enddo
+!
+! !print *, 'total_size_ne'
+! !print *, total_size_ne
+!
+! ! count only, to determine the size needed for the array
+! count_only = .true.
+! total_size_adj = 1
+! if(myrank == 0) write(IMAIN,*) 'calling create_adjacency_table_adjncy to determine the size of the table'
+! allocate(adj(total_size_adj))
+! !call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,maskel,nspec,nglob_eight_corners_only,&
+! !count_only,total_size_ne,total_size_adj,.false.)
+! call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,nspec,nglob_eight_corners_only,&
+! count_only,total_size_ne,total_size_adj,.false.)
+! deallocate(adj)
+!
+! ! allocate the array with the right size
+! allocate(adj(total_size_adj))
+!
+! ! now actually generate the array
+! count_only = .false.
+! if(myrank == 0) write(IMAIN,*) 'calling create_adjacency_table_adjncy again to actually create the table'
+! !call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,maskel,nspec,nglob_eight_corners_only,&
+! !count_only,total_size_ne,total_size_adj,.false.)
+! call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,nspec,nglob_eight_corners_only,&
+! count_only,total_size_ne,total_size_adj,.false.)
+!
+! do i=1,nspec
+! istart = xadj(i)
+! istop = xadj(i+1) - 1
+! number_of_neighbors = istop-istart+1
+! if(number_of_neighbors < 1 .or. number_of_neighbors > MAX_NUMBER_OF_NEIGHBORS) stop 'incorrect number of neighbors'
+! enddo
+!
+! deallocate(ne,np)
+!
+! call get_color(adj,xadj,color,nspec,total_size_adj, &
+! is_on_a_slice_edge,ispec_is_d, &
+! nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer)
+!
+! if(myrank == 0) then
+! write(IMAIN,*) ' number of colors of the graph for inner elements = ',nb_colors_inner_elements
+! write(IMAIN,*) ' number of colors of the graph for outer elements = ',nb_colors_outer_elements
+! write(IMAIN,*) ' total number of colors of the graph (sum of both) = ', nb_colors_inner_elements + nb_colors_outer_elements
+! write(IMAIN,*) ' number of elements of the graph for outer elements = ',nspec_outer
+! endif
+!
+! deallocate(adj)
+!
+! if(myrank == 0) write(IMAIN,*) 'generating the final colors'
+! first_elem_number_in_this_color(:) = -1
+! call get_final_perm(color,perm,first_elem_number_in_this_color,nspec,nb_colors_inner_elements+nb_colors_outer_elements)
+!
+! if(myrank == 0) write(IMAIN,*) 'done with mesh coloring and inner/outer element splitting'
+!
+! end subroutine get_perm_color
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+!unused...
+! subroutine get_color(adj,xadj,color,nspec,total_size_adj, &
+! is_on_a_slice_edge,ispec_is_d, &
+! nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer)
+!
+! integer, intent(in) :: nspec,total_size_adj
+! integer, intent(in) :: adj(total_size_adj),xadj(nspec+1)
+! integer :: color(nspec)
+! integer :: this_color,nb_already_colored,ispec,ixadj,ok
+! logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
+! integer :: nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer
+! logical :: is_outer_element(nspec)
+!
+! nspec_outer = 0
+!
+! is_outer_element(:) = .false.
+!
+! do ispec=1,nspec
+! if(ispec_is_d(ispec)) then
+! if (is_on_a_slice_edge(ispec)) then
+! is_outer_element(ispec) = .true.
+! nspec_outer=nspec_outer+1
+! endif
+! endif
+! enddo
+!
+! ! outer elements
+! color(:) = 0
+! this_color = 0
+! nb_already_colored = 0
+! do while(nb_already_colored<nspec_outer)
+! this_color = this_color + 1
+! do ispec = 1, nspec
+! if(ispec_is_d(ispec)) then
+! if (is_outer_element(ispec)) then
+! if (color(ispec) == 0) then
+! ok = 1
+! do ixadj = xadj(ispec), (xadj(ispec+1)-1)
+! if (is_outer_element(adj(ixadj)) .and. color(adj(ixadj)) == this_color) ok = 0
+! enddo
+! if (ok /= 0) then
+! color(ispec) = this_color
+! nb_already_colored = nb_already_colored + 1
+! endif
+! endif
+! endif
+! endif
+! enddo
+! enddo
+! nb_colors_outer_elements = this_color
+!
+! ! inner elements
+! do while(nb_already_colored<nspec)
+! this_color = this_color + 1
+! do ispec = 1, nspec
+! if(ispec_is_d(ispec)) then
+! if (.not. is_outer_element(ispec)) then
+! if (color(ispec) == 0) then
+! ok = 1
+! do ixadj = xadj(ispec), (xadj(ispec+1)-1)
+! if (.not. is_outer_element(adj(ixadj)) .and. color(adj(ixadj)) == this_color) ok = 0
+! enddo
+! if (ok /= 0) then
+! color(ispec) = this_color
+! nb_already_colored = nb_already_colored + 1
+! endif
+! endif
+! endif
+! endif
+! enddo
+! enddo
+!
+! nb_colors_inner_elements = this_color - nb_colors_outer_elements
+!
+!end subroutine get_color
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+!unused...
+!
+!!=======================================================================
+!!
+!! Charbel Farhat's FEM topology routines
+!!
+!! Dimitri Komatitsch, February 1996 - Code based on Farhat's original version from 1987
+!!
+!! modified and adapted by Dimitri Komatitsch, May 2006
+!!
+!!=======================================================================
+!
+! subroutine form_elt_connectivity_foelco(mn,mp,nspec,global_corner_number,&
+! nglob_GLL_full,ibool,nglob_eight_corners_only)
+!
+!!-----------------------------------------------------------------------
+!!
+!! Forms the MN and MP arrays
+!!
+!! Input :
+!! -------
+!! ibool Array needed to build the element connectivity table
+!! nspec Number of elements in the domain
+!! NGNOD_HEXAHEDRA number of nodes per hexahedron (brick with 8 corners)
+!!
+!! Output :
+!! --------
+!! MN, MP This is the element connectivity array pair.
+!! Array MN contains the list of the element
+!! connectivity, that is, the nodes contained in each
+!! element. They are stored in a stacked fashion.
+!!
+!! Pointer array MP stores the location of each
+!! element list. Its length is equal to the number
+!! of elements plus one.
+!!
+!!-----------------------------------------------------------------------
+!
+! implicit none
+!
+! include "constants.h"
+! integer, parameter :: NGNOD_HEXAHEDRA = 8
+!
+! integer nspec,nglob_GLL_full
+!
+! ! arrays with mesh parameters per slice
+! integer, intent(in), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+!
+! ! global corner numbers that need to be created
+! integer, intent(out), dimension(nglob_GLL_full) :: global_corner_number
+! integer, intent(out) :: mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1)
+! integer, intent(out) :: nglob_eight_corners_only
+!
+! integer ninter,nsum,ispec,node,k,inumcorner,ix,iy,iz
+!
+! ninter = 1
+! nsum = 1
+! mp(1) = 1
+!
+! !---- define topology of the elements in the mesh
+! !---- we need to define adjacent numbers from the sub-mesh consisting of the corners only
+! nglob_eight_corners_only = 0
+! global_corner_number(:) = -1
+!
+! do ispec=1,nspec
+!
+! inumcorner = 0
+! do iz = 1,NGLLZ,NGLLZ-1
+! do iy = 1,NGLLY,NGLLY-1
+! do ix = 1,NGLLX,NGLLX-1
+!
+! inumcorner = inumcorner + 1
+! if(inumcorner > NGNOD_HEXAHEDRA) stop 'corner number too large'
+!
+! ! check if this point was already assigned a number previously, otherwise create one and store it
+! if(global_corner_number(ibool(ix,iy,iz,ispec)) == -1) then
+! nglob_eight_corners_only = nglob_eight_corners_only + 1
+! global_corner_number(ibool(ix,iy,iz,ispec)) = nglob_eight_corners_only
+! endif
+!
+! node = global_corner_number(ibool(ix,iy,iz,ispec))
+! do k=nsum,ninter-1
+! if(node == mn(k)) goto 200
+! enddo
+!
+! mn(ninter) = node
+! ninter = ninter + 1
+! 200 continue
+!
+! enddo
+! enddo
+! enddo
+!
+! nsum = ninter
+! mp(ispec + 1) = nsum
+!
+! enddo
+!
+! end subroutine form_elt_connectivity_foelco
+!
+!!
+!!-------------------------------------------------------------------------------------------------
+!!
+!
+! subroutine form_node_connectivity_fonoco(mn,mp,ne,np,nglob_eight_corners_only,&
+! nspec,count_only,total_size_ne)
+!
+!!-----------------------------------------------------------------------
+!!
+!! Forms the NE and NP arrays
+!!
+!! Input :
+!! -------
+!! MN, MP, nspec
+!! nglob_eight_corners_only Number of nodes in the domain
+!!
+!! Output :
+!! --------
+!! NE, NP This is the node-connected element array pair.
+!! Integer array NE contains a list of the
+!! elements connected to each node, stored in stacked fashion.
+!!
+!! Array NP is the pointer array for the
+!! location of a node's element list in the NE array.
+!! Its length is equal to the number of points plus one.
+!!
+!!-----------------------------------------------------------------------
+!
+! implicit none
+!
+! include "constants.h"
+! integer, parameter :: NGNOD_HEXAHEDRA = 8
+!
+! ! only count the total size of the array that will be created, or actually create it
+! logical count_only
+! integer total_size_ne
+!
+! integer nglob_eight_corners_only,nspec
+!
+! integer, intent(in) :: mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1)
+!
+! integer, intent(out) :: ne(total_size_ne),np(nglob_eight_corners_only+1)
+!
+! integer nsum,inode,ispec,j
+!
+! nsum = 1
+! np(1) = 1
+!
+! do inode=1,nglob_eight_corners_only
+! do 200 ispec=1,nspec
+!
+! do j=mp(ispec),mp(ispec + 1) - 1
+! if (mn(j) == inode) then
+! if(count_only) then
+! total_size_ne = nsum
+! else
+! ne(nsum) = ispec
+! endif
+! nsum = nsum + 1
+! goto 200
+! endif
+! enddo
+! 200 continue
+!
+! np(inode + 1) = nsum
+!
+! enddo
+!
+! end subroutine form_node_connectivity_fonoco
+!
+!!
+!!-------------------------------------------------------------------------------------------------
+!!
+!
+! !subroutine create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,maskel,nspec,nglob_eight_corners_only,&
+! ! count_only,total_size_ne,total_size_adj,face)
+!
+! subroutine create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,nspec,nglob_eight_corners_only,&
+! count_only,total_size_ne,total_size_adj,face)
+!
+!!-----------------------------------------------------------------------
+!!
+!! Establishes the element adjacency information of the mesh
+!! Two elements are considered adjacent if they share a face.
+!!
+!! Input :
+!! -------
+!! MN, MP, NE, NP, nspec
+!! MASKEL logical mask (length = nspec)
+!!
+!! Output :
+!! --------
+!! ADJ, XADJ This is the element adjacency array pair. Array
+!! ADJ contains the list of the elements adjacent to
+!! element i. They are stored in a stacked fashion.
+!! Pointer array XADJ stores the location of each element list.
+!!
+!!-----------------------------------------------------------------------
+!
+! implicit none
+!
+! include "constants.h"
+! integer, parameter :: NGNOD_HEXAHEDRA = 8
+!
+! ! only count the total size of the array that will be created, or actually create it
+! logical count_only,face
+! integer total_size_ne,total_size_adj
+!
+! integer nglob_eight_corners_only
+!
+! integer, intent(in) :: mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1),ne(total_size_ne),np(nglob_eight_corners_only+1)
+!
+! integer, intent(out) :: adj(total_size_adj),xadj(nspec+1)
+!
+! logical maskel(nspec)
+! integer countel(nspec)
+!
+! integer nspec,iad,ispec,istart,istop,ino,node,jstart,jstop,nelem,jel
+!
+! xadj(1) = 1
+! iad = 1
+!
+! do ispec=1,nspec
+!
+! ! reset mask
+! maskel(:) = .false.
+!
+! ! mask current element
+! maskel(ispec) = .true.
+! if (face) countel(:) = 0
+!
+! istart = mp(ispec)
+! istop = mp(ispec+1) - 1
+! do ino=istart,istop
+! node = mn(ino)
+! jstart = np(node)
+! jstop = np(node + 1) - 1
+! do 120 jel=jstart,jstop
+! nelem = ne(jel)
+! if(maskel(nelem)) goto 120
+! if (face) then
+! ! if 2 elements share at least 3 corners, therefore they share a face
+! countel(nelem) = countel(nelem) + 1
+! if (countel(nelem)>=3) then
+! if(count_only) then
+! total_size_adj = iad
+! else
+! adj(iad) = nelem
+! endif
+! maskel(nelem) = .true.
+! iad = iad + 1
+! endif
+! else
+! if(count_only) then
+! total_size_adj = iad
+! else
+! adj(iad) = nelem
+! endif
+! maskel(nelem) = .true.
+! iad = iad + 1
+! endif
+! 120 continue
+! enddo
+!
+! xadj(ispec+1) = iad
+!
+! enddo
+!
+! end subroutine create_adjacency_table_adjncy
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+! PERMUTATIONS
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! implement permutation of elements for arrays of real (CUSTOM_REAL) type
+
+ subroutine permute_elements_real(array_to_permute,temp_array,perm,nspec)
+
+ implicit none
+
+ include "constants.h"
+
+ integer, intent(in) :: nspec
+ integer, intent(in), dimension(nspec) :: perm
+
+ real(kind=CUSTOM_REAL), intent(inout), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
+ array_to_permute,temp_array
+
+ integer old_ispec,new_ispec
+
+ ! copy the original array
+ temp_array(:,:,:,:) = array_to_permute(:,:,:,:)
+
+ do old_ispec = 1,nspec
+ new_ispec = perm(old_ispec)
+ array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec)
+ enddo
+
+ end subroutine permute_elements_real
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! implement permutation of elements for arrays of integer type
+
+ subroutine permute_elements_integer(array_to_permute,temp_array,perm,nspec)
+
+ implicit none
+
+ include "constants.h"
+
+ integer, intent(in) :: nspec
+ integer, intent(in), dimension(nspec) :: perm
+
+ integer, intent(inout), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
+ array_to_permute,temp_array
+
+ integer old_ispec,new_ispec
+
+ ! copy the original array
+ temp_array(:,:,:,:) = array_to_permute(:,:,:,:)
+
+ do old_ispec = 1,nspec
+ new_ispec = perm(old_ispec)
+ array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec)
+ enddo
+
+ end subroutine permute_elements_integer
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! implement permutation of elements for arrays of double precision type
+
+ subroutine permute_elements_dble(array_to_permute,temp_array,perm,nspec)
+
+ implicit none
+
+ include "constants.h"
+
+ integer, intent(in) :: nspec
+ integer, intent(in), dimension(nspec) :: perm
+
+ double precision, intent(inout), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
+ array_to_permute,temp_array
+
+ integer old_ispec,new_ispec
+
+ ! copy the original array
+ temp_array(:,:,:,:) = array_to_permute(:,:,:,:)
+
+ do old_ispec = 1,nspec
+ new_ispec = perm(old_ispec)
+ array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec)
+ enddo
+
+ end subroutine permute_elements_dble
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! implement permutation of elements for arrays of double precision type
+
+ subroutine permute_elements_logical1D(array_to_permute,temp_array,perm,nspec)
+
+ implicit none
+
+ include "constants.h"
+
+ integer, intent(in) :: nspec
+ integer, intent(in), dimension(nspec) :: perm
+
+ logical, intent(inout), dimension(nspec) :: array_to_permute,temp_array
+
+ integer old_ispec,new_ispec
+
+ ! copy the original array
+ temp_array(:) = array_to_permute(:)
+
+ do old_ispec = 1,nspec
+ new_ispec = perm(old_ispec)
+ array_to_permute(new_ispec) = temp_array(old_ispec)
+ enddo
+
+ end subroutine permute_elements_logical1D
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/save_arrays_solver.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -53,7 +53,15 @@
c22store,c23store,c24store,c25store,c26store,c33store, &
c34store,c35store,c36store,c44store,c45store,c46store, &
c55store,c56store,c66store, &
- ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic)
+ ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic, &
+ ispec_is_inner,nspec_inner_acoustic,nspec_inner_elastic, &
+ nspec_outer_acoustic,nspec_outer_elastic, &
+ num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
+ num_phase_ispec_elastic,phase_ispec_inner_elastic, &
+ num_colors_outer_acoustic,num_colors_inner_acoustic, &
+ num_elem_colors_acoustic, &
+ num_colors_outer_elastic,num_colors_inner_elastic, &
+ num_elem_colors_elastic)
implicit none
@@ -127,6 +135,23 @@
! material domain flags
logical, dimension(nspec) :: ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic
+! inner/outer elements
+ logical,dimension(nspec) :: ispec_is_inner
+ integer :: nspec_inner_acoustic,nspec_outer_acoustic
+ integer :: nspec_inner_elastic,nspec_outer_elastic
+ integer :: num_phase_ispec_acoustic
+ integer,dimension(num_phase_ispec_acoustic,2) :: phase_ispec_inner_acoustic
+ integer :: num_phase_ispec_elastic
+ integer,dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
+
+ ! mesh coloring
+ integer :: num_colors_outer_acoustic,num_colors_inner_acoustic
+ integer, dimension(num_colors_outer_acoustic + num_colors_inner_acoustic) :: &
+ num_elem_colors_acoustic
+ integer :: num_colors_outer_elastic,num_colors_inner_elastic
+ integer, dimension(num_colors_outer_elastic + num_colors_inner_elastic) :: &
+ num_elem_colors_elastic
+
! local parameters
real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: v_tmp
integer,dimension(:),allocatable :: v_tmp_i
@@ -272,6 +297,33 @@
write(IOUT) c66store
endif
+! inner/outer elements
+ write(IOUT) ispec_is_inner
+
+ if( ACOUSTIC_SIMULATION ) then
+ write(IOUT) nspec_inner_acoustic,nspec_outer_acoustic
+ write(IOUT) num_phase_ispec_acoustic
+ if(num_phase_ispec_acoustic > 0 ) write(IOUT) phase_ispec_inner_acoustic
+ endif
+
+ if( ELASTIC_SIMULATION ) then
+ write(IOUT) nspec_inner_elastic,nspec_outer_elastic
+ write(IOUT) num_phase_ispec_elastic
+ if(num_phase_ispec_elastic > 0 ) write(IOUT) phase_ispec_inner_elastic
+ endif
+
+! mesh coloring
+ if( USE_MESH_COLORING_GPU ) then
+ if( ACOUSTIC_SIMULATION ) then
+ write(IOUT) num_colors_outer_acoustic,num_colors_inner_acoustic
+ write(IOUT) num_elem_colors_acoustic
+ endif
+ if( ELASTIC_SIMULATION ) then
+ write(IOUT) num_colors_outer_elastic,num_colors_inner_elastic
+ write(IOUT) num_elem_colors_elastic
+ endif
+ endif
+
close(IOUT)
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/meshfem3D/save_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/meshfem3D/save_databases.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/meshfem3D/save_databases.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -83,60 +83,66 @@
integer, dimension(8) :: nspec_interface
- open(unit=15,file=prname(1:len_trim(prname))//'Database',status='unknown',action='write',form='formatted')
+ !open(unit=15,file=prname(1:len_trim(prname))//'Database',status='unknown',action='write',form='formatted')
+ open(unit=15,file=prname(1:len_trim(prname))//'Database', &
+ status='unknown',action='write',form='unformatted')
- write(15,*) nglob
+ write(15) nglob
do iglob=1,nglob
- write(15,*) iglob,nodes_coords(iglob,1),nodes_coords(iglob,2),nodes_coords(iglob,3)
+ write(15) iglob,nodes_coords(iglob,1),nodes_coords(iglob,2),nodes_coords(iglob,3)
end do
! Materials properties
- write(15,*) NMATERIALS, 0
+ write(15) NMATERIALS, 0
do idoubl = 1,NMATERIALS
!write(15,*) material_properties(idoubl,:)
matpropl(:) = material_properties(idoubl,:)
- write(15,*) matpropl
+ write(15) matpropl
end do
- write(15,*) nspec
+ write(15) nspec
do ispec=1,nspec
- write(15,'(11i14)') ispec,true_material_num(ispec),1,ibool(1,1,1,ispec),ibool(2,1,1,ispec),&
+ !write(15,'(11i14)') ispec,true_material_num(ispec),1,ibool(1,1,1,ispec),ibool(2,1,1,ispec),&
+ ! ibool(2,2,1,ispec),ibool(1,2,1,ispec),ibool(1,1,2,ispec),&
+ ! ibool(2,1,2,ispec),ibool(2,2,2,ispec),ibool(1,2,2,ispec)
+ write(15) ispec,true_material_num(ispec),1,ibool(1,1,1,ispec),ibool(2,1,1,ispec),&
ibool(2,2,1,ispec),ibool(1,2,1,ispec),ibool(1,1,2,ispec),&
ibool(2,1,2,ispec),ibool(2,2,2,ispec),ibool(1,2,2,ispec)
+
end do
! Boundaries
- write(15,*) 1,nspec2D_xmin
- write(15,*) 2,nspec2D_xmax
- write(15,*) 3,nspec2D_ymin
- write(15,*) 4,nspec2D_ymax
- write(15,*) 5,NSPEC2D_BOTTOM
- write(15,*) 6,NSPEC2D_TOP
+ write(15) 1,nspec2D_xmin
+ write(15) 2,nspec2D_xmax
+ write(15) 3,nspec2D_ymin
+ write(15) 4,nspec2D_ymax
+ write(15) 5,NSPEC2D_BOTTOM
+ write(15) 6,NSPEC2D_TOP
do i=1,nspec2D_xmin
- write(15,*) ibelm_xmin(i),ibool(1,1,1,ibelm_xmin(i)),ibool(1,NGLLY,1,ibelm_xmin(i)),&
+ write(15) ibelm_xmin(i),ibool(1,1,1,ibelm_xmin(i)),ibool(1,NGLLY,1,ibelm_xmin(i)),&
ibool(1,1,NGLLZ,ibelm_xmin(i)),ibool(1,NGLLY,NGLLZ,ibelm_xmin(i))
end do
do i=1,nspec2D_xmax
- write(15,*) ibelm_xmax(i),ibool(NGLLX,1,1,ibelm_xmax(i)),ibool(NGLLX,NGLLY,1,ibelm_xmax(i)), &
+ write(15) ibelm_xmax(i),ibool(NGLLX,1,1,ibelm_xmax(i)),ibool(NGLLX,NGLLY,1,ibelm_xmax(i)), &
ibool(NGLLX,1,NGLLZ,ibelm_xmax(i)),ibool(NGLLX,NGLLY,NGLLZ,ibelm_xmax(i))
end do
do i=1,nspec2D_ymin
- write(15,*) ibelm_ymin(i),ibool(1,1,1,ibelm_ymin(i)),ibool(NGLLX,1,1,ibelm_ymin(i)),&
+ write(15) ibelm_ymin(i),ibool(1,1,1,ibelm_ymin(i)),ibool(NGLLX,1,1,ibelm_ymin(i)),&
ibool(1,1,NGLLZ,ibelm_ymin(i)),ibool(NGLLX,1,NGLLZ,ibelm_ymin(i))
end do
do i=1,nspec2D_ymax
- write(15,*) ibelm_ymax(i),ibool(NGLLX,NGLLY,1,ibelm_ymax(i)),ibool(1,NGLLY,1,ibelm_ymax(i)), &
+ write(15) ibelm_ymax(i),ibool(NGLLX,NGLLY,1,ibelm_ymax(i)),ibool(1,NGLLY,1,ibelm_ymax(i)), &
ibool(NGLLX,NGLLY,NGLLZ,ibelm_ymax(i)),ibool(1,NGLLY,NGLLZ,ibelm_ymax(i))
end do
do i=1,NSPEC2D_BOTTOM
- write(15,*) ibelm_bottom(i),ibool(1,1,1,ibelm_bottom(i)),ibool(NGLLX,1,1,ibelm_bottom(i)), &
+ write(15) ibelm_bottom(i),ibool(1,1,1,ibelm_bottom(i)),ibool(NGLLX,1,1,ibelm_bottom(i)), &
ibool(NGLLX,NGLLY,1,ibelm_bottom(i)),ibool(1,NGLLY,1,ibelm_bottom(i))
end do
do i=1,NSPEC2D_TOP
- write(15,*) ibelm_top(i),ibool(1,1,NGLLZ,ibelm_top(i)),ibool(NGLLX,1,NGLLZ,ibelm_top(i)), &
+ write(15) ibelm_top(i),ibool(1,1,NGLLZ,ibelm_top(i)),ibool(NGLLX,1,NGLLZ,ibelm_top(i)), &
ibool(NGLLX,NGLLY,NGLLZ,ibelm_top(i)),ibool(1,NGLLY,NGLLZ,ibelm_top(i))
end do
@@ -194,79 +200,79 @@
nspec_interfaces_max = maxval(nspec_interface)
- write(15,*) nb_interfaces,nspec_interfaces_max
+ write(15) nb_interfaces,nspec_interfaces_max
if(interfaces(W)) then
- write(15,*) addressing(iproc_xi-1,iproc_eta),nspec_interface(W)
+ write(15) addressing(iproc_xi-1,iproc_eta),nspec_interface(W)
do ispec = 1,nspec
- if(iMPIcut_xi(1,ispec)) write(15,*) ispec,4,ibool(1,1,1,ispec),ibool(1,2,1,ispec), &
+ if(iMPIcut_xi(1,ispec)) write(15) ispec,4,ibool(1,1,1,ispec),ibool(1,2,1,ispec), &
ibool(1,1,2,ispec),ibool(1,2,2,ispec)
end do
end if
if(interfaces(E)) then
- write(15,*) addressing(iproc_xi+1,iproc_eta),nspec_interface(E)
+ write(15) addressing(iproc_xi+1,iproc_eta),nspec_interface(E)
do ispec = 1,nspec
- if(iMPIcut_xi(2,ispec)) write(15,*) ispec,4,ibool(2,1,1,ispec),ibool(2,2,1,ispec), &
+ if(iMPIcut_xi(2,ispec)) write(15) ispec,4,ibool(2,1,1,ispec),ibool(2,2,1,ispec), &
ibool(2,1,2,ispec),ibool(2,2,2,ispec)
end do
end if
if(interfaces(S)) then
- write(15,*) addressing(iproc_xi,iproc_eta-1),nspec_interface(S)
+ write(15) addressing(iproc_xi,iproc_eta-1),nspec_interface(S)
do ispec = 1,nspec
- if(iMPIcut_eta(1,ispec)) write(15,*) ispec,4,ibool(1,1,1,ispec),ibool(2,1,1,ispec), &
+ if(iMPIcut_eta(1,ispec)) write(15) ispec,4,ibool(1,1,1,ispec),ibool(2,1,1,ispec), &
ibool(1,1,2,ispec),ibool(2,1,2,ispec)
end do
end if
if(interfaces(N)) then
- write(15,*) addressing(iproc_xi,iproc_eta+1),nspec_interface(N)
+ write(15) addressing(iproc_xi,iproc_eta+1),nspec_interface(N)
do ispec = 1,nspec
- if(iMPIcut_eta(2,ispec)) write(15,*) ispec,4,ibool(2,2,1,ispec),ibool(1,2,1,ispec), &
+ if(iMPIcut_eta(2,ispec)) write(15) ispec,4,ibool(2,2,1,ispec),ibool(1,2,1,ispec), &
ibool(2,2,2,ispec),ibool(1,2,2,ispec)
end do
end if
if(interfaces(NW)) then
- write(15,*) addressing(iproc_xi-1,iproc_eta+1),nspec_interface(NW)
+ write(15) addressing(iproc_xi-1,iproc_eta+1),nspec_interface(NW)
do ispec = 1,nspec
if((iMPIcut_xi(1,ispec) .eqv. .true.) .and. (iMPIcut_eta(2,ispec) .eqv. .true.)) then
- write(15,*) ispec,2,ibool(1,2,1,ispec),ibool(1,2,2,ispec),-1,-1
+ write(15) ispec,2,ibool(1,2,1,ispec),ibool(1,2,2,ispec),-1,-1
end if
end do
end if
if(interfaces(NE)) then
- write(15,*) addressing(iproc_xi+1,iproc_eta+1),nspec_interface(NE)
+ write(15) addressing(iproc_xi+1,iproc_eta+1),nspec_interface(NE)
do ispec = 1,nspec
if((iMPIcut_xi(2,ispec) .eqv. .true.) .and. (iMPIcut_eta(2,ispec) .eqv. .true.)) then
- write(15,*) ispec,2,ibool(2,2,1,ispec),ibool(2,2,2,ispec),-1,-1
+ write(15) ispec,2,ibool(2,2,1,ispec),ibool(2,2,2,ispec),-1,-1
end if
end do
end if
if(interfaces(SE)) then
- write(15,*) addressing(iproc_xi+1,iproc_eta-1),nspec_interface(SE)
+ write(15) addressing(iproc_xi+1,iproc_eta-1),nspec_interface(SE)
do ispec = 1,nspec
if((iMPIcut_xi(2,ispec) .eqv. .true.) .and. (iMPIcut_eta(1,ispec) .eqv. .true.)) then
- write(15,*) ispec,2,ibool(2,1,1,ispec),ibool(2,1,2,ispec),-1,-1
+ write(15) ispec,2,ibool(2,1,1,ispec),ibool(2,1,2,ispec),-1,-1
end if
end do
end if
if(interfaces(SW)) then
- write(15,*) addressing(iproc_xi-1,iproc_eta-1),nspec_interface(SW)
+ write(15) addressing(iproc_xi-1,iproc_eta-1),nspec_interface(SW)
do ispec = 1,nspec
if((iMPIcut_xi(1,ispec) .eqv. .true.) .and. (iMPIcut_eta(1,ispec) .eqv. .true.)) then
- write(15,*) ispec,2,ibool(1,1,1,ispec),ibool(1,1,2,ispec),-1,-1
+ write(15) ispec,2,ibool(1,1,1,ispec),ibool(1,1,2,ispec),-1,-1
end if
end do
end if
else
- write(15,*) 0,0
+ write(15) 0,0
end if
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in 2011-11-03 03:11:45 UTC (rev 19145)
@@ -256,7 +256,6 @@
! add mesh coloring for the GPU + MPI implementation
logical, parameter :: USE_MESH_COLORING_GPU = .false.
integer, parameter :: MAX_NUMBER_OF_COLORS = 10000
- integer, parameter :: NGNOD_HEXAHEDRA = 8
!------------------------------------------------------
!----------- do not modify anything below -------------
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/write_VTK_data.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/write_VTK_data.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/write_VTK_data.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -38,17 +38,20 @@
integer :: nspec,nglob
-! global coordinates
+ ! global coordinates
integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
-! element flag array
+ ! element flag array
integer, dimension(nspec) :: elem_flag
+
+ ! file name
+ character(len=256) :: prname_file
+ !character(len=2), optional, intent(in) :: str_id
+
+ ! local parameters
integer :: ispec,i
-! file name
- character(len=256) prname_file
-
! write source and receiver VTK files for Paraview
!debug
!write(IMAIN,*) ' vtk file: '
@@ -79,6 +82,11 @@
write(IOVTK,*) ""
write(IOVTK,'(a,i12)') "CELL_DATA ",nspec
+ !if( present( str_id ) ) then
+ ! write(IOVTK,'(a)') "SCALARS elem_flag_"//str_id//" integer"
+ !else
+ ! write(IOVTK,'(a)') "SCALARS elem_flag integer"
+ !endif
write(IOVTK,'(a)') "SCALARS elem_flag integer"
write(IOVTK,'(a)') "LOOKUP_TABLE default"
do ispec = 1,nspec
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -65,7 +65,7 @@
logical:: phase_is_inner
-! enforces free surface (zeroes potentials at free surface)
+ ! enforces free surface (zeroes potentials at free surface)
if(.NOT. GPU_MODE) then
! on CPU
call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB, &
@@ -110,17 +110,17 @@
potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
endif
-! distinguishes two runs: for points on MPI interfaces, and points within the partitions
+ ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions
do iphase=1,2
- !first for points on MPI interfaces
+ !first for points on MPI interfaces, thus outer elements
if( iphase == 1 ) then
phase_is_inner = .false.
else
phase_is_inner = .true.
endif
-! acoustic pressure term
+ ! acoustic pressure term
if(.NOT. GPU_MODE) then
! on CPU
call compute_forces_acoustic_pot( iphase, NSPEC_AB,NGLOB_AB, &
@@ -184,7 +184,7 @@
endif ! PML
-! absorbing boundaries
+ ! absorbing boundaries
if(ABSORBING_CONDITIONS) then
if( PML .and. PML_USE_SOMMERFELD ) then
! adds a Sommerfeld condition on the domain's absorbing boundaries
@@ -215,7 +215,7 @@
endif
endif
-! elastic coupling
+ ! elastic coupling
if(ELASTIC_SIMULATION ) then
if( .NOT. GPU_MODE ) then
call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
@@ -243,13 +243,13 @@
endif
endif
-! poroelastic coupling
-! not implemented yet
+ ! poroelastic coupling
+ ! not implemented yet
if(POROELASTIC_SIMULATION ) &
! call compute_coupling_acoustic_poro()
call exit_MPI(myrank,'poroelastic coupling with acoustic domain not implemented yet!')
-! sources
+ ! sources
call compute_add_sources_acoustic(NSPEC_AB,NGLOB_AB,potential_dot_dot_acoustic, &
ibool,ispec_is_inner,phase_is_inner, &
NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,&
@@ -262,7 +262,7 @@
NTSTEP_BETWEEN_READ_ADJSRC, &
GPU_MODE, Mesh_pointer)
-! assemble all the contributions between slices using MPI
+ ! assemble all the contributions between slices using MPI
if( phase_is_inner .eqv. .false. ) then
! sends potential_dot_dot_acoustic values to corresponding MPI interface neighbors (non-blocking)
if(.NOT. GPU_MODE) then
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -37,7 +37,7 @@
! type = 2 : velocity V_y component
! type = 3 : velocity V_z component
! type = 4 : velocity V norm
- integer,parameter:: IMAGE_TYPE = 2
+ integer,parameter:: IMAGE_TYPE = 4
! cross-section surface
! cross-section origin point
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -778,7 +778,9 @@
sourcearrays, islice_selected_source, ispec_selected_source, &
number_receiver_global, ispec_selected_rec, &
nrec, nrec_local, &
- SIMULATION_TYPE,ncuda_devices)
+ SIMULATION_TYPE, &
+ USE_MESH_COLORING_GPU,nspec_acoustic,nspec_elastic, &
+ ncuda_devices)
call min_all_i(ncuda_devices,ncuda_devices_min)
call max_all_i(ncuda_devices,ncuda_devices_max)
@@ -793,7 +795,9 @@
ABSORBING_CONDITIONS,b_reclen_potential,b_absorb_potential, &
ELASTIC_SIMULATION, num_coupling_ac_el_faces, &
coupling_ac_el_ispec,coupling_ac_el_ijk, &
- coupling_ac_el_normal,coupling_ac_el_jacobian2Dw)
+ coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
+ num_colors_outer_acoustic,num_colors_inner_acoustic, &
+ num_elem_colors_acoustic)
if( SIMULATION_TYPE == 3 ) &
call prepare_fields_acoustic_adj_dev(Mesh_pointer, &
@@ -822,7 +826,9 @@
NOISE_TOMOGRAPHY, &
free_surface_normal,free_surface_ispec,free_surface_ijk, &
num_free_surface_faces, &
- ACOUSTIC_SIMULATION)
+ ACOUSTIC_SIMULATION, &
+ num_colors_outer_elastic,num_colors_inner_elastic, &
+ num_elem_colors_elastic)
if( SIMULATION_TYPE == 3 ) &
call prepare_fields_elastic_adj_dev(Mesh_pointer, NDIM*NGLOB_AB, &
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -76,6 +76,8 @@
read(27) ispec_is_poroelastic
! acoustic
+ ! number of acoustic elements in this partition
+ nspec_acoustic = count(ispec_is_acoustic(:))
! all processes will have acoustic_simulation set if any flag is .true.
call any_all_l( ANY(ispec_is_acoustic), ACOUSTIC_SIMULATION )
if( ACOUSTIC_SIMULATION ) then
@@ -98,6 +100,9 @@
endif
! elastic
+ ! number of elastic elements in this partition
+ nspec_elastic = count(ispec_is_elastic(:))
+ ! elastic simulation
call any_all_l( ANY(ispec_is_elastic), ELASTIC_SIMULATION )
if( ELASTIC_SIMULATION ) then
! displacement,velocity,acceleration
@@ -292,8 +297,68 @@
read(27) c66store
endif
+! inner / outer elements
+ allocate(ispec_is_inner(NSPEC_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ispec_is_inner'
+ read(27) ispec_is_inner
+
+ if( ACOUSTIC_SIMULATION ) then
+ read(27) nspec_inner_acoustic,nspec_outer_acoustic
+ read(27) num_phase_ispec_acoustic
+ if( num_phase_ispec_acoustic < 0 ) stop 'error acoustic simulation: num_phase_ispec_acoustic is < zero'
+ allocate( phase_ispec_inner_acoustic(num_phase_ispec_acoustic,2),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array phase_ispec_inner_acoustic'
+ if(num_phase_ispec_acoustic > 0 ) read(27) phase_ispec_inner_acoustic
+ endif
+
+ if( ELASTIC_SIMULATION ) then
+ read(27) nspec_inner_elastic,nspec_outer_elastic
+ read(27) num_phase_ispec_elastic
+ if( num_phase_ispec_elastic < 0 ) stop 'error elastic simulation: num_phase_ispec_elastic is < zero'
+ allocate( phase_ispec_inner_elastic(num_phase_ispec_elastic,2),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array phase_ispec_inner_elastic'
+ if(num_phase_ispec_elastic > 0 ) read(27) phase_ispec_inner_elastic
+ endif
+
+! mesh coloring for GPUs
+ if( USE_MESH_COLORING_GPU ) then
+ ! acoustic domain colors
+ if( ACOUSTIC_SIMULATION ) then
+ read(27) num_colors_outer_acoustic,num_colors_inner_acoustic
+
+ allocate(num_elem_colors_acoustic(num_colors_outer_acoustic + num_colors_inner_acoustic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_acoustic array'
+
+ read(27) num_elem_colors_acoustic
+ endif
+ ! elastic domain colors
+ if( ELASTIC_SIMULATION ) then
+ read(27) num_colors_outer_elastic,num_colors_inner_elastic
+
+ allocate(num_elem_colors_elastic(num_colors_outer_elastic + num_colors_inner_elastic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_elastic array'
+
+ read(27) num_elem_colors_elastic
+ endif
+ else
+ ! allocates dummy arrays
+ if( ACOUSTIC_SIMULATION ) then
+ num_colors_outer_acoustic = 0
+ num_colors_inner_acoustic = 0
+ allocate(num_elem_colors_acoustic(num_colors_outer_acoustic + num_colors_inner_acoustic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_acoustic array'
+ endif
+ if( ELASTIC_SIMULATION ) then
+ num_colors_outer_elastic = 0
+ num_colors_inner_elastic = 0
+ allocate(num_elem_colors_elastic(num_colors_outer_elastic + num_colors_inner_elastic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_elastic array'
+ endif
+ endif
+
close(27)
+ ! outputs total element numbers
call sum_all_i(count(ispec_is_acoustic(:)),inum)
if( myrank == 0 ) then
write(IMAIN,*) 'total acoustic elements:',inum
@@ -320,13 +385,7 @@
request_recv_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
if( ier /= 0 ) stop 'error allocating array buffer_send_vector_ext_mesh etc.'
-! locate inner and outer elements
- call rmd_setup_inner_outer_elemnts()
-
-!daniel: TODO -- mesh coloring
-! call rmd_setup_color_perm()
-
-! gets model dimensions
+ ! gets model dimensions
minl = minval( xstore )
maxl = maxval( xstore )
call min_all_all_cr(minl,min_all)
@@ -341,7 +400,7 @@
LATITUDE_MIN = min_all
LATITUDE_MAX = max_all
-! check courant criteria on mesh
+ ! checks courant criteria on mesh
if( ELASTIC_SIMULATION ) then
call check_mesh_resolution(myrank,NSPEC_AB,NGLOB_AB, &
ibool,xstore,ystore,zstore, &
@@ -366,437 +425,11 @@
end subroutine read_mesh_databases
-!
-!-------------------------------------------------------------------------------------------------
-!
- subroutine rmd_setup_inner_outer_elemnts()
- use specfem_par
- use specfem_par_elastic
- use specfem_par_acoustic
- implicit none
- ! local parameters
- integer :: i,j,k,ispec,iglob
- integer :: iinterface,ier
- character(len=256) :: filename
- logical,dimension(:),allocatable :: iglob_is_inner
- real :: percentage_edge
-
- ! allocates arrays
- allocate(ispec_is_inner(NSPEC_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ispec_is_inner'
- allocate(iglob_is_inner(NGLOB_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating temporary array iglob_is_inner'
-
- ! initialize flags
- ispec_is_inner(:) = .true.
- iglob_is_inner(:) = .true.
- do iinterface = 1, num_interfaces_ext_mesh
- do i = 1, nibool_interfaces_ext_mesh(iinterface)
- iglob = ibool_interfaces_ext_mesh(i,iinterface)
- iglob_is_inner(iglob) = .false.
- enddo
- enddo
-
- ! determines flags for inner elements (purely inside the partition)
- do ispec = 1, NSPEC_AB
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool(i,j,k,ispec)
- ispec_is_inner(ispec) = ( iglob_is_inner(iglob) .and. ispec_is_inner(ispec) )
- enddo
- enddo
- enddo
- enddo
-
- ! frees temporary array
- deallocate( iglob_is_inner )
-
- if( SAVE_MESH_FILES ) then
- filename = prname(1:len_trim(prname))//'ispec_is_inner'
- call write_VTK_data_elem_l(NSPEC_AB,NGLOB_AB, &
- xstore,ystore,zstore,ibool, &
- ispec_is_inner,filename)
- endif
-
- ! sets up elements for loops in acoustic simulations
- if( ACOUSTIC_SIMULATION ) then
- ! counts inner and outer elements
- nspec_inner_acoustic = 0
- nspec_outer_acoustic = 0
- do ispec = 1, NSPEC_AB
- if( ispec_is_acoustic(ispec) ) then
- if( ispec_is_inner(ispec) .eqv. .true. ) then
- nspec_inner_acoustic = nspec_inner_acoustic + 1
- else
- nspec_outer_acoustic = nspec_outer_acoustic + 1
- endif
- endif
- enddo
-
- ! stores indices of inner and outer elements for faster(?) computation
- num_phase_ispec_acoustic = max(nspec_inner_acoustic,nspec_outer_acoustic)
- allocate( phase_ispec_inner_acoustic(num_phase_ispec_acoustic,2),stat=ier)
- phase_ispec_inner_acoustic(:,:) = 0
-
- if( ier /= 0 ) stop 'error allocating array phase_ispec_inner_acoustic'
- nspec_inner_acoustic = 0
- nspec_outer_acoustic = 0
- do ispec = 1, NSPEC_AB
- if( ispec_is_acoustic(ispec) ) then
- if( ispec_is_inner(ispec) .eqv. .true. ) then
- nspec_inner_acoustic = nspec_inner_acoustic + 1
- phase_ispec_inner_acoustic(nspec_inner_acoustic,2) = ispec
- else
- nspec_outer_acoustic = nspec_outer_acoustic + 1
- phase_ispec_inner_acoustic(nspec_outer_acoustic,1) = ispec
- endif
- endif
- enddo
- !print *,'rank ',myrank,' acoustic inner spec: ',nspec_inner_acoustic
- !print *,'rank ',myrank,' acoustic outer spec: ',nspec_outer_acoustic
- endif
-
- ! sets up elements for loops in acoustic simulations
- if( ELASTIC_SIMULATION ) then
- ! counts inner and outer elements
- nspec_inner_elastic = 0
- nspec_outer_elastic = 0
- do ispec = 1, NSPEC_AB
- if( ispec_is_elastic(ispec) ) then
- if( ispec_is_inner(ispec) .eqv. .true. ) then
- nspec_inner_elastic = nspec_inner_elastic + 1
- else
- nspec_outer_elastic = nspec_outer_elastic + 1
- endif
- endif
- enddo
-
- ! stores indices of inner and outer elements for faster(?) computation
- num_phase_ispec_elastic = max(nspec_inner_elastic,nspec_outer_elastic)
- if( num_phase_ispec_elastic <= 0 ) stop 'error elastic simulation: num_phase_ispec_elastic is zero'
-
- allocate( phase_ispec_inner_elastic(num_phase_ispec_elastic,2),stat=ier)
- if( ier /= 0 ) stop 'error allocating array phase_ispec_inner_elastic'
- phase_ispec_inner_elastic(:,:) = 0
-
- nspec_inner_elastic = 0
- nspec_outer_elastic = 0
- do ispec = 1, NSPEC_AB
- if( ispec_is_elastic(ispec) ) then
- if( ispec_is_inner(ispec) .eqv. .true. ) then
- nspec_inner_elastic = nspec_inner_elastic + 1
- phase_ispec_inner_elastic(nspec_inner_elastic,2) = ispec
- else
- nspec_outer_elastic = nspec_outer_elastic + 1
- phase_ispec_inner_elastic(nspec_outer_elastic,1) = ispec
- endif
- endif
- enddo
- !print *,'rank ',myrank,' elastic inner spec: ',nspec_inner_elastic
- !print *,'rank ',myrank,' elastic outer spec: ',nspec_outer_elastic
- endif
-
- if(myrank == 0) then
- percentage_edge = 100.*count(ispec_is_inner(:))/real(NSPEC_AB)
- write(IMAIN,*) 'for overlapping of communications with calculations:'
- write(IMAIN,*) ' percentage of edge elements ',100. -percentage_edge,'%'
- write(IMAIN,*) ' percentage of volume elements ',percentage_edge,'%'
- write(IMAIN,*)
- endif
-
-
- end subroutine rmd_setup_inner_outer_elemnts
-
!
!-------------------------------------------------------------------------------------------------
!
-!daniel: TODO -- mesh coloring
- subroutine rmd_setup_color_perm()
-
- use specfem_par
- use specfem_par_elastic
- use specfem_par_acoustic
- implicit none
- ! local parameters
- ! added for color permutation
- integer :: nb_colors_outer_elements,nb_colors_inner_elements
- integer, dimension(:), allocatable :: perm
- integer, dimension(:), allocatable :: first_elem_number_in_this_color
- integer, dimension(:), allocatable :: num_of_elems_in_this_color
-
- integer :: icolor,ispec,ispec_counter
- integer :: nspec_outer,nspec_outer_min_global,nspec_outer_max_global
-
- integer :: ispec_inner_acoustic,ispec_outer_acoustic
- integer :: ispec_inner_elastic,ispec_outer_elastic
- integer :: nspec,nglob
-
- ! added for sorting
- integer, dimension(:,:,:,:), allocatable :: temp_array_int
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: temp_array_real
- logical, dimension(:), allocatable :: temp_array_logical_1D
- !integer, dimension(:), allocatable :: temp_array_int_1D
-
- nspec = NSPEC_AB
- nglob = NGLOB_AB
-
- !!!! David Michea: detection of the edges, coloring and permutation separately
- allocate(perm(nspec))
-
- ! implement mesh coloring for GPUs if needed, to create subsets of disconnected elements
- ! to remove dependencies and the need for atomic operations in the sum of
- ! elemental contributions in the solver
- if(USE_MESH_COLORING_GPU) then
-
- allocate(first_elem_number_in_this_color(MAX_NUMBER_OF_COLORS + 1))
-
- call get_perm_color_faster(ispec_is_inner,ibool,perm,nspec,nglob, &
- nb_colors_outer_elements,nb_colors_inner_elements, &
- nspec_outer,first_elem_number_in_this_color,myrank)
-
- ! for the last color, the next color is fictitious and its first (fictitious) element number is nspec + 1
- first_elem_number_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements + 1) = nspec + 1
-
- allocate(num_of_elems_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements))
-
- ! save mesh coloring
- open(unit=99,file=prname(1:len_trim(prname))//'num_of_elems_in_this_color.dat',status='unknown')
-
- ! number of colors for outer elements
- write(99,*) nb_colors_outer_elements
-
- ! number of colors for inner elements
- write(99,*) nb_colors_inner_elements
-
- ! number of elements in each color
- do icolor = 1, nb_colors_outer_elements + nb_colors_inner_elements
- num_of_elems_in_this_color(icolor) = first_elem_number_in_this_color(icolor+1) - first_elem_number_in_this_color(icolor)
- write(99,*) num_of_elems_in_this_color(icolor)
- enddo
- close(99)
-
- ! check that the sum of all the numbers of elements found in each color is equal
- ! to the total number of elements in the mesh
- if(sum(num_of_elems_in_this_color) /= nspec) then
- print *,'nspec = ',nspec
- print *,'total number of elements in all the colors of the mesh = ',sum(num_of_elems_in_this_color)
- call exit_MPI(myrank, 'incorrect total number of elements in all the colors of the mesh')
- endif
-
- ! check that the sum of all the numbers of elements found in each color for the outer elements is equal
- ! to the total number of outer elements found in the mesh
- if(sum(num_of_elems_in_this_color(1:nb_colors_outer_elements)) /= nspec_outer) then
- print *,'nspec_outer = ',nspec_outer
- print *,'total number of elements in all the colors of the mesh for outer elements = ',sum(num_of_elems_in_this_color)
- call exit_MPI(myrank, 'incorrect total number of elements in all the colors of the mesh for outer elements')
- endif
-
- deallocate(first_elem_number_in_this_color)
- deallocate(num_of_elems_in_this_color)
-
- else
-
-!! DK DK for regular C + MPI version for CPUs: do not use colors but nonetheless put all the outer elements
-!! DK DK first in order to be able to overlap non-blocking MPI communications with calculations
-
-!! DK DK nov 2010, for Rosa Badia / StarSs:
-!! no need for mesh coloring, but need to implement inner/outer subsets for non blocking MPI for StarSs
- ispec_counter = 0
- perm(:) = 0
-
- ! first generate all the outer elements
- do ispec = 1,nspec
- if( ispec_is_inner(ispec) .eqv. .false.) then
- ispec_counter = ispec_counter + 1
- perm(ispec) = ispec_counter
- endif
- enddo
-
- ! make sure we have detected some outer elements
- !if(ispec_counter <= 0) call exit_MPI(myrank, 'fatal error: no outer elements detected!')
-
- ! store total number of outer elements
- nspec_outer = ispec_counter
-
- ! then generate all the inner elements
- do ispec = 1,nspec
- if( ispec_is_inner(ispec) .eqv. .true.) then
- ispec_counter = ispec_counter + 1
- perm(ispec) = ispec_counter - nspec_outer ! starts again at 1
- endif
- enddo
-
- ! test that all the elements have been used once and only once
- if(ispec_counter /= nspec) call exit_MPI(myrank, 'fatal error: ispec_counter not equal to nspec')
-
- ! do basic checks
- if(minval(perm) /= 1) call exit_MPI(myrank, 'minval(perm) should be 1')
- !if(maxval(perm) /= nspec) call exit_MPI(myrank, 'maxval(perm) should be nspec')
-
-
- endif
-
- ! sets up elements for loops in acoustic simulations
- if( ACOUSTIC_SIMULATION ) then
- ispec_inner_acoustic = 0
- ispec_outer_acoustic = 0
- do ispec = 1, NSPEC_AB
- if( ispec_is_acoustic(ispec) ) then
- if( ispec_is_inner(ispec) .eqv. .true. ) then
- !ispec_inner_acoustic = ispec_inner_acoustic + 1
- ispec_inner_acoustic = perm(ispec)
- if( ispec_inner_acoustic < 1 .or. ispec_inner_acoustic > num_phase_ispec_acoustic ) &
- call exit_MPI(myrank,'error inner acoustic permutation')
-
- phase_ispec_inner_acoustic(ispec_inner_acoustic,2) = ispec
-
- else
- !ispec_outer_acoustic = ispec_outer_acoustic + 1
- ispec_outer_acoustic = perm(ispec)
- if( ispec_outer_acoustic < 1 .or. ispec_outer_acoustic > num_phase_ispec_acoustic ) &
- call exit_MPI(myrank,'error outer acoustic permutation')
-
- phase_ispec_inner_acoustic(ispec_outer_acoustic,1) = ispec
-
- endif
- endif
- enddo
- endif
-
- ! sets up elements for loops in elastic simulations
- if( ELASTIC_SIMULATION ) then
- ispec_inner_elastic = 0
- ispec_outer_elastic = 0
- do ispec = 1, NSPEC_AB
- if( ispec_is_elastic(ispec) ) then
- if( ispec_is_inner(ispec) .eqv. .true. ) then
- !ispec_inner_elastic = ispec_inner_elastic + 1
- ispec_inner_elastic = perm(ispec)
- if( ispec_inner_elastic < 1 .or. ispec_inner_elastic > num_phase_ispec_elastic ) then
- print*,'error: inner elastic permutation',ispec_inner_elastic,num_phase_ispec_elastic,nspec_outer
- call exit_MPI(myrank,'error inner elastic permutation')
- endif
- phase_ispec_inner_elastic(ispec_inner_elastic,2) = ispec
-
- else
- !ispec_outer_elastic = ispec_outer_elastic + 1
- ispec_outer_elastic = perm(ispec)
- if( ispec_outer_elastic < 1 .or. ispec_outer_elastic > num_phase_ispec_elastic ) then
- print*,'error: outer elastic permutation',ispec_outer_elastic,num_phase_ispec_elastic,nspec_outer
- call exit_MPI(myrank,'error outer elastic permutation')
- endif
- phase_ispec_inner_elastic(ispec_outer_elastic,1) = ispec
-
- endif
- endif
- enddo
- endif
-
- ! sorts array according to permutation
- ! SORT_MESH_INNER_OUTER
-!daniel: TODO -- mesh permutation
- if( .false. ) then
-
- ! permutation of ibool
- allocate(temp_array_int(NGLLX,NGLLY,NGLLZ,nspec))
- call permute_elements_integer(ibool,temp_array_int,perm,nspec)
- deallocate(temp_array_int)
-
- ! element domain flags
- allocate(temp_array_logical_1D(nglob))
- temp_array_logical_1D(:) = ispec_is_acoustic
- do ispec = 1, nspec
- ispec_is_acoustic(perm(ispec)) = temp_array_logical_1D(ispec)
- enddo
- temp_array_logical_1D(:) = ispec_is_elastic
- do ispec = 1, nspec
- ispec_is_elastic(perm(ispec)) = temp_array_logical_1D(ispec)
- enddo
- !temp_array_logical_1D(:) = ispec_is_poroelastic
- !do ispec = 1, nspec
- ! ispec_is_poroelastic(perm(ispec)) = temp_array_logical_1D(ispec)
- !enddo
- deallocate(temp_array_logical_1D)
-
- ! mesh arrays
- allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec))
- call permute_elements_real(xix,temp_array_real,perm,nspec)
- call permute_elements_real(xiy,temp_array_real,perm,nspec)
- call permute_elements_real(xiz,temp_array_real,perm,nspec)
- call permute_elements_real(etax,temp_array_real,perm,nspec)
- call permute_elements_real(etay,temp_array_real,perm,nspec)
- call permute_elements_real(etaz,temp_array_real,perm,nspec)
- call permute_elements_real(gammax,temp_array_real,perm,nspec)
- call permute_elements_real(gammay,temp_array_real,perm,nspec)
- call permute_elements_real(gammaz,temp_array_real,perm,nspec)
- call permute_elements_real(jacobian,temp_array_real,perm,nspec)
-
- call permute_elements_real(kappastore,temp_array_real,perm,nspec)
- call permute_elements_real(mustore,temp_array_real,perm,nspec)
-
- ! acoustic arrays
- if( ACOUSTIC_SIMULATION ) then
- call permute_elements_real(rhostore,temp_array_real,perm,nspec)
- endif
-
- ! elastic arrays
- if( ELASTIC_SIMULATION ) then
- call permute_elements_real(rho_vp,temp_array_real,perm,nspec)
- call permute_elements_real(rho_vs,temp_array_real,perm,nspec)
- if( ANISOTROPY ) then
- call permute_elements_real(c11store,temp_array_real,perm,nspec)
- call permute_elements_real(c12store,temp_array_real,perm,nspec)
- call permute_elements_real(c13store,temp_array_real,perm,nspec)
- call permute_elements_real(c14store,temp_array_real,perm,nspec)
- call permute_elements_real(c15store,temp_array_real,perm,nspec)
- call permute_elements_real(c16store,temp_array_real,perm,nspec)
- call permute_elements_real(c22store,temp_array_real,perm,nspec)
- call permute_elements_real(c23store,temp_array_real,perm,nspec)
- call permute_elements_real(c24store,temp_array_real,perm,nspec)
- call permute_elements_real(c25store,temp_array_real,perm,nspec)
- call permute_elements_real(c33store,temp_array_real,perm,nspec)
- call permute_elements_real(c34store,temp_array_real,perm,nspec)
- call permute_elements_real(c35store,temp_array_real,perm,nspec)
- call permute_elements_real(c36store,temp_array_real,perm,nspec)
- call permute_elements_real(c44store,temp_array_real,perm,nspec)
- call permute_elements_real(c45store,temp_array_real,perm,nspec)
- call permute_elements_real(c46store,temp_array_real,perm,nspec)
- call permute_elements_real(c55store,temp_array_real,perm,nspec)
- call permute_elements_real(c56store,temp_array_real,perm,nspec)
- call permute_elements_real(c66store,temp_array_real,perm,nspec)
- endif
- endif
-
- deallocate(temp_array_real)
- endif
-
- ! user output
- !call MPI_ALLREDUCE(nspec_outer,nspec_outer_min_global,1,MPI_INTEGER,MPI_MIN,MPI_COMM_WORLD,ier)
- !call MPI_ALLREDUCE(nspec_outer,nspec_outer_max_global,1,MPI_INTEGER,MPI_MAX,MPI_COMM_WORLD,ier)
- call min_all_i(nspec_outer,nspec_outer_min_global)
- call max_all_i(nspec_outer,nspec_outer_max_global)
- call min_all_i(nspec_outer,nspec_outer_min_global)
- call max_all_i(nspec_outer,nspec_outer_max_global)
-
- if(myrank == 0) then
- write(IMAIN,*) 'mesh coloring:'
- write(IMAIN,*) ' permutation : use coloring = ',USE_MESH_COLORING_GPU
- write(IMAIN,*) ' outer elements: min = ',nspec_outer_min_global
- write(IMAIN,*) ' max = ',nspec_outer_max_global
- write(IMAIN,*)
- endif
-
- deallocate(perm)
-
- end subroutine rmd_setup_color_perm
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
subroutine read_mesh_databases_adjoint()
! reads in moho meshes
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90 2011-11-03 01:09:17 UTC (rev 19144)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90 2011-11-03 03:11:45 UTC (rev 19145)
@@ -301,9 +301,13 @@
integer, dimension(:,:), allocatable :: phase_ispec_inner_elastic
integer :: num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic
+! mesh coloring
+ integer :: num_colors_outer_elastic,num_colors_inner_elastic
+ integer, dimension(:), allocatable :: num_elem_colors_elastic
+ integer :: nspec_elastic
+
logical :: ELASTIC_SIMULATION
-
! ADJOINT elastic
! (backward/reconstructed) wavefields
@@ -374,6 +378,11 @@
integer, dimension(:,:), allocatable :: phase_ispec_inner_acoustic
integer :: num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic
+! mesh coloring
+ integer :: num_colors_outer_acoustic,num_colors_inner_acoustic
+ integer, dimension(:), allocatable :: num_elem_colors_acoustic
+ integer :: nspec_acoustic
+
logical :: ACOUSTIC_SIMULATION
! ADJOINT acoustic
More information about the CIG-COMMITS
mailing list