[cig-commits] r19246 - in seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER: src/cuda src/generate_databases src/shared src/specfem3D utils utils/Cluster/pbs
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Mon Nov 28 20:28:43 PST 2011
Author: danielpeter
Date: 2011-11-28 20:28:42 -0800 (Mon, 28 Nov 2011)
New Revision: 19246
Added:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/Cluster/pbs/valgrind_go_solver_pbs.bash
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/locate_partition.f90
Modified:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
Log:
updates device initialization; updates mesh coloring routine w/ simple color balancing
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2011-11-29 04:28:42 UTC (rev 19246)
@@ -92,7 +92,7 @@
{
fprintf(stderr,"Error after %s: %s\n", kernel_name, cudaGetErrorString(err));
pause_for_debugger(0);
- free(kernel_name);
+ //free(kernel_name);
exit(EXIT_FAILURE);
}
}
@@ -106,7 +106,7 @@
#ifdef USE_MPI
MPI_Abort(MPI_COMM_WORLD,1);
#endif
- free(info);
+ //free(info);
exit(EXIT_FAILURE);
return;
}
@@ -467,9 +467,19 @@
if (device_count == 0) exit_on_error("CUDA runtime error: there is no device supporting CUDA\n");
*ncuda_devices = device_count;
- // Sets the active device
+
+ // Sets the active device
if(device_count > 1) {
// generalized for more GPUs per node
+ // note: without previous context release, cudaSetDevice will complain with the cuda error
+ // "setting the device when a process is active is not allowed"
+ // releases previous contexts
+ cudaThreadExit();
+
+ //printf("rank %d: cuda device count = %d sets device = %d \n",myrank,device_count,myrank % device_count);
+ //MPI_Barrier(MPI_COMM_WORLD);
+
+ // sets active device
cudaSetDevice( myrank % device_count );
exit_on_cuda_error("cudaSetDevice");
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90 2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90 2011-11-29 04:28:42 UTC (rev 19246)
@@ -31,9 +31,6 @@
! start non-blocking MPI calls and overlap them with the calculation of the inner elements
! (which works fine because there are always far more inner elements than outer elements)
-!*********************************************************************************************************
-! Mila
-
! daniel: modified routines to use element domain flags given in ispec_is_d, thus
! coloring only acoustic or elastic (or..) elements in one run, then repeat run for other domains.
! also, the permutation re-starts at 1 for outer and for inner elements,
@@ -122,26 +119,27 @@
! local variables
integer :: ispec
- !! DK DK for mesh coloring GPU Joseph Fourier
logical, dimension(:), allocatable :: mask_ibool
integer :: icolor, nb_already_colored
integer :: iglob1,iglob2,iglob3,iglob4,iglob5,iglob6,iglob7,iglob8
+ integer :: ier
logical :: conflict_found_need_new_color
- !! DK DK Nov 2011 added this
! Droux
- logical, dimension(:), allocatable :: icolor_conflict_found
- integer, dimension(:), allocatable :: nb_elems_in_this_color
logical :: try_Droux_coloring
- integer :: ispec2,ncolors,icolormin,icolormax,icolor_chosen,nb_elems_in_color_chosen
- integer :: nb_tries_of_Droux_1993,last_ispec_studied
+ logical :: fail_safe
! valence
- integer, dimension(:), allocatable :: count_ibool
integer :: maxval_count_ibool_outer,maxval_count_ibool_inner
-
+
+ ! display absolute minimum possible number of colors, i.e., maximum valence (for information only)
+ ! beware: this wastes memory (needs an additional array called "count_ibool")
+ logical, parameter :: DISPLAY_MIN_POSSIBLE_COLORS = .false.
+
! user output
if( myrank == 0 ) then
if( USE_DROUX_OPTIMIZATION ) then
write(IMAIN,*) ' fast coloring mesh algorithm w/ Droux optimization'
+ else if( BALANCE_COLORS_SIMPLE_ALGO ) then
+ write(IMAIN,*) ' fast coloring mesh algorithm w/ color balancing'
else
write(IMAIN,*) ' fast coloring mesh algorithm'
endif
@@ -152,7 +150,9 @@
nspec_inner = 0
nspec_domain = 0
do ispec=1,nspec
+ ! domain elements
if(ispec_is_d(ispec)) then
+ ! outer/inner elements
if(is_on_a_slice_edge(ispec)) then
nspec_outer=nspec_outer+1
else
@@ -162,13 +162,6 @@
endif
enddo
- !! DK DK start mesh coloring (new Apr 2010 version by DK for GPU Joseph Fourier)
- allocate(mask_ibool(nglob))
-
- !! DK DK ----------------------------------
- !! DK DK color the mesh in the crust_mantle
- !! DK DK ----------------------------------
-
! debug
!if(myrank == 0) then
! print *
@@ -180,8 +173,29 @@
! Droux optimization
try_Droux_coloring = USE_DROUX_OPTIMIZATION
- nb_tries_of_Droux_1993 = 1
+ if(BALANCE_COLORS_SIMPLE_ALGO .and. USE_DROUX_OPTIMIZATION ) then
+ if( myrank == 0 ) then
+ print *,'noticed a problem with mesh coloring options: '
+ print *,' cannot set both USE_DROUX_OPTIMAL_ALGO and BALANCE_COLORS_SIMPLE_ALGO'
+ print *,' -> this run will use only BALANCE_COLORS_SIMPLE_ALGO'
+ print *,'please check parameter settings in constants.h...'
+ endif
+ try_Droux_coloring = .false.
+ endif
+
+ ! gives a lower bound for the number of colors needed
+ if(DISPLAY_MIN_POSSIBLE_COLORS .or. try_Droux_coloring) then
+ ! gets maximum values of valence for inner and outer element points
+ call count_mesh_valence(ibool,is_on_a_slice_edge,ispec_is_d, &
+ myrank, nspec, nglob, &
+ maxval_count_ibool_outer,maxval_count_ibool_inner)
+ endif
+
+ ! allocates mask
+ allocate(mask_ibool(nglob),stat=ier)
+ if( ier /= 0 ) stop 'error allocating mask_ibool array'
+
! entry point for fail-safe mechanism when Droux 1993 fails
999 continue
@@ -264,8 +278,6 @@
334 continue
icolor = icolor + 1
- !if(myrank == 0) print *,'analyzing color ',icolor,' for all the elements of the mesh'
-
! debug: user output
!if(myrank == 0) then
! print *,' analyzing color ',icolor,' - inner elements'
@@ -326,34 +338,44 @@
nb_colors_inner_elements = icolor - nb_colors_outer_elements
-!!!!!!!!!! DK DK Nov 2011 added this to create more balanced colors according to JJ Droux (1993)
-
- ! Droux optimization: might not find an optimial solution.
- ! we will probably have to try a few times with increasing colors
- if(try_Droux_coloring) then
-
- ! debug outupt
- if( myrank == 0 ) then
- write(IMAIN,*) ' fast algorithm: number of outer element colors = ',nb_colors_outer_elements
- write(IMAIN,*) ' fast algorithm: number of inner element colors = ',nb_colors_inner_elements,' total:', &
- nb_colors_outer_elements + nb_colors_inner_elements
- write(IMAIN,*) ' fast algorithm: number of total colors = ',nb_colors_outer_elements + nb_colors_inner_elements
+ ! Droux optimization:
+ ! added this to create more balanced colors according to JJ Droux (1993)
+ ! note: this might not find an optimial solution.
+ ! we will probably have to try a few times with increasing colors
+ if( try_Droux_coloring ) then
+ ! initializes fail-safe mechanism
+ fail_safe = .false.
+
+ ! tries to find a balanced coloring
+ call balance_colors_Droux(ibool,is_on_a_slice_edge,ispec_is_d, &
+ myrank, nspec, nglob, &
+ color,nb_colors_outer_elements,nb_colors_inner_elements, &
+ nspec_outer,nspec_inner,maxval_count_ibool_inner, &
+ mask_ibool,fail_safe)
+
+ ! in case it fails go back to simple coloring algorithm
+ if( fail_safe ) then
+ try_Droux_coloring = .false.
+ if(myrank == 0) write(IMAIN,*) ' giving up on Droux 1993 algorithm, calling fail-safe mechanism'
+ goto 999
endif
+ endif ! of if(try_Droux_coloring)
- !! DK DK Nov 2011: give a lower bound for the number of colors needed
- allocate( count_ibool(nglob))
+ ! balances colors using a simple algorithm (if Droux was not used)
+ if( BALANCE_COLORS_SIMPLE_ALGO ) then
+ call balance_colors_simple(ibool,is_on_a_slice_edge,ispec_is_d, &
+ myrank, nspec, nglob, &
+ color,nb_colors_outer_elements,nb_colors_inner_elements, &
+ nspec_outer,nspec_inner,mask_ibool)
+ endif
- ! valence numbers of the mesh
- maxval_count_ibool_outer = 0
- maxval_count_ibool_inner = 0
-
- ! valence for outer elements
- count_ibool(:) = 0
+ ! checks that all the color sets are independent
+ do icolor = 1,maxval(color)
+ mask_ibool(:) = .false.
do ispec = 1,nspec
! domain elements only
if(ispec_is_d(ispec)) then
- ! outer elements
- if (is_on_a_slice_edge(ispec)) then
+ if(color(ispec) == icolor ) then
! the eight corners of the current element
iglob1=ibool(1,1,1,ispec)
iglob2=ibool(NGLLX,1,1,ispec)
@@ -364,85 +386,408 @@
iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
iglob8=ibool(1,NGLLY,NGLLZ,ispec)
- count_ibool(iglob1) = count_ibool(iglob1) + 1
- count_ibool(iglob2) = count_ibool(iglob2) + 1
- count_ibool(iglob3) = count_ibool(iglob3) + 1
- count_ibool(iglob4) = count_ibool(iglob4) + 1
- count_ibool(iglob5) = count_ibool(iglob5) + 1
- count_ibool(iglob6) = count_ibool(iglob6) + 1
- count_ibool(iglob7) = count_ibool(iglob7) + 1
- count_ibool(iglob8) = count_ibool(iglob8) + 1
+ if(mask_ibool(iglob1) .or. mask_ibool(iglob2) .or. mask_ibool(iglob3) .or. mask_ibool(iglob4) .or. &
+ mask_ibool(iglob5) .or. mask_ibool(iglob6) .or. mask_ibool(iglob7) .or. mask_ibool(iglob8)) then
+ ! if element of this color has a common point with another element of that same color
+ ! then there is a problem, the color set is not correct
+ print*,'error check color:',icolor
+ stop 'error detected: found a common point inside a color set'
+ else
+ mask_ibool(iglob1) = .true.
+ mask_ibool(iglob2) = .true.
+ mask_ibool(iglob3) = .true.
+ mask_ibool(iglob4) = .true.
+ mask_ibool(iglob5) = .true.
+ mask_ibool(iglob6) = .true.
+ mask_ibool(iglob7) = .true.
+ mask_ibool(iglob8) = .true.
+ endif
endif
endif
enddo
- maxval_count_ibool_outer = maxval(count_ibool)
- ! valence for inner elements
- count_ibool(:) = 0
- do ispec = 1,nspec
- ! domain elements only
- if(ispec_is_d(ispec)) then
- ! inner elements
- if (.not. is_on_a_slice_edge(ispec)) then
- ! the eight corners of the current element
- iglob1=ibool(1,1,1,ispec)
- iglob2=ibool(NGLLX,1,1,ispec)
- iglob3=ibool(NGLLX,NGLLY,1,ispec)
- iglob4=ibool(1,NGLLY,1,ispec)
- iglob5=ibool(1,1,NGLLZ,ispec)
- iglob6=ibool(NGLLX,1,NGLLZ,ispec)
- iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
- iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+ !debug output
+ !if(myrank == 0) print *,' color ',icolor,' has disjoint elements only and is therefore OK'
+ !if(myrank == 0) print *,' color ',icolor,' contains ',count(color == icolor),' elements'
+ enddo
+ ! debug output
+ !if(myrank == 0) then
+ ! print*, ' the ',maxval(color),' color sets are OK'
+ !endif
- count_ibool(iglob1) = count_ibool(iglob1) + 1
- count_ibool(iglob2) = count_ibool(iglob2) + 1
- count_ibool(iglob3) = count_ibool(iglob3) + 1
- count_ibool(iglob4) = count_ibool(iglob4) + 1
- count_ibool(iglob5) = count_ibool(iglob5) + 1
- count_ibool(iglob6) = count_ibool(iglob6) + 1
- count_ibool(iglob7) = count_ibool(iglob7) + 1
- count_ibool(iglob8) = count_ibool(iglob8) + 1
- endif
- endif
- enddo
- maxval_count_ibool_inner = maxval(count_ibool)
+ deallocate(mask_ibool)
- ! debug outupt
- if( myrank == 0 ) then
- write(IMAIN,*) ' maximum valence (i.e. minimum possible nb of colors) for outer = ',maxval_count_ibool_outer
- write(IMAIN,*) ' maximum valence (i.e. minimum possible nb of colors) for inner = ',maxval_count_ibool_inner
+ end subroutine get_color_faster
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine count_mesh_valence(ibool,is_on_a_slice_edge,ispec_is_d, &
+ myrank, nspec, nglob, &
+ maxval_count_ibool_outer,maxval_count_ibool_inner)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: nspec,nglob
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+ logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
+
+ integer :: myrank
+ integer :: maxval_count_ibool_outer,maxval_count_ibool_inner
+
+ ! local parameters
+ integer, dimension(:), allocatable :: count_ibool
+ integer :: ispec
+ integer :: iglob1,iglob2,iglob3,iglob4,iglob5,iglob6,iglob7,iglob8
+ integer :: ier
+
+ ! allocates count array
+ allocate(count_ibool(nglob),stat=ier)
+ if( ier /= 0 ) stop 'error allocating count_ibool array'
+
+ ! valence numbers of the mesh
+ maxval_count_ibool_outer = 0
+ maxval_count_ibool_inner = 0
+
+ ! valence for outer elements
+ count_ibool(:) = 0
+ do ispec = 1,nspec
+ ! domain elements only
+ if(ispec_is_d(ispec)) then
+ ! outer elements
+ if (is_on_a_slice_edge(ispec)) then
+ ! the eight corners of the current element
+ iglob1=ibool(1,1,1,ispec)
+ iglob2=ibool(NGLLX,1,1,ispec)
+ iglob3=ibool(NGLLX,NGLLY,1,ispec)
+ iglob4=ibool(1,NGLLY,1,ispec)
+ iglob5=ibool(1,1,NGLLZ,ispec)
+ iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+ iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+ iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+
+ count_ibool(iglob1) = count_ibool(iglob1) + 1
+ count_ibool(iglob2) = count_ibool(iglob2) + 1
+ count_ibool(iglob3) = count_ibool(iglob3) + 1
+ count_ibool(iglob4) = count_ibool(iglob4) + 1
+ count_ibool(iglob5) = count_ibool(iglob5) + 1
+ count_ibool(iglob6) = count_ibool(iglob6) + 1
+ count_ibool(iglob7) = count_ibool(iglob7) + 1
+ count_ibool(iglob8) = count_ibool(iglob8) + 1
+ endif
endif
+ enddo
+ maxval_count_ibool_outer = maxval(count_ibool)
- deallocate(count_ibool)
+ ! valence for inner elements
+ count_ibool(:) = 0
+ do ispec = 1,nspec
+ ! domain elements only
+ if(ispec_is_d(ispec)) then
+ ! inner elements
+ if (.not. is_on_a_slice_edge(ispec)) then
+ ! the eight corners of the current element
+ iglob1=ibool(1,1,1,ispec)
+ iglob2=ibool(NGLLX,1,1,ispec)
+ iglob3=ibool(NGLLX,NGLLY,1,ispec)
+ iglob4=ibool(1,NGLLY,1,ispec)
+ iglob5=ibool(1,1,NGLLZ,ispec)
+ iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+ iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+ iglob8=ibool(1,NGLLY,NGLLZ,ispec)
- ! initial guess of number of colors needed
- if( maxval_count_ibool_inner > 0 .and. maxval_count_ibool_inner < nb_colors_inner_elements ) then
- ! uses maximum valence to estimate number of colors for Droux
- nb_colors_inner_elements = maxval_count_ibool_inner
+ count_ibool(iglob1) = count_ibool(iglob1) + 1
+ count_ibool(iglob2) = count_ibool(iglob2) + 1
+ count_ibool(iglob3) = count_ibool(iglob3) + 1
+ count_ibool(iglob4) = count_ibool(iglob4) + 1
+ count_ibool(iglob5) = count_ibool(iglob5) + 1
+ count_ibool(iglob6) = count_ibool(iglob6) + 1
+ count_ibool(iglob7) = count_ibool(iglob7) + 1
+ count_ibool(iglob8) = count_ibool(iglob8) + 1
+ endif
endif
+ enddo
+ maxval_count_ibool_inner = maxval(count_ibool)
- !! DK DK Nov 2011: give a lower bound for the number of colors needed
+ ! debug outupt
+ if( myrank == 0 ) then
+ write(IMAIN,*) ' maximum valence (i.e. minimum possible nb of colors) for outer = ',maxval_count_ibool_outer
+ write(IMAIN,*) ' maximum valence (i.e. minimum possible nb of colors) for inner = ',maxval_count_ibool_inner
+ endif
- !! DK DK do it for inner elements only for now
- nb_already_colored = nspec_outer
+ deallocate(count_ibool)
+
+ end subroutine count_mesh_valence
- 765 continue
+!
+!-------------------------------------------------------------------------------------------------
+!
- ! initial guess of number of colors needed
- ncolors = nb_colors_outer_elements + nb_colors_inner_elements
+ subroutine balance_colors_Droux(ibool,is_on_a_slice_edge,ispec_is_d, &
+ myrank, nspec, nglob, &
+ color, nb_colors_outer_elements, nb_colors_inner_elements, &
+ nspec_outer,nspec_inner,maxval_count_ibool_inner, &
+ mask_ibool,fail_safe)
- ! debug output
- if( myrank == 0 ) then
- write(IMAIN,*) ' Droux optimization: try = ',nb_tries_of_Droux_1993,'colors = ',ncolors
- endif
+ implicit none
- icolormin = nb_colors_outer_elements + 1
- icolormax = ncolors
+ include "constants.h"
- allocate(nb_elems_in_this_color(ncolors))
- allocate(icolor_conflict_found(ncolors))
+ integer :: nspec,nglob
- nb_elems_in_this_color(:) = 0
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+ logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
+ logical, dimension(nglob) :: mask_ibool
+
+ integer, dimension(nspec) :: color
+
+ integer :: myrank
+ integer :: nb_colors_outer_elements,nb_colors_inner_elements
+
+ integer :: nspec_outer,nspec_inner
+ integer :: maxval_count_ibool_inner
+
+ logical :: fail_safe
+
+ ! local parameters
+ logical, dimension(:), allocatable :: icolor_conflict_found
+ integer, dimension(:), allocatable :: nb_elems_in_this_color
+ integer :: ispec,ispec2,icolor,ncolors,icolormin,icolormax,icolor_chosen,nb_elems_in_color_chosen
+ integer :: nb_tries_of_Droux_1993,last_ispec_studied
+ integer :: ier
+
+ ! debug outupt
+ if( myrank == 0 ) then
+ write(IMAIN,*) ' balancing colors: Droux algorithm'
+ write(IMAIN,*) ' initial number of outer element colors = ',nb_colors_outer_elements
+ write(IMAIN,*) ' initial number of inner element colors = ',nb_colors_inner_elements
+ write(IMAIN,*) ' initial number of total colors = ',nb_colors_outer_elements + nb_colors_inner_elements
+ endif
+
+ ! initial guess of number of colors needed
+ if( maxval_count_ibool_inner > 0 .and. maxval_count_ibool_inner < nb_colors_inner_elements ) then
+ ! uses maximum valence to estimate number of colors for Droux
+ nb_colors_inner_elements = maxval_count_ibool_inner
+ endif
+
+ !! DK DK do it for inner elements only for now
+ ! Droux optimization run
+ nb_tries_of_Droux_1993 = 1
+
+ ! entry point to re-try Droux
+ 765 continue
+
+ ! initial guess of number of colors needed
+ ncolors = nb_colors_outer_elements + nb_colors_inner_elements
+
+ ! debug output
+ if( myrank == 0 ) then
+ write(IMAIN,*) ' Droux optimization: try = ',nb_tries_of_Droux_1993,'colors = ',ncolors
+ endif
+
+ icolormin = nb_colors_outer_elements + 1
+ icolormax = ncolors
+
+ ! allocates temporary arrays
+ allocate(nb_elems_in_this_color(ncolors), &
+ icolor_conflict_found(ncolors),stat=ier)
+ if( ier /= 0 ) stop 'error allocating nb_elems_in_this_color arrays'
+
+ nb_elems_in_this_color(:) = 0
+ mask_ibool(:) = .false.
+ last_ispec_studied = -1
+
+ do ispec = 1,nspec
+ ! domain elements only
+ if(ispec_is_d(ispec)) then
+
+ ! only inner elements
+ if (is_on_a_slice_edge(ispec)) cycle
+
+ ! unmark the eight corners of the previously marked element
+ if(last_ispec_studied > 0) then
+ mask_ibool(ibool(1,1,1,last_ispec_studied)) = .false.
+ mask_ibool(ibool(NGLLX,1,1,last_ispec_studied)) = .false.
+ mask_ibool(ibool(NGLLX,NGLLY,1,last_ispec_studied)) = .false.
+ mask_ibool(ibool(1,NGLLY,1,last_ispec_studied)) = .false.
+ mask_ibool(ibool(1,1,NGLLZ,last_ispec_studied)) = .false.
+ mask_ibool(ibool(NGLLX,1,NGLLZ,last_ispec_studied)) = .false.
+ mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,last_ispec_studied)) = .false.
+ mask_ibool(ibool(1,NGLLY,NGLLZ,last_ispec_studied)) = .false.
+ endif
+ icolor_conflict_found(icolormin:icolormax) = .false.
+
+ ! mark the eight corners of the current element
+ mask_ibool(ibool(1,1,1,ispec)) = .true.
+ mask_ibool(ibool(NGLLX,1,1,ispec)) = .true.
+ mask_ibool(ibool(NGLLX,NGLLY,1,ispec)) = .true.
+ mask_ibool(ibool(1,NGLLY,1,ispec)) = .true.
+ mask_ibool(ibool(1,1,NGLLZ,ispec)) = .true.
+ mask_ibool(ibool(NGLLX,1,NGLLZ,ispec)) = .true.
+ mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec)) = .true.
+ mask_ibool(ibool(1,NGLLY,NGLLZ,ispec)) = .true.
+ last_ispec_studied = ispec
+
+ if(ispec > 1) then
+ do ispec2 = 1,ispec - 1
+ ! domain elements only
+ if(ispec_is_d(ispec2)) then
+
+ ! only inner elements
+ if (is_on_a_slice_edge(ispec2)) cycle
+
+ ! if conflict already found previously with this color, no need to test again
+ if (icolor_conflict_found(color(ispec2))) cycle
+
+ ! test the eight corners of the current element for a common point with element under study
+ if (mask_ibool(ibool(1,1,1,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,1,1,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,NGLLY,1,ispec2)) .or. &
+ mask_ibool(ibool(1,NGLLY,1,ispec2)) .or. &
+ mask_ibool(ibool(1,1,NGLLZ,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,1,NGLLZ,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
+ mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
+ icolor_conflict_found(color(ispec2)) = .true.
+
+ endif ! domain elements
+ enddo
+ endif
+
+ ! check if the Droux 1993 algorithm found a solution
+ if (all(icolor_conflict_found(icolormin:icolormax))) then
+ ! user output
+ !if(myrank == 0) write(IMAIN,*) ' Droux 1993 algorithm did not find any solution for ncolors = ',ncolors
+
+ ! try with one more color
+ if(nb_tries_of_Droux_1993 < MAX_NB_TRIES_OF_DROUX_1993) then
+ nb_colors_inner_elements = nb_colors_inner_elements + 1
+ deallocate(nb_elems_in_this_color)
+ deallocate(icolor_conflict_found)
+ nb_tries_of_Droux_1993 = nb_tries_of_Droux_1993 + 1
+ goto 765
+ else
+ ! fail-safe mechanism: if Droux 1993 still fails after all the tries with one more color,
+ ! then go back to my original simple and fast coloring algorithm
+ fail_safe = .true.
+ return
+ endif
+ endif
+
+ ! loop on all the colors to determine the color with the smallest number
+ ! of elements and for which there is no conflict
+ nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
+ do icolor = icolormin,icolormax
+ if (.not. icolor_conflict_found(icolor) .and. nb_elems_in_this_color(icolor) < nb_elems_in_color_chosen) then
+ icolor_chosen = icolor
+ nb_elems_in_color_chosen = nb_elems_in_this_color(icolor)
+ endif
+ enddo
+
+ ! store the color finally chosen
+ color(ispec) = icolor_chosen
+ nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
+
+ endif ! domain elements
+ enddo
+
+ ! debug output
+ if(myrank == 0) then
+ write(IMAIN,*) ' created a total of ',maxval(color),' colors in this domain' ! 'for all the domain elements of the mesh'
+ if( nb_colors_outer_elements > 0 ) &
+ write(IMAIN,*) ' typical nb of elements per color for outer elements should be ', &
+ nspec_outer / nb_colors_outer_elements
+ if( nb_colors_inner_elements > 0 ) &
+ write(IMAIN,*) ' typical nb of elements per color for inner elements should be ', &
+ nspec_inner / nb_colors_inner_elements
+ endif
+
+
+ end subroutine balance_colors_Droux
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine balance_colors_simple(ibool,is_on_a_slice_edge,ispec_is_d, &
+ myrank, nspec, nglob, &
+ color, nb_colors_outer_elements, nb_colors_inner_elements, &
+ nspec_outer,nspec_inner,mask_ibool)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: nspec,nglob
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+ logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
+ logical, dimension(nglob) :: mask_ibool
+
+ integer, dimension(nspec) :: color
+
+ integer :: myrank
+ integer :: nb_colors_outer_elements,nb_colors_inner_elements
+
+ integer :: nspec_outer,nspec_inner
+
+ ! local parameters
+ logical, dimension(:), allocatable :: icolor_conflict_found
+ integer, dimension(:), allocatable :: nb_elems_in_this_color
+ integer :: ispec,ispec2,icolor,ncolors,icolormin,icolormax,icolor_chosen,nb_elems_in_color_chosen
+ integer :: last_ispec_studied
+ integer :: target_nb_elems_per_color,icolor_target
+ integer :: ier
+
+ ! debug outupt
+ if( myrank == 0 ) then
+ write(IMAIN,*) ' balancing colors: simple algorithm'
+ write(IMAIN,*) ' number of outer element colors = ',nb_colors_outer_elements
+ write(IMAIN,*) ' number of inner element colors = ',nb_colors_inner_elements
+ write(IMAIN,*) ' number of total colors = ',nb_colors_outer_elements + nb_colors_inner_elements
+ endif
+
+ ! balances colors in postprocess if Droux (1993) is not used
+ ncolors = nb_colors_outer_elements + nb_colors_inner_elements
+
+ ! allocates temporary arrays
+ allocate(nb_elems_in_this_color(ncolors), &
+ icolor_conflict_found(ncolors),stat=ier)
+ if( ier /= 0 ) stop 'error allocating nb_elems_in_this_color arrays'
+
+ !! DK DK do it for outer elements
+ icolormin = 1
+ icolormax = nb_colors_outer_elements
+
+ ! ideal value if all colors are perfectly balanced
+ if( nb_colors_outer_elements > 0 ) then
+ target_nb_elems_per_color = nspec_outer / nb_colors_outer_elements + 1
+ else
+ target_nb_elems_per_color = 1
+ endif
+
+ ! print *,'nspec_outer,target_nb_elems_per_color = ',nspec_outer,target_nb_elems_per_color
+
+ ! count the initial number of elements in each color
+ nb_elems_in_this_color(:) = 0
+ do icolor = icolormin,icolormax
+ nb_elems_in_this_color(icolor) = count(color == icolor)
+ enddo
+
+ ! do not balance the last one, because it will be balanced automatically by the others
+ do icolor = icolormin,icolormax-1
+
+ ! if color is already balanced, do nothing
+ ! (this works because in the initial set the number of elements per color decreases when the color number increases)
+ if(nb_elems_in_this_color(icolor) <= target_nb_elems_per_color) cycle
+
mask_ibool(:) = .false.
last_ispec_studied = -1
@@ -450,9 +795,15 @@
! domain elements only
if(ispec_is_d(ispec)) then
- ! only inner elements
- if (is_on_a_slice_edge(ispec)) cycle
+ ! only outer elements
+ if (.not. is_on_a_slice_edge(ispec)) cycle
+ ! only elements of this color
+ if (color(ispec) /= icolor) cycle
+
+ ! if color is now balanced because we have moved enough elements then stop searching
+ if(nb_elems_in_this_color(icolor) <= target_nb_elems_per_color) exit
+
! unmark the eight corners of the previously marked element
if(last_ispec_studied > 0) then
mask_ibool(ibool(1,1,1,last_ispec_studied)) = .false.
@@ -465,6 +816,7 @@
mask_ibool(ibool(1,NGLLY,NGLLZ,last_ispec_studied)) = .false.
endif
icolor_conflict_found(icolormin:icolormax) = .false.
+ icolor_conflict_found(icolor) = .true. ! cannot move element to the color it already has
! mark the eight corners of the current element
mask_ibool(ibool(1,1,1,ispec)) = .true.
@@ -477,141 +829,198 @@
mask_ibool(ibool(1,NGLLY,NGLLZ,ispec)) = .true.
last_ispec_studied = ispec
- if(ispec > 1) then
- do ispec2 = 1,ispec - 1
- ! domain elements only
- if(ispec_is_d(ispec)) then
+ ! test if we can move this element to another color
+ do ispec2 = 1,nspec
+ ! domain elements only
+ if(ispec_is_d(ispec2)) then
- ! only inner elements
- if (is_on_a_slice_edge(ispec2)) cycle
+ ! do not test that element itself
+ if (ispec2 == ispec) cycle
- ! if conflict already found previously with this color, no need to test again
- if (icolor_conflict_found(color(ispec2))) cycle
+ ! only outer elements
+ if (.not. is_on_a_slice_edge(ispec2)) cycle
- ! test the eight corners of the current element for a common point with element under study
- if (mask_ibool(ibool(1,1,1,ispec2)) .or. &
- mask_ibool(ibool(NGLLX,1,1,ispec2)) .or. &
- mask_ibool(ibool(NGLLX,NGLLY,1,ispec2)) .or. &
- mask_ibool(ibool(1,NGLLY,1,ispec2)) .or. &
- mask_ibool(ibool(1,1,NGLLZ,ispec2)) .or. &
- mask_ibool(ibool(NGLLX,1,NGLLZ,ispec2)) .or. &
- mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
- mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
+ ! if conflict already found previously with this color, no need to test again
+ if (icolor_conflict_found(color(ispec2))) cycle
+
+ ! test the eight corners of the current element for a common point with element under study
+ if (mask_ibool(ibool(1,1,1,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,1,1,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,NGLLY,1,ispec2)) .or. &
+ mask_ibool(ibool(1,NGLLY,1,ispec2)) .or. &
+ mask_ibool(ibool(1,1,NGLLZ,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,1,NGLLZ,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
+ mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
icolor_conflict_found(color(ispec2)) = .true.
- endif ! domain elements
- enddo
- endif
+ endif ! domain elements
+ enddo
- ! check if the Droux 1993 algorithm found a solution
- if (all(icolor_conflict_found(icolormin:icolormax))) then
- ! user output
- !if(myrank == 0) write(IMAIN,*) ' Droux 1993 algorithm did not find any solution for ncolors = ',ncolors
+ ! if color is already above target size for a balanced set, do not move to it
+ do icolor_target = icolormin,icolormax
+ if(nb_elems_in_this_color(icolor_target) >= target_nb_elems_per_color) &
+ icolor_conflict_found(icolor_target) = .true.
+ enddo
- ! try with one more color
- if(nb_tries_of_Droux_1993 < MAX_NB_TRIES_OF_DROUX_1993) then
- nb_colors_inner_elements = nb_colors_inner_elements + 1
- deallocate(nb_elems_in_this_color)
- deallocate(icolor_conflict_found)
- nb_tries_of_Droux_1993 = nb_tries_of_Droux_1993 + 1
- goto 765
- else
- ! fail-safe mechanism: if Droux 1993 still fails after all the tries with one more color,
- ! then go back to my original simple and fast coloring algorithm
- try_Droux_coloring = .false.
- if(myrank == 0) write(IMAIN,*) ' giving up on Droux 1993 algorithm, calling fail-safe mechanism'
- goto 999
- endif
- endif
+ ! if cannot find any other color to move this element to
+ if (all(icolor_conflict_found(icolormin:icolormax))) cycle
- ! loop on all the colors to determine the color with the smallest number
- ! of elements and for which there is no conflict
+ ! loop on all the colors to determine the color with the smallest number of elements
+ ! and for which there is no conflict
nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
- do icolor = icolormin,icolormax
- if (.not. icolor_conflict_found(icolor) .and. nb_elems_in_this_color(icolor) < nb_elems_in_color_chosen) then
- icolor_chosen = icolor
- nb_elems_in_color_chosen = nb_elems_in_this_color(icolor)
+ do icolor_target = icolormin,icolormax
+ if (.not. icolor_conflict_found(icolor_target) .and. &
+ nb_elems_in_this_color(icolor_target) < nb_elems_in_color_chosen) then
+ icolor_chosen = icolor_target
+ nb_elems_in_color_chosen = nb_elems_in_this_color(icolor_target)
endif
enddo
- ! store the color finally chosen
+ ! move the element to that new color
+ ! remove element from its current color
+ nb_elems_in_this_color(color(ispec)) = nb_elems_in_this_color(color(ispec)) - 1
color(ispec) = icolor_chosen
- nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
-
+ ! and add it to the new color
+ nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
+
endif ! domain elements
enddo
- ! debug output
- if(myrank == 0) then
- write(IMAIN,*) ' created a total of ',maxval(color),' colors in this domain' ! 'for all the domain elements of the mesh'
- if( nb_colors_outer_elements > 0 ) &
- write(IMAIN,*) ' typical nb of elements per color for outer elements should be ', &
- nspec_outer / nb_colors_outer_elements
- if( nb_colors_inner_elements > 0 ) &
- write(IMAIN,*) ' typical nb of elements per color for inner elements should be ', &
- nspec_inner / nb_colors_inner_elements
- endif
+ enddo ! icolor
- endif ! of if(try_Droux_coloring)
+!! DK DK do it for inner elements
-!!!!!!!!!! DK DK Nov 2011 added this again to create more balanced colors according to JJ Droux (1993)
+ icolormin = nb_colors_outer_elements + 1
+ icolormax = ncolors
- !!!!!!!! DK DK now check that all the color sets are independent
- do icolor = 1,maxval(color)
+ ! ideal value if all colors are perfectly balanced
+ if( nb_colors_inner_elements > 0 ) then
+ target_nb_elems_per_color = nspec_inner / nb_colors_inner_elements + 1
+ else
+ target_nb_elems_per_color = 1
+ endif
+ ! print *,'nspec_inner,target_nb_elems_per_color = ',nspec_inner,target_nb_elems_per_color
+
+ ! count the initial number of elements in each color
+ nb_elems_in_this_color(:) = 0
+ do icolor = icolormin,icolormax
+ nb_elems_in_this_color(icolor) = count(color == icolor)
+ enddo
+
+ ! do not balance the last one, because it will be balanced automatically by the others
+ do icolor = icolormin,icolormax-1
+
+ ! if color is already balanced, do nothing
+ ! (this works because in the initial set the number of elements per color decreases when the color number increases)
+ if(nb_elems_in_this_color(icolor) <= target_nb_elems_per_color) cycle
+
mask_ibool(:) = .false.
+ last_ispec_studied = -1
+
do ispec = 1,nspec
! domain elements only
if(ispec_is_d(ispec)) then
- if(color(ispec) == icolor ) then
- ! the eight corners of the current element
- iglob1=ibool(1,1,1,ispec)
- iglob2=ibool(NGLLX,1,1,ispec)
- iglob3=ibool(NGLLX,NGLLY,1,ispec)
- iglob4=ibool(1,NGLLY,1,ispec)
- iglob5=ibool(1,1,NGLLZ,ispec)
- iglob6=ibool(NGLLX,1,NGLLZ,ispec)
- iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
- iglob8=ibool(1,NGLLY,NGLLZ,ispec)
- if(mask_ibool(iglob1) .or. mask_ibool(iglob2) .or. mask_ibool(iglob3) .or. mask_ibool(iglob4) .or. &
- mask_ibool(iglob5) .or. mask_ibool(iglob6) .or. mask_ibool(iglob7) .or. mask_ibool(iglob8)) then
- ! if element of this color has a common point with another element of that same color
- ! then there is a problem, the color set is not correct
- print*,'error check color:',icolor
- stop 'error detected: found a common point inside a color set'
- else
- mask_ibool(iglob1) = .true.
- mask_ibool(iglob2) = .true.
- mask_ibool(iglob3) = .true.
- mask_ibool(iglob4) = .true.
- mask_ibool(iglob5) = .true.
- mask_ibool(iglob6) = .true.
- mask_ibool(iglob7) = .true.
- mask_ibool(iglob8) = .true.
- endif
+ ! only inner elements
+ if (is_on_a_slice_edge(ispec)) cycle
+
+ ! only elements of this color
+ if (color(ispec) /= icolor) cycle
+
+ ! if color is now balanced because we have moved enough elements then stop searching
+ if(nb_elems_in_this_color(icolor) <= target_nb_elems_per_color) exit
+
+ ! unmark the eight corners of the previously marked element
+ if(last_ispec_studied > 0) then
+ mask_ibool(ibool(1,1,1,last_ispec_studied)) = .false.
+ mask_ibool(ibool(NGLLX,1,1,last_ispec_studied)) = .false.
+ mask_ibool(ibool(NGLLX,NGLLY,1,last_ispec_studied)) = .false.
+ mask_ibool(ibool(1,NGLLY,1,last_ispec_studied)) = .false.
+ mask_ibool(ibool(1,1,NGLLZ,last_ispec_studied)) = .false.
+ mask_ibool(ibool(NGLLX,1,NGLLZ,last_ispec_studied)) = .false.
+ mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,last_ispec_studied)) = .false.
+ mask_ibool(ibool(1,NGLLY,NGLLZ,last_ispec_studied)) = .false.
endif
- endif
- enddo
+ icolor_conflict_found(icolormin:icolormax) = .false.
+ icolor_conflict_found(icolor) = .true. ! cannot move element to the color it already has
- !debug output
- !if(myrank == 0) print *,' color ',icolor,' has disjoint elements only and is therefore OK'
- !if(myrank == 0) print *,' color ',icolor,' contains ',count(color == icolor),' elements'
- enddo
+ ! mark the eight corners of the current element
+ mask_ibool(ibool(1,1,1,ispec)) = .true.
+ mask_ibool(ibool(NGLLX,1,1,ispec)) = .true.
+ mask_ibool(ibool(NGLLX,NGLLY,1,ispec)) = .true.
+ mask_ibool(ibool(1,NGLLY,1,ispec)) = .true.
+ mask_ibool(ibool(1,1,NGLLZ,ispec)) = .true.
+ mask_ibool(ibool(NGLLX,1,NGLLZ,ispec)) = .true.
+ mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec)) = .true.
+ mask_ibool(ibool(1,NGLLY,NGLLZ,ispec)) = .true.
+ last_ispec_studied = ispec
- ! debug output
- if(myrank == 0) then
- !print*, ' the ',maxval(color),' color sets are OK'
- endif
+ ! test if we can move this element to another color
+ do ispec2 = 1,nspec
+ ! domain elements only
+ if(ispec_is_d(ispec2)) then
- deallocate(mask_ibool)
+ ! do not test that element itself
+ if (ispec2 == ispec) cycle
- end subroutine get_color_faster
+ ! only inner elements
+ if (is_on_a_slice_edge(ispec2)) cycle
+ ! if conflict already found previously with this color, no need to test again
+ if (icolor_conflict_found(color(ispec2))) cycle
+ ! test the eight corners of the current element for a common point with element under study
+ if (mask_ibool(ibool(1,1,1,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,1,1,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,NGLLY,1,ispec2)) .or. &
+ mask_ibool(ibool(1,NGLLY,1,ispec2)) .or. &
+ mask_ibool(ibool(1,1,NGLLZ,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,1,NGLLZ,ispec2)) .or. &
+ mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
+ mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
+ icolor_conflict_found(color(ispec2)) = .true.
+
+ endif ! domain elements
+ enddo
+
+ ! if color is already above target size for a balanced set, do not move to it
+ do icolor_target = icolormin,icolormax
+ if(nb_elems_in_this_color(icolor_target) >= target_nb_elems_per_color) &
+ icolor_conflict_found(icolor_target) = .true.
+ enddo
+
+ ! if cannot find any other color to move this element to
+ if (all(icolor_conflict_found(icolormin:icolormax))) cycle
+
+ ! loops on all the colors to determine the color with the smallest number of elements
+ ! and for which there is no conflict
+ nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
+ do icolor_target = icolormin,icolormax
+ if (.not. icolor_conflict_found(icolor_target) .and. &
+ nb_elems_in_this_color(icolor_target) < nb_elems_in_color_chosen) then
+ icolor_chosen = icolor_target
+ nb_elems_in_color_chosen = nb_elems_in_this_color(icolor_target)
+ endif
+ enddo
+
+ ! moves the element to that new color
+ ! remove element from its current color
+ nb_elems_in_this_color(color(ispec)) = nb_elems_in_this_color(color(ispec)) - 1
+ color(ispec) = icolor_chosen
+ ! and add it to the new color
+ nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
+
+ endif ! domain elements
+ enddo
+
+ enddo ! icolor
+
+ end subroutine balance_colors_simple
+
!
!-------------------------------------------------------------------------------------------------
!
-
subroutine get_final_perm(color,perm,first_elem_number_in_this_color, &
nspec,nb_colors,nb_colors_outer_elements, &
ispec_is_d,nspec_domain)
@@ -673,501 +1082,10 @@
end subroutine get_final_perm
-!
-!-------------------------------------------------------------------------------------------------
-!
-! unused...
-! subroutine get_perm_color(is_on_a_slice_edge,ispec_is_d, &
-! ibool,perm,nspec,nglob, &
-! nb_colors_outer_elements,nb_colors_inner_elements, &
-! nspec_outer,first_elem_number_in_this_color,myrank)
!
-! implicit none
-!
-! include "constants.h"
-! integer, parameter :: NGNOD_HEXAHEDRA = 8
-!
-! logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
-!
-! integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-! integer, dimension(nspec) :: perm
-! integer, dimension(nspec) :: color
-! integer, dimension(MAX_NUMBER_OF_COLORS+1) :: first_elem_number_in_this_color
-! integer :: nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer,myrank
-!
-! ! local variables
-! integer nspec,nglob_GLL_full
-!
-! ! a neighbor of a hexahedral node is a hexahedron that shares a face with it -> max degree of a node = 6
-! integer, parameter :: MAX_NUMBER_OF_NEIGHBORS = 100
-!
-! ! global corner numbers that need to be created
-! integer, dimension(nglob) :: global_corner_number
-!
-! integer mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1)
-! integer, dimension(:), allocatable :: ne,np,adj
-! integer xadj(nspec+1)
-!
-! !logical maskel(nspec)
-!
-! integer i,istart,istop,number_of_neighbors
-!
-! integer nglob_eight_corners_only,nglob
-!
-! ! only count the total size of the array that will be created, or actually create it
-! logical count_only
-! integer total_size_ne,total_size_adj
-!
-!
-! ! total number of points in the mesh
-! nglob_GLL_full = nglob
-!
-! !---- call Charbel Farhat's routines
-! if(myrank == 0) &
-! write(IMAIN,*) 'calling form_elt_connectivity_foelco to perform mesh coloring and inner/outer element splitting'
-! call form_elt_connectivity_foelco(mn,mp,nspec,global_corner_number,nglob_GLL_full,ibool,nglob_eight_corners_only)
-! do i=1,nspec
-! istart = mp(i)
-! istop = mp(i+1) - 1
-! enddo
-!
-! ! count only, to determine the size needed for the array
-! allocate(np(nglob_eight_corners_only+1))
-! count_only = .true.
-! total_size_ne = 1
-! if(myrank == 0) write(IMAIN,*) 'calling form_node_connectivity_fonoco to determine the size of the table'
-! allocate(ne(total_size_ne))
-! call form_node_connectivity_fonoco(mn,mp,ne,np,nglob_eight_corners_only,nspec,count_only,total_size_ne)
-! deallocate(ne)
-!
-! !print *, 'nglob_eight_corners_only'
-! !print *, nglob_eight_corners_only
-!
-! ! allocate the array with the right size
-! allocate(ne(total_size_ne))
-!
-! ! now actually generate the array
-! count_only = .false.
-! if(myrank == 0) write(IMAIN,*) 'calling form_node_connectivity_fonoco to actually create the table'
-! call form_node_connectivity_fonoco(mn,mp,ne,np,nglob_eight_corners_only,nspec,count_only,total_size_ne)
-! do i=1,nglob_eight_corners_only
-! istart = np(i)
-! istop = np(i+1) - 1
-! enddo
-!
-! !print *, 'total_size_ne'
-! !print *, total_size_ne
-!
-! ! count only, to determine the size needed for the array
-! count_only = .true.
-! total_size_adj = 1
-! if(myrank == 0) write(IMAIN,*) 'calling create_adjacency_table_adjncy to determine the size of the table'
-! allocate(adj(total_size_adj))
-! !call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,maskel,nspec,nglob_eight_corners_only,&
-! !count_only,total_size_ne,total_size_adj,.false.)
-! call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,nspec,nglob_eight_corners_only,&
-! count_only,total_size_ne,total_size_adj,.false.)
-! deallocate(adj)
-!
-! ! allocate the array with the right size
-! allocate(adj(total_size_adj))
-!
-! ! now actually generate the array
-! count_only = .false.
-! if(myrank == 0) write(IMAIN,*) 'calling create_adjacency_table_adjncy again to actually create the table'
-! !call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,maskel,nspec,nglob_eight_corners_only,&
-! !count_only,total_size_ne,total_size_adj,.false.)
-! call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,nspec,nglob_eight_corners_only,&
-! count_only,total_size_ne,total_size_adj,.false.)
-!
-! do i=1,nspec
-! istart = xadj(i)
-! istop = xadj(i+1) - 1
-! number_of_neighbors = istop-istart+1
-! if(number_of_neighbors < 1 .or. number_of_neighbors > MAX_NUMBER_OF_NEIGHBORS) stop 'incorrect number of neighbors'
-! enddo
-!
-! deallocate(ne,np)
-!
-! call get_color(adj,xadj,color,nspec,total_size_adj, &
-! is_on_a_slice_edge,ispec_is_d, &
-! nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer)
-!
-! if(myrank == 0) then
-! write(IMAIN,*) ' number of colors of the graph for inner elements = ',nb_colors_inner_elements
-! write(IMAIN,*) ' number of colors of the graph for outer elements = ',nb_colors_outer_elements
-! write(IMAIN,*) ' total number of colors of the graph (sum of both) = ', nb_colors_inner_elements + nb_colors_outer_elements
-! write(IMAIN,*) ' number of elements of the graph for outer elements = ',nspec_outer
-! endif
-!
-! deallocate(adj)
-!
-! if(myrank == 0) write(IMAIN,*) 'generating the final colors'
-! first_elem_number_in_this_color(:) = -1
-! call get_final_perm(color,perm,first_elem_number_in_this_color,nspec,nb_colors_inner_elements+nb_colors_outer_elements)
-!
-! if(myrank == 0) write(IMAIN,*) 'done with mesh coloring and inner/outer element splitting'
-!
-! end subroutine get_perm_color
-
-!
!-------------------------------------------------------------------------------------------------
!
-
-!unused...
-! subroutine get_color(adj,xadj,color,nspec,total_size_adj, &
-! is_on_a_slice_edge,ispec_is_d, &
-! nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer)
-!
-! integer, intent(in) :: nspec,total_size_adj
-! integer, intent(in) :: adj(total_size_adj),xadj(nspec+1)
-! integer :: color(nspec)
-! integer :: this_color,nb_already_colored,ispec,ixadj,ok
-! logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
-! integer :: nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer
-! logical :: is_outer_element(nspec)
-!
-! nspec_outer = 0
-!
-! is_outer_element(:) = .false.
-!
-! do ispec=1,nspec
-! if(ispec_is_d(ispec)) then
-! if (is_on_a_slice_edge(ispec)) then
-! is_outer_element(ispec) = .true.
-! nspec_outer=nspec_outer+1
-! endif
-! endif
-! enddo
-!
-! ! outer elements
-! color(:) = 0
-! this_color = 0
-! nb_already_colored = 0
-! do while(nb_already_colored<nspec_outer)
-! this_color = this_color + 1
-! do ispec = 1, nspec
-! if(ispec_is_d(ispec)) then
-! if (is_outer_element(ispec)) then
-! if (color(ispec) == 0) then
-! ok = 1
-! do ixadj = xadj(ispec), (xadj(ispec+1)-1)
-! if (is_outer_element(adj(ixadj)) .and. color(adj(ixadj)) == this_color) ok = 0
-! enddo
-! if (ok /= 0) then
-! color(ispec) = this_color
-! nb_already_colored = nb_already_colored + 1
-! endif
-! endif
-! endif
-! endif
-! enddo
-! enddo
-! nb_colors_outer_elements = this_color
-!
-! ! inner elements
-! do while(nb_already_colored<nspec)
-! this_color = this_color + 1
-! do ispec = 1, nspec
-! if(ispec_is_d(ispec)) then
-! if (.not. is_outer_element(ispec)) then
-! if (color(ispec) == 0) then
-! ok = 1
-! do ixadj = xadj(ispec), (xadj(ispec+1)-1)
-! if (.not. is_outer_element(adj(ixadj)) .and. color(adj(ixadj)) == this_color) ok = 0
-! enddo
-! if (ok /= 0) then
-! color(ispec) = this_color
-! nb_already_colored = nb_already_colored + 1
-! endif
-! endif
-! endif
-! endif
-! enddo
-! enddo
-!
-! nb_colors_inner_elements = this_color - nb_colors_outer_elements
-!
-!end subroutine get_color
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-!unused...
-!
-!!=======================================================================
-!!
-!! Charbel Farhat's FEM topology routines
-!!
-!! Dimitri Komatitsch, February 1996 - Code based on Farhat's original version from 1987
-!!
-!! modified and adapted by Dimitri Komatitsch, May 2006
-!!
-!!=======================================================================
-!
-! subroutine form_elt_connectivity_foelco(mn,mp,nspec,global_corner_number,&
-! nglob_GLL_full,ibool,nglob_eight_corners_only)
-!
-!!-----------------------------------------------------------------------
-!!
-!! Forms the MN and MP arrays
-!!
-!! Input :
-!! -------
-!! ibool Array needed to build the element connectivity table
-!! nspec Number of elements in the domain
-!! NGNOD_HEXAHEDRA number of nodes per hexahedron (brick with 8 corners)
-!!
-!! Output :
-!! --------
-!! MN, MP This is the element connectivity array pair.
-!! Array MN contains the list of the element
-!! connectivity, that is, the nodes contained in each
-!! element. They are stored in a stacked fashion.
-!!
-!! Pointer array MP stores the location of each
-!! element list. Its length is equal to the number
-!! of elements plus one.
-!!
-!!-----------------------------------------------------------------------
-!
-! implicit none
-!
-! include "constants.h"
-! integer, parameter :: NGNOD_HEXAHEDRA = 8
-!
-! integer nspec,nglob_GLL_full
-!
-! ! arrays with mesh parameters per slice
-! integer, intent(in), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-!
-! ! global corner numbers that need to be created
-! integer, intent(out), dimension(nglob_GLL_full) :: global_corner_number
-! integer, intent(out) :: mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1)
-! integer, intent(out) :: nglob_eight_corners_only
-!
-! integer ninter,nsum,ispec,node,k,inumcorner,ix,iy,iz
-!
-! ninter = 1
-! nsum = 1
-! mp(1) = 1
-!
-! !---- define topology of the elements in the mesh
-! !---- we need to define adjacent numbers from the sub-mesh consisting of the corners only
-! nglob_eight_corners_only = 0
-! global_corner_number(:) = -1
-!
-! do ispec=1,nspec
-!
-! inumcorner = 0
-! do iz = 1,NGLLZ,NGLLZ-1
-! do iy = 1,NGLLY,NGLLY-1
-! do ix = 1,NGLLX,NGLLX-1
-!
-! inumcorner = inumcorner + 1
-! if(inumcorner > NGNOD_HEXAHEDRA) stop 'corner number too large'
-!
-! ! check if this point was already assigned a number previously, otherwise create one and store it
-! if(global_corner_number(ibool(ix,iy,iz,ispec)) == -1) then
-! nglob_eight_corners_only = nglob_eight_corners_only + 1
-! global_corner_number(ibool(ix,iy,iz,ispec)) = nglob_eight_corners_only
-! endif
-!
-! node = global_corner_number(ibool(ix,iy,iz,ispec))
-! do k=nsum,ninter-1
-! if(node == mn(k)) goto 200
-! enddo
-!
-! mn(ninter) = node
-! ninter = ninter + 1
-! 200 continue
-!
-! enddo
-! enddo
-! enddo
-!
-! nsum = ninter
-! mp(ispec + 1) = nsum
-!
-! enddo
-!
-! end subroutine form_elt_connectivity_foelco
-!
-!!
-!!-------------------------------------------------------------------------------------------------
-!!
-!
-! subroutine form_node_connectivity_fonoco(mn,mp,ne,np,nglob_eight_corners_only,&
-! nspec,count_only,total_size_ne)
-!
-!!-----------------------------------------------------------------------
-!!
-!! Forms the NE and NP arrays
-!!
-!! Input :
-!! -------
-!! MN, MP, nspec
-!! nglob_eight_corners_only Number of nodes in the domain
-!!
-!! Output :
-!! --------
-!! NE, NP This is the node-connected element array pair.
-!! Integer array NE contains a list of the
-!! elements connected to each node, stored in stacked fashion.
-!!
-!! Array NP is the pointer array for the
-!! location of a node's element list in the NE array.
-!! Its length is equal to the number of points plus one.
-!!
-!!-----------------------------------------------------------------------
-!
-! implicit none
-!
-! include "constants.h"
-! integer, parameter :: NGNOD_HEXAHEDRA = 8
-!
-! ! only count the total size of the array that will be created, or actually create it
-! logical count_only
-! integer total_size_ne
-!
-! integer nglob_eight_corners_only,nspec
-!
-! integer, intent(in) :: mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1)
-!
-! integer, intent(out) :: ne(total_size_ne),np(nglob_eight_corners_only+1)
-!
-! integer nsum,inode,ispec,j
-!
-! nsum = 1
-! np(1) = 1
-!
-! do inode=1,nglob_eight_corners_only
-! do 200 ispec=1,nspec
-!
-! do j=mp(ispec),mp(ispec + 1) - 1
-! if (mn(j) == inode) then
-! if(count_only) then
-! total_size_ne = nsum
-! else
-! ne(nsum) = ispec
-! endif
-! nsum = nsum + 1
-! goto 200
-! endif
-! enddo
-! 200 continue
-!
-! np(inode + 1) = nsum
-!
-! enddo
-!
-! end subroutine form_node_connectivity_fonoco
-!
-!!
-!!-------------------------------------------------------------------------------------------------
-!!
-!
-! !subroutine create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,maskel,nspec,nglob_eight_corners_only,&
-! ! count_only,total_size_ne,total_size_adj,face)
-!
-! subroutine create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,nspec,nglob_eight_corners_only,&
-! count_only,total_size_ne,total_size_adj,face)
-!
-!!-----------------------------------------------------------------------
-!!
-!! Establishes the element adjacency information of the mesh
-!! Two elements are considered adjacent if they share a face.
-!!
-!! Input :
-!! -------
-!! MN, MP, NE, NP, nspec
-!! MASKEL logical mask (length = nspec)
-!!
-!! Output :
-!! --------
-!! ADJ, XADJ This is the element adjacency array pair. Array
-!! ADJ contains the list of the elements adjacent to
-!! element i. They are stored in a stacked fashion.
-!! Pointer array XADJ stores the location of each element list.
-!!
-!!-----------------------------------------------------------------------
-!
-! implicit none
-!
-! include "constants.h"
-! integer, parameter :: NGNOD_HEXAHEDRA = 8
-!
-! ! only count the total size of the array that will be created, or actually create it
-! logical count_only,face
-! integer total_size_ne,total_size_adj
-!
-! integer nglob_eight_corners_only
-!
-! integer, intent(in) :: mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1),ne(total_size_ne),np(nglob_eight_corners_only+1)
-!
-! integer, intent(out) :: adj(total_size_adj),xadj(nspec+1)
-!
-! logical maskel(nspec)
-! integer countel(nspec)
-!
-! integer nspec,iad,ispec,istart,istop,ino,node,jstart,jstop,nelem,jel
-!
-! xadj(1) = 1
-! iad = 1
-!
-! do ispec=1,nspec
-!
-! ! reset mask
-! maskel(:) = .false.
-!
-! ! mask current element
-! maskel(ispec) = .true.
-! if (face) countel(:) = 0
-!
-! istart = mp(ispec)
-! istop = mp(ispec+1) - 1
-! do ino=istart,istop
-! node = mn(ino)
-! jstart = np(node)
-! jstop = np(node + 1) - 1
-! do 120 jel=jstart,jstop
-! nelem = ne(jel)
-! if(maskel(nelem)) goto 120
-! if (face) then
-! ! if 2 elements share at least 3 corners, therefore they share a face
-! countel(nelem) = countel(nelem) + 1
-! if (countel(nelem)>=3) then
-! if(count_only) then
-! total_size_adj = iad
-! else
-! adj(iad) = nelem
-! endif
-! maskel(nelem) = .true.
-! iad = iad + 1
-! endif
-! else
-! if(count_only) then
-! total_size_adj = iad
-! else
-! adj(iad) = nelem
-! endif
-! maskel(nelem) = .true.
-! iad = iad + 1
-! endif
-! 120 continue
-! enddo
-!
-! xadj(ispec+1) = iad
-!
-! enddo
-!
-! end subroutine create_adjacency_table_adjncy
-
-!
-!-------------------------------------------------------------------------------------------------
-!
! PERMUTATIONS
!
!-------------------------------------------------------------------------------------------------
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in 2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in 2011-11-29 04:28:42 UTC (rev 19246)
@@ -258,10 +258,15 @@
logical, parameter :: USE_MESH_COLORING_GPU = .false.
integer, parameter :: MAX_NUMBER_OF_COLORS = 10000
-! enhanced coloring using Droux algorithm
+! enhanced coloring:
+!
+! using Droux algorithm
! try several times with one more color before giving up
logical, parameter :: USE_DROUX_OPTIMIZATION = .false.
integer, parameter :: MAX_NB_TRIES_OF_DROUX_1993 = 15
+!
+! postprocess the colors to balance them if Droux (1993) algorithm is not used
+ logical, parameter :: BALANCE_COLORS_SIMPLE_ALGO = .true.
!------------------------------------------------------
!----------- do not modify anything below -------------
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90 2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90 2011-11-29 04:28:42 UTC (rev 19246)
@@ -703,10 +703,11 @@
real(kind=CUSTOM_REAL), dimension(nglob) :: xstore,ystore,zstore
!local parameters
- real(kind=CUSTOM_REAL) :: mindist
+ real(kind=CUSTOM_REAL) :: mindist,distance
integer, dimension(:), allocatable :: valence_external_mesh
integer :: ispec,i,j,k,iglob,ier,count
-
+ real(kind=CUSTOM_REAL),parameter :: TOLERANCE_DISTANCE = 0.9
+
! detecting surface points/elements (based on valence check on NGLL points) for external mesh
allocate(valence_external_mesh(nglob),stat=ier)
if( ier /= 0 ) stop 'error allocate valence array'
@@ -722,7 +723,8 @@
+ (ystore(ibool(1,1,1,:)) - ystore(ibool(2,1,1,:)))**2 &
+ (zstore(ibool(1,1,1,:)) - zstore(ibool(2,1,1,:)))**2 )
mindist = sqrt(mindist)
-
+ distance = TOLERANCE_DISTANCE*mindist
+
! sets valence value to one corresponding to process rank for points on cross-sections
do ispec = 1, nspec
do k = 1, NGLLZ
@@ -732,7 +734,7 @@
! chooses points close to cross-section
if( abs((xstore(iglob)-section_xorg)*section_nx + (ystore(iglob)-section_yorg)*section_ny &
- + (zstore(iglob)-section_zorg)*section_nz ) < 0.8*mindist ) then
+ + (zstore(iglob)-section_zorg)*section_nz ) < distance ) then
! sets valence to 1 for points on cross-sections
valence_external_mesh(iglob) = myrank+1
endif
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90 2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90 2011-11-29 04:28:42 UTC (rev 19246)
@@ -118,6 +118,7 @@
real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: dist_pixel_image,dist_pixel_recv
real(kind=CUSTOM_REAL):: pixel_midpoint_x,pixel_midpoint_z,x_loc,z_loc,xtmp,ztmp
real(kind=CUSTOM_REAL):: ratio
+ real(kind=CUSTOM_REAL):: distance_x1,distance_x2,distance_z1,distance_z2
integer:: npgeo,npgeo_glob
integer:: i,j,k,iproc,iglob,ispec,ier
! data from mesh
@@ -263,11 +264,20 @@
NZ_IMAGE_color = NZ_IMAGE_color * zoom_factor
zoom = .true.
endif
-
+
! create all the pixels
- size_pixel_horizontal = (xmax_color_image - xmin_color_image) / dble(NX_IMAGE_color)
- size_pixel_vertical = (zmax_color_image - zmin_color_image) / dble(NZ_IMAGE_color)
-
+ if( NX_IMAGE_color /= 0 ) then
+ size_pixel_horizontal = (xmax_color_image - xmin_color_image) / dble(NX_IMAGE_color)
+ else
+ size_pixel_horizontal = 0.0
+ endif
+
+ if( NZ_IMAGE_color /= 0 ) then
+ size_pixel_vertical = (zmax_color_image - zmin_color_image) / dble(NZ_IMAGE_color)
+ else
+ size_pixel_vertical = 0.0
+ endif
+
if (myrank == 0) then
write(IMAIN,*) ' image points: ',npgeo_glob
write(IMAIN,*) ' xmin/xmax: ',xmin_color_image,'/',xmax_color_image
@@ -287,6 +297,19 @@
iglob_image_color(:,:) = -1
ispec_image_color(:,:) = 0
dist_pixel_image(:,:) = HUGEVAL
+
+ if( zoom ) then
+ distance_x1 = zoom_factor*size_pixel_horizontal
+ distance_x2 = (zoom_factor+1)*size_pixel_horizontal
+ distance_z1 = zoom_factor*size_pixel_vertical
+ distance_z2 = (zoom_factor+1)*size_pixel_vertical
+ else
+ distance_x1 = 0.0
+ distance_x2 = 2.0*size_pixel_horizontal
+ distance_z1 = 0.0
+ distance_z2 = 2.0*size_pixel_vertical
+ endif
+
do j=1,NZ_IMAGE_color
do i=1,NX_IMAGE_color
! calculates midpoint of pixel
@@ -309,15 +332,8 @@
z_loc = zcoord(iglob)
! checks if inside pixel range for larger numbers of points, minimizing computation time
- if( zoom ) then
- if( x_loc < xtmp-zoom_factor*size_pixel_horizontal .or. &
- x_loc > xtmp + (zoom_factor+1)*size_pixel_horizontal ) cycle
- if( z_loc < ztmp-zoom_factor*size_pixel_vertical .or. &
- z_loc > ztmp + (zoom_factor+1)*size_pixel_vertical ) cycle
- else
- if( x_loc < xtmp .or. x_loc > xtmp + size_pixel_horizontal ) cycle
- if( z_loc < ztmp .or. z_loc > ztmp + size_pixel_vertical ) cycle
- endif
+ if( x_loc < xtmp - distance_x1 .or. x_loc > xtmp + distance_x2 ) cycle
+ if( z_loc < ztmp - distance_z1 .or. z_loc > ztmp + distance_z2 ) cycle
! stores closest iglob
x_loc = pixel_midpoint_x - x_loc
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2011-11-29 04:28:42 UTC (rev 19246)
@@ -196,7 +196,7 @@
where( mustore > TINYVAL )
rhostore = (rho_vs*rho_vs) / mustore
elsewhere
- rhostore(:,:,:,:) = 0.0_CUSTOM_REAL
+ rhostore = 0.0_CUSTOM_REAL
endwhere
endif
endif
Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/Cluster/pbs/valgrind_go_solver_pbs.bash
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/Cluster/pbs/valgrind_go_solver_pbs.bash (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/Cluster/pbs/valgrind_go_solver_pbs.bash 2011-11-29 04:28:42 UTC (rev 19246)
@@ -0,0 +1,77 @@
+#!/bin/bash
+#
+# Valgrind, a suite of tools for debugging and profiling
+# http://valgrind.org/
+#
+
+# bash script
+#PBS -S /bin/bash
+
+# job name
+#PBS -N valgrind_go_solver
+
+# joins output and error information
+#PBS -j oe
+
+# job output file
+#PBS -o in_out_files/OUTPUT_FILES/job.o
+
+###########################################################
+# USER PARAMETERS
+
+# Queue
+#PBS -q tromp
+
+# 150 CPUs ( 18*8+6 ), walltime 15 hour
+#PBS -l nodes=18:ppn=8+1:ppn=6,walltime=15:00:00
+
+# valgrind mpi library
+PRELOAD_LIB=/my_valgrind_path/valgrind/lib/valgrind/libmpiwrap-x86-linux.so
+
+###########################################################
+
+cd $PBS_O_WORKDIR
+
+# script to run the mesher and the solver
+# read Par_file to get information about the run
+# compute total number of nodes needed
+NPROC=`grep NPROC in_data_files/Par_file | cut -d = -f 2 `
+
+# total number of nodes is the product of the values read
+numnodes=$NPROC
+
+mkdir -p in_out_files/OUTPUT_FILES
+
+# backup files used for this simulation
+cp in_data_files/Par_file in_out_files/OUTPUT_FILES/
+cp in_data_files/STATIONS in_out_files/OUTPUT_FILES/
+cp in_data_files/CMTSOLUTION in_out_files/OUTPUT_FILES/
+
+# obtain job information
+cat $PBS_NODEFILE > in_out_files/OUTPUT_FILES/compute_nodes
+echo "$PBS_JOBID" > in_out_files/OUTPUT_FILES/jobid
+
+echo starting run in current directory $PWD
+cd bin/
+
+echo " "
+echo "run: memory leaks"
+echo " "
+sleep 2
+
+# memory leaks
+LD_PRELOAD=$PRELOAD_LIB mpiexec -np $numnodes valgrind --leak-check=full ./xspecfem3D >& ../in_out_files/OUTPUT_FILES/output.memory-leaks.log
+
+sleep 2
+echo " "
+echo "run: cache misses"
+echo " "
+
+# cache misses
+LD_PRELOAD=$PRELOAD_LIB mpiexec -np $numnodes valgrind --tool=cachegrind ./xspecfem3D >& ../in_out_files/OUTPUT_FILES/output.cache-misses.log
+
+cp cachegrind.out.* ../in_out_files/OUTPUT_FILES/
+
+echo " "
+echo "finished successfully"
+
Property changes on: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/Cluster/pbs/valgrind_go_solver_pbs.bash
___________________________________________________________________
Name: svn:executable
+ *
Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/locate_partition.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/locate_partition.f90 (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/locate_partition.f90 2011-11-29 04:28:42 UTC (rev 19246)
@@ -0,0 +1,212 @@
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! utility to locate partition which is closest to given point location
+!
+! compile with:
+! > gfortran -I src/shared -o bin/xlocate_partition utils/locate_partition.f90
+!
+! run with:
+! > ../../bin/xlocate_partition -300000.0 11000.0 -3000.0 in_out_files/DATABASES_MPI/
+
+ program locate_partition
+
+! works for external, unregular meshes
+
+ implicit none
+
+ include 'constants.h'
+
+ ! mesh coordinates
+ real(kind=CUSTOM_REAL),dimension(:),allocatable :: xstore, ystore, zstore
+ integer, dimension(:,:,:,:),allocatable :: ibool
+ integer :: NSPEC_AB, NGLOB_AB
+
+ integer :: i,ios,ier
+ integer :: iproc
+ character(len=256) :: arg(4)
+ character(len=256) :: LOCAL_PATH
+ character(len=256) :: prname_lp
+
+ real(kind=CUSTOM_REAL) :: x_found,y_found,z_found,distance
+ real(kind=CUSTOM_REAL) :: target_x,target_y,target_z
+ real(kind=CUSTOM_REAL) :: total_x,total_y,total_z
+ real(kind=CUSTOM_REAL) :: total_distance
+ integer :: total_partition
+
+! checks given arguments
+ print *
+ print *,'locate partition'
+ print *,'----------------------------'
+
+ do i = 1, 4
+ call getarg(i,arg(i))
+ if (i <= 4 .and. trim(arg(i)) == '') then
+ print *, 'Usage: '
+ print *, ' xlocate_partition x y z Databases_directory'
+ stop ' Reenter command line options'
+ endif
+ enddo
+ read(arg(1),*) target_x
+ read(arg(2),*) target_y
+ read(arg(3),*) target_z
+ LOCAL_PATH = arg(4)
+
+ print *,'search location: '
+ print *,' x = ',target_x
+ print *,' y = ',target_y
+ print *,' z = ',target_z
+ print *,'in directory: ',trim(LOCAL_PATH)
+ print *,'----------------------------'
+ print *
+
+ ! loops over slices (process partitions)
+ total_distance = HUGEVAL
+ total_partition = -1
+ total_x = 0.0
+ total_y = 0.0
+ total_z = 0.0
+ iproc = -1
+ do while( iproc < 10000000 )
+ ! starts with 0
+ iproc = iproc + 1
+
+ ! gets number of elements and global points for this partition
+ write(prname_lp,'(a,i6.6,a)') trim(LOCAL_PATH)//'/proc',iproc,'_'
+ open(unit=27,file=prname_lp(1:len_trim(prname_lp))//'external_mesh.bin',&
+ status='old',action='read',form='unformatted',iostat=ios)
+ if( ios /= 0 ) exit
+
+ read(27) NSPEC_AB
+ read(27) NGLOB_AB
+
+ ! ibool file
+ allocate(ibool(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibool'
+ read(27) ibool
+
+ ! global point arrays
+ allocate(xstore(NGLOB_AB),ystore(NGLOB_AB),zstore(NGLOB_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array xstore etc.'
+ read(27) xstore
+ read(27) ystore
+ read(27) zstore
+ close(27)
+
+ print*,'partition: ',iproc
+ print*,' min/max x = ',minval(xstore),maxval(xstore)
+ print*,' min/max y = ',minval(ystore),maxval(ystore)
+ print*,' min/max z = ',minval(zstore),maxval(zstore)
+ print*
+
+ ! gets distance to target location
+ call get_closest_point(target_x,target_y,target_z, &
+ NGLOB_AB,NSPEC_AB,xstore,ystore,zstore,ibool, &
+ distance,x_found,y_found,z_found)
+
+ if( distance < total_distance ) then
+ total_distance = distance
+ total_partition = iproc
+ total_x = x_found
+ total_y = y_found
+ total_z = z_found
+ endif
+
+ ! cleans up memory allocations
+ deallocate(ibool,xstore,ystore,zstore)
+
+ enddo ! all slices for points
+
+ ! checks
+ if (total_partition < 0 ) then
+ print*,'Error: partition not found among ',iproc,'partitions searched'
+ stop 'Error: partition not found'
+ endif
+
+ ! output
+ print*,'number of partitions searched: ',iproc
+ print*
+ print*,'closest grid point location found:'
+ print*,' x = ',total_x
+ print*,' y = ',total_y
+ print*,' z = ',total_z
+ print*,' distance to search location = ',sqrt(total_distance)
+ print*,'closest partition: '
+ print*,' partition = ',total_partition
+ print*
+
+ end program locate_partition
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine get_closest_point(target_x,target_y,target_z, &
+ NGLOB_AB,NSPEC_AB,xstore,ystore,zstore,ibool, &
+ distance,x_found,y_found,z_found)
+
+ implicit none
+ include 'constants.h'
+
+ real(kind=CUSTOM_REAL),intent(in) :: target_x,target_y,target_z
+
+ integer,intent(in) :: NSPEC_AB,NGLOB_AB
+ real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: xstore, ystore, zstore
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
+
+ real(kind=CUSTOM_REAL),intent(out) :: distance
+ real(kind=CUSTOM_REAL),intent(out) :: x_found,y_found,z_found
+
+ ! local parameters
+ integer :: i,j,k,ispec,iglob
+ real(kind=CUSTOM_REAL) :: dist
+
+ distance = HUGEVAL
+ x_found = 0.0
+ y_found = 0.0
+ z_found = 0.0
+
+ do ispec=1,NSPEC_AB
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+ dist = (target_x - xstore(iglob))*(target_x - xstore(iglob)) &
+ + (target_y - ystore(iglob))*(target_y - ystore(iglob)) &
+ + (target_z - zstore(iglob))*(target_z - zstore(iglob))
+
+ if( dist < distance ) then
+ distance = dist
+ x_found = xstore(iglob)
+ y_found = ystore(iglob)
+ z_found = zstore(iglob)
+ endif
+ enddo
+ enddo
+ enddo
+ enddo
+
+ end subroutine
More information about the CIG-COMMITS
mailing list