[cig-commits] r19246 - in seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER: src/cuda src/generate_databases src/shared src/specfem3D utils utils/Cluster/pbs

danielpeter at geodynamics.org danielpeter at geodynamics.org
Mon Nov 28 20:28:43 PST 2011


Author: danielpeter
Date: 2011-11-28 20:28:42 -0800 (Mon, 28 Nov 2011)
New Revision: 19246

Added:
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/Cluster/pbs/valgrind_go_solver_pbs.bash
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/locate_partition.f90
Modified:
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
Log:
updates device initialization; updates mesh coloring routine w/ simple color balancing

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2011-11-29 04:28:42 UTC (rev 19246)
@@ -92,7 +92,7 @@
     {
       fprintf(stderr,"Error after %s: %s\n", kernel_name, cudaGetErrorString(err));
       pause_for_debugger(0);
-      free(kernel_name);
+      //free(kernel_name);
       exit(EXIT_FAILURE);
     }
 }
@@ -106,7 +106,7 @@
 #ifdef USE_MPI
   MPI_Abort(MPI_COMM_WORLD,1);
 #endif
-  free(info);
+  //free(info);
   exit(EXIT_FAILURE);
   return;
 }
@@ -467,9 +467,19 @@
   if (device_count == 0) exit_on_error("CUDA runtime error: there is no device supporting CUDA\n");
   *ncuda_devices = device_count;
 
-  // Sets the active device
+
+  // Sets the active device  
   if(device_count > 1) {
     // generalized for more GPUs per node
+    // note: without previous context release, cudaSetDevice will complain with the cuda error
+    //         "setting the device when a process is active is not allowed"
+    // releases previous contexts  
+    cudaThreadExit();
+    
+    //printf("rank %d: cuda device count = %d sets device = %d \n",myrank,device_count,myrank % device_count);
+    //MPI_Barrier(MPI_COMM_WORLD);
+    
+    // sets active device
     cudaSetDevice( myrank % device_count );
     exit_on_cuda_error("cudaSetDevice");
   }

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90	2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90	2011-11-29 04:28:42 UTC (rev 19246)
@@ -31,9 +31,6 @@
 ! start non-blocking MPI calls and overlap them with the calculation of the inner elements
 ! (which works fine because there are always far more inner elements than outer elements)
 
-!*********************************************************************************************************
-! Mila
-
 ! daniel: modified routines to use element domain flags given in ispec_is_d, thus
 !             coloring only acoustic or elastic (or..) elements in one run, then repeat run for other domains.
 !             also, the permutation re-starts at 1 for outer and for inner elements,
@@ -122,26 +119,27 @@
 
   ! local variables
   integer :: ispec
-  !! DK DK for mesh coloring GPU Joseph Fourier
   logical, dimension(:), allocatable :: mask_ibool
   integer :: icolor, nb_already_colored
   integer :: iglob1,iglob2,iglob3,iglob4,iglob5,iglob6,iglob7,iglob8
+  integer :: ier
   logical :: conflict_found_need_new_color
-  !! DK DK Nov 2011 added this
   ! Droux
-  logical, dimension(:), allocatable :: icolor_conflict_found
-  integer, dimension(:), allocatable :: nb_elems_in_this_color
   logical :: try_Droux_coloring
-  integer :: ispec2,ncolors,icolormin,icolormax,icolor_chosen,nb_elems_in_color_chosen
-  integer :: nb_tries_of_Droux_1993,last_ispec_studied
+  logical :: fail_safe
   ! valence
-  integer, dimension(:), allocatable :: count_ibool
   integer :: maxval_count_ibool_outer,maxval_count_ibool_inner
-
+  
+  ! display absolute minimum possible number of colors, i.e., maximum valence (for information only)
+  ! beware: this wastes memory (needs an additional array called "count_ibool")
+  logical, parameter :: DISPLAY_MIN_POSSIBLE_COLORS = .false.
+  
   ! user output
   if( myrank == 0 ) then
     if( USE_DROUX_OPTIMIZATION ) then
       write(IMAIN,*) '     fast coloring mesh algorithm w/ Droux optimization'
+    else if( BALANCE_COLORS_SIMPLE_ALGO ) then
+      write(IMAIN,*) '     fast coloring mesh algorithm w/ color balancing'
     else
       write(IMAIN,*) '     fast coloring mesh algorithm'
     endif
@@ -152,7 +150,9 @@
   nspec_inner = 0
   nspec_domain = 0
   do ispec=1,nspec
+    ! domain elements  
     if(ispec_is_d(ispec)) then
+      ! outer/inner elements
       if(is_on_a_slice_edge(ispec)) then
         nspec_outer=nspec_outer+1
       else
@@ -162,13 +162,6 @@
     endif
   enddo
 
-  !! DK DK start mesh coloring (new Apr 2010 version by DK for GPU Joseph Fourier)
-  allocate(mask_ibool(nglob))
-
-  !! DK DK ----------------------------------
-  !! DK DK color the mesh in the crust_mantle
-  !! DK DK ----------------------------------
-
   ! debug
   !if(myrank == 0) then
   !  print *
@@ -180,8 +173,29 @@
 
   ! Droux optimization
   try_Droux_coloring = USE_DROUX_OPTIMIZATION
-  nb_tries_of_Droux_1993 = 1
 
+  if(BALANCE_COLORS_SIMPLE_ALGO .and. USE_DROUX_OPTIMIZATION ) then
+    if( myrank == 0 ) then
+      print *,'noticed a problem with mesh coloring options: '
+      print *,'  cannot set both USE_DROUX_OPTIMAL_ALGO and BALANCE_COLORS_SIMPLE_ALGO'
+      print *,'  -> this run will use only BALANCE_COLORS_SIMPLE_ALGO'
+      print *,'please check parameter settings in constants.h...'
+    endif
+    try_Droux_coloring = .false.
+  endif
+
+  ! gives a lower bound for the number of colors needed
+  if(DISPLAY_MIN_POSSIBLE_COLORS .or. try_Droux_coloring) then
+    ! gets maximum values of valence for inner and outer element points
+    call count_mesh_valence(ibool,is_on_a_slice_edge,ispec_is_d, &
+                           myrank, nspec, nglob, &
+                           maxval_count_ibool_outer,maxval_count_ibool_inner)    
+  endif 
+
+  ! allocates mask
+  allocate(mask_ibool(nglob),stat=ier)
+  if( ier /= 0 ) stop 'error allocating mask_ibool array'
+  
   ! entry point for fail-safe mechanism when Droux 1993 fails
   999 continue
 
@@ -264,8 +278,6 @@
     334 continue
     icolor = icolor + 1
 
-    !if(myrank == 0) print *,'analyzing color ',icolor,' for all the elements of the mesh'
-
     ! debug: user output
     !if(myrank == 0) then
     !  print *,'  analyzing color ',icolor,' - inner elements'
@@ -326,34 +338,44 @@
 
   nb_colors_inner_elements = icolor - nb_colors_outer_elements
 
-!!!!!!!!!! DK DK Nov 2011 added this to create more balanced colors according to JJ Droux (1993)
-
-  ! Droux optimization: might not find an optimial solution.
-  ! we will probably have to try a few times with increasing colors
-  if(try_Droux_coloring) then
-
-    ! debug outupt
-    if( myrank == 0 ) then
-      write(IMAIN,*) '     fast algorithm: number of outer element colors = ',nb_colors_outer_elements
-      write(IMAIN,*) '     fast algorithm: number of inner element colors = ',nb_colors_inner_elements,' total:', &
-        nb_colors_outer_elements + nb_colors_inner_elements
-      write(IMAIN,*) '     fast algorithm: number of total colors = ',nb_colors_outer_elements + nb_colors_inner_elements
+  ! Droux optimization: 
+  ! added this to create more balanced colors according to JJ Droux (1993)  
+  ! note: this might not find an optimial solution.  
+  !          we will probably have to try a few times with increasing colors
+  if( try_Droux_coloring ) then
+    ! initializes fail-safe mechanism
+    fail_safe = .false.
+  
+    ! tries to find a balanced coloring
+    call balance_colors_Droux(ibool,is_on_a_slice_edge,ispec_is_d, &
+                              myrank, nspec, nglob, &
+                              color,nb_colors_outer_elements,nb_colors_inner_elements, &
+                              nspec_outer,nspec_inner,maxval_count_ibool_inner, &
+                              mask_ibool,fail_safe)
+    
+    ! in case it fails go back to simple coloring algorithm
+    if( fail_safe ) then
+      try_Droux_coloring = .false.
+      if(myrank == 0) write(IMAIN,*) '     giving up on Droux 1993 algorithm, calling fail-safe mechanism'
+      goto 999
     endif
+  endif ! of if(try_Droux_coloring)
 
-    !! DK DK Nov 2011: give a lower bound for the number of colors needed
-    allocate( count_ibool(nglob))
+  ! balances colors using a simple algorithm (if Droux was not used)
+  if( BALANCE_COLORS_SIMPLE_ALGO ) then
+    call balance_colors_simple(ibool,is_on_a_slice_edge,ispec_is_d, &
+                              myrank, nspec, nglob, &
+                              color,nb_colors_outer_elements,nb_colors_inner_elements, &
+                              nspec_outer,nspec_inner,mask_ibool)
+  endif
 
-    ! valence numbers of the mesh
-    maxval_count_ibool_outer = 0
-    maxval_count_ibool_inner = 0
-
-    ! valence for outer elements
-    count_ibool(:) = 0
+  ! checks that all the color sets are independent
+  do icolor = 1,maxval(color)
+    mask_ibool(:) = .false.
     do ispec = 1,nspec
       ! domain elements only
       if(ispec_is_d(ispec)) then
-        ! outer elements
-        if (is_on_a_slice_edge(ispec)) then
+        if(color(ispec) == icolor ) then
           ! the eight corners of the current element
           iglob1=ibool(1,1,1,ispec)
           iglob2=ibool(NGLLX,1,1,ispec)
@@ -364,85 +386,408 @@
           iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
           iglob8=ibool(1,NGLLY,NGLLZ,ispec)
 
-          count_ibool(iglob1) = count_ibool(iglob1) + 1
-          count_ibool(iglob2) = count_ibool(iglob2) + 1
-          count_ibool(iglob3) = count_ibool(iglob3) + 1
-          count_ibool(iglob4) = count_ibool(iglob4) + 1
-          count_ibool(iglob5) = count_ibool(iglob5) + 1
-          count_ibool(iglob6) = count_ibool(iglob6) + 1
-          count_ibool(iglob7) = count_ibool(iglob7) + 1
-          count_ibool(iglob8) = count_ibool(iglob8) + 1
+          if(mask_ibool(iglob1) .or. mask_ibool(iglob2) .or. mask_ibool(iglob3) .or. mask_ibool(iglob4) .or. &
+             mask_ibool(iglob5) .or. mask_ibool(iglob6) .or. mask_ibool(iglob7) .or. mask_ibool(iglob8)) then
+            ! if element of this color has a common point with another element of that same color
+            ! then there is a problem, the color set is not correct
+            print*,'error check color:',icolor
+            stop 'error detected: found a common point inside a color set'
+          else
+            mask_ibool(iglob1) = .true.
+            mask_ibool(iglob2) = .true.
+            mask_ibool(iglob3) = .true.
+            mask_ibool(iglob4) = .true.
+            mask_ibool(iglob5) = .true.
+            mask_ibool(iglob6) = .true.
+            mask_ibool(iglob7) = .true.
+            mask_ibool(iglob8) = .true.
+          endif
         endif
       endif
     enddo
-    maxval_count_ibool_outer = maxval(count_ibool)
 
-    ! valence for inner elements
-    count_ibool(:) = 0
-    do ispec = 1,nspec
-      ! domain elements only
-      if(ispec_is_d(ispec)) then
-        ! inner elements
-        if (.not. is_on_a_slice_edge(ispec)) then
-          ! the eight corners of the current element
-          iglob1=ibool(1,1,1,ispec)
-          iglob2=ibool(NGLLX,1,1,ispec)
-          iglob3=ibool(NGLLX,NGLLY,1,ispec)
-          iglob4=ibool(1,NGLLY,1,ispec)
-          iglob5=ibool(1,1,NGLLZ,ispec)
-          iglob6=ibool(NGLLX,1,NGLLZ,ispec)
-          iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
-          iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+    !debug output
+    !if(myrank == 0) print *,'  color ',icolor,' has disjoint elements only and is therefore OK'
+    !if(myrank == 0) print *,'  color ',icolor,' contains ',count(color == icolor),' elements'
+  enddo
+  ! debug output
+  !if(myrank == 0) then
+  !  print*, '     the ',maxval(color),' color sets are OK'
+  !endif
 
-          count_ibool(iglob1) = count_ibool(iglob1) + 1
-          count_ibool(iglob2) = count_ibool(iglob2) + 1
-          count_ibool(iglob3) = count_ibool(iglob3) + 1
-          count_ibool(iglob4) = count_ibool(iglob4) + 1
-          count_ibool(iglob5) = count_ibool(iglob5) + 1
-          count_ibool(iglob6) = count_ibool(iglob6) + 1
-          count_ibool(iglob7) = count_ibool(iglob7) + 1
-          count_ibool(iglob8) = count_ibool(iglob8) + 1
-        endif
-      endif
-    enddo
-    maxval_count_ibool_inner = maxval(count_ibool)
+  deallocate(mask_ibool)
 
-    ! debug outupt
-    if( myrank == 0 ) then
-      write(IMAIN,*) '     maximum valence (i.e. minimum possible nb of colors) for outer = ',maxval_count_ibool_outer
-      write(IMAIN,*) '     maximum valence (i.e. minimum possible nb of colors) for inner = ',maxval_count_ibool_inner
+  end subroutine get_color_faster
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine count_mesh_valence(ibool,is_on_a_slice_edge,ispec_is_d, &
+                                myrank, nspec, nglob, &
+                                maxval_count_ibool_outer,maxval_count_ibool_inner)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+  logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
+
+  integer :: myrank
+  integer :: maxval_count_ibool_outer,maxval_count_ibool_inner
+
+  ! local parameters
+  integer, dimension(:), allocatable :: count_ibool
+  integer :: ispec
+  integer :: iglob1,iglob2,iglob3,iglob4,iglob5,iglob6,iglob7,iglob8
+  integer :: ier
+  
+  ! allocates count array
+  allocate(count_ibool(nglob),stat=ier)
+  if( ier /= 0 ) stop 'error allocating count_ibool array'
+
+  ! valence numbers of the mesh
+  maxval_count_ibool_outer = 0
+  maxval_count_ibool_inner = 0
+
+  ! valence for outer elements
+  count_ibool(:) = 0
+  do ispec = 1,nspec
+    ! domain elements only
+    if(ispec_is_d(ispec)) then
+      ! outer elements
+      if (is_on_a_slice_edge(ispec)) then
+        ! the eight corners of the current element
+        iglob1=ibool(1,1,1,ispec)
+        iglob2=ibool(NGLLX,1,1,ispec)
+        iglob3=ibool(NGLLX,NGLLY,1,ispec)
+        iglob4=ibool(1,NGLLY,1,ispec)
+        iglob5=ibool(1,1,NGLLZ,ispec)
+        iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+        iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+        iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+
+        count_ibool(iglob1) = count_ibool(iglob1) + 1
+        count_ibool(iglob2) = count_ibool(iglob2) + 1
+        count_ibool(iglob3) = count_ibool(iglob3) + 1
+        count_ibool(iglob4) = count_ibool(iglob4) + 1
+        count_ibool(iglob5) = count_ibool(iglob5) + 1
+        count_ibool(iglob6) = count_ibool(iglob6) + 1
+        count_ibool(iglob7) = count_ibool(iglob7) + 1
+        count_ibool(iglob8) = count_ibool(iglob8) + 1
+      endif
     endif
+  enddo
+  maxval_count_ibool_outer = maxval(count_ibool)
 
-    deallocate(count_ibool)
+  ! valence for inner elements
+  count_ibool(:) = 0
+  do ispec = 1,nspec
+    ! domain elements only
+    if(ispec_is_d(ispec)) then
+      ! inner elements
+      if (.not. is_on_a_slice_edge(ispec)) then
+        ! the eight corners of the current element
+        iglob1=ibool(1,1,1,ispec)
+        iglob2=ibool(NGLLX,1,1,ispec)
+        iglob3=ibool(NGLLX,NGLLY,1,ispec)
+        iglob4=ibool(1,NGLLY,1,ispec)
+        iglob5=ibool(1,1,NGLLZ,ispec)
+        iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+        iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+        iglob8=ibool(1,NGLLY,NGLLZ,ispec)
 
-    ! initial guess of number of colors needed
-    if( maxval_count_ibool_inner > 0 .and. maxval_count_ibool_inner < nb_colors_inner_elements ) then
-      ! uses maximum valence to estimate number of colors for Droux
-      nb_colors_inner_elements = maxval_count_ibool_inner
+        count_ibool(iglob1) = count_ibool(iglob1) + 1
+        count_ibool(iglob2) = count_ibool(iglob2) + 1
+        count_ibool(iglob3) = count_ibool(iglob3) + 1
+        count_ibool(iglob4) = count_ibool(iglob4) + 1
+        count_ibool(iglob5) = count_ibool(iglob5) + 1
+        count_ibool(iglob6) = count_ibool(iglob6) + 1
+        count_ibool(iglob7) = count_ibool(iglob7) + 1
+        count_ibool(iglob8) = count_ibool(iglob8) + 1
+      endif
     endif
+  enddo
+  maxval_count_ibool_inner = maxval(count_ibool)
 
-    !! DK DK Nov 2011: give a lower bound for the number of colors needed
+  ! debug outupt
+  if( myrank == 0 ) then
+    write(IMAIN,*) '     maximum valence (i.e. minimum possible nb of colors) for outer = ',maxval_count_ibool_outer
+    write(IMAIN,*) '     maximum valence (i.e. minimum possible nb of colors) for inner = ',maxval_count_ibool_inner
+  endif
 
-    !! DK DK do it for inner elements only for now
-    nb_already_colored = nspec_outer
+  deallocate(count_ibool)
+  
+  end subroutine count_mesh_valence
 
-    765 continue
+!
+!-------------------------------------------------------------------------------------------------
+!
 
-    ! initial guess of number of colors needed
-    ncolors = nb_colors_outer_elements + nb_colors_inner_elements
+  subroutine balance_colors_Droux(ibool,is_on_a_slice_edge,ispec_is_d, &
+                                  myrank, nspec, nglob, &
+                                  color, nb_colors_outer_elements, nb_colors_inner_elements, &
+                                  nspec_outer,nspec_inner,maxval_count_ibool_inner, &
+                                  mask_ibool,fail_safe)
 
-    ! debug output
-    if( myrank == 0 ) then
-      write(IMAIN,*) '     Droux optimization: try = ',nb_tries_of_Droux_1993,'colors = ',ncolors
-    endif
+  implicit none
 
-    icolormin = nb_colors_outer_elements + 1
-    icolormax = ncolors
+  include "constants.h"
 
-    allocate(nb_elems_in_this_color(ncolors))
-    allocate(icolor_conflict_found(ncolors))
+  integer :: nspec,nglob
 
-    nb_elems_in_this_color(:) = 0
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+  logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
+  logical, dimension(nglob) :: mask_ibool
+
+  integer, dimension(nspec) :: color
+
+  integer :: myrank
+  integer :: nb_colors_outer_elements,nb_colors_inner_elements
+
+  integer :: nspec_outer,nspec_inner
+  integer :: maxval_count_ibool_inner
+
+  logical :: fail_safe
+  
+  ! local parameters
+  logical, dimension(:), allocatable :: icolor_conflict_found
+  integer, dimension(:), allocatable :: nb_elems_in_this_color
+  integer :: ispec,ispec2,icolor,ncolors,icolormin,icolormax,icolor_chosen,nb_elems_in_color_chosen
+  integer :: nb_tries_of_Droux_1993,last_ispec_studied
+  integer :: ier
+  
+  ! debug outupt
+  if( myrank == 0 ) then
+    write(IMAIN,*) '     balancing colors: Droux algorithm'  
+    write(IMAIN,*) '       initial number of outer element colors = ',nb_colors_outer_elements
+    write(IMAIN,*) '       initial number of inner element colors = ',nb_colors_inner_elements
+    write(IMAIN,*) '       initial number of total colors = ',nb_colors_outer_elements + nb_colors_inner_elements
+  endif
+
+  ! initial guess of number of colors needed
+  if( maxval_count_ibool_inner > 0 .and. maxval_count_ibool_inner < nb_colors_inner_elements ) then
+    ! uses maximum valence to estimate number of colors for Droux
+    nb_colors_inner_elements = maxval_count_ibool_inner
+  endif
+
+  !! DK DK do it for inner elements only for now
+  ! Droux optimization run
+  nb_tries_of_Droux_1993 = 1
+
+  ! entry point to re-try Droux
+  765 continue
+
+  ! initial guess of number of colors needed
+  ncolors = nb_colors_outer_elements + nb_colors_inner_elements
+
+  ! debug output
+  if( myrank == 0 ) then
+    write(IMAIN,*) '     Droux optimization: try = ',nb_tries_of_Droux_1993,'colors = ',ncolors
+  endif
+
+  icolormin = nb_colors_outer_elements + 1
+  icolormax = ncolors
+
+  ! allocates temporary arrays
+  allocate(nb_elems_in_this_color(ncolors), &
+          icolor_conflict_found(ncolors),stat=ier)
+  if( ier /= 0 ) stop 'error allocating nb_elems_in_this_color arrays'
+
+  nb_elems_in_this_color(:) = 0
+  mask_ibool(:) = .false.
+  last_ispec_studied = -1
+
+  do ispec = 1,nspec
+    ! domain elements only
+    if(ispec_is_d(ispec)) then
+
+      ! only inner elements
+      if (is_on_a_slice_edge(ispec)) cycle
+
+      ! unmark the eight corners of the previously marked element
+      if(last_ispec_studied > 0) then
+        mask_ibool(ibool(1,1,1,last_ispec_studied)) = .false.
+        mask_ibool(ibool(NGLLX,1,1,last_ispec_studied)) = .false.
+        mask_ibool(ibool(NGLLX,NGLLY,1,last_ispec_studied)) = .false.
+        mask_ibool(ibool(1,NGLLY,1,last_ispec_studied)) = .false.
+        mask_ibool(ibool(1,1,NGLLZ,last_ispec_studied)) = .false.
+        mask_ibool(ibool(NGLLX,1,NGLLZ,last_ispec_studied)) = .false.
+        mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,last_ispec_studied)) = .false.
+        mask_ibool(ibool(1,NGLLY,NGLLZ,last_ispec_studied)) = .false.
+      endif
+      icolor_conflict_found(icolormin:icolormax) = .false.
+
+      ! mark the eight corners of the current element
+      mask_ibool(ibool(1,1,1,ispec)) = .true.
+      mask_ibool(ibool(NGLLX,1,1,ispec)) = .true.
+      mask_ibool(ibool(NGLLX,NGLLY,1,ispec)) = .true.
+      mask_ibool(ibool(1,NGLLY,1,ispec)) = .true.
+      mask_ibool(ibool(1,1,NGLLZ,ispec)) = .true.
+      mask_ibool(ibool(NGLLX,1,NGLLZ,ispec)) = .true.
+      mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec)) = .true.
+      mask_ibool(ibool(1,NGLLY,NGLLZ,ispec)) = .true.
+      last_ispec_studied = ispec
+
+      if(ispec > 1) then
+        do ispec2 = 1,ispec - 1
+          ! domain elements only
+          if(ispec_is_d(ispec2)) then
+
+            ! only inner elements
+            if (is_on_a_slice_edge(ispec2)) cycle
+
+            ! if conflict already found previously with this color, no need to test again
+            if (icolor_conflict_found(color(ispec2))) cycle
+
+            ! test the eight corners of the current element for a common point with element under study
+            if (mask_ibool(ibool(1,1,1,ispec2)) .or. &
+                mask_ibool(ibool(NGLLX,1,1,ispec2)) .or. &
+                mask_ibool(ibool(NGLLX,NGLLY,1,ispec2)) .or. &
+                mask_ibool(ibool(1,NGLLY,1,ispec2)) .or. &
+                mask_ibool(ibool(1,1,NGLLZ,ispec2)) .or. &
+                mask_ibool(ibool(NGLLX,1,NGLLZ,ispec2)) .or. &
+                mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
+                mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
+              icolor_conflict_found(color(ispec2)) = .true.
+              
+          endif ! domain elements
+        enddo
+      endif
+
+      ! check if the Droux 1993 algorithm found a solution
+      if (all(icolor_conflict_found(icolormin:icolormax))) then
+        ! user output
+        !if(myrank == 0) write(IMAIN,*) '     Droux 1993 algorithm did not find any solution for ncolors = ',ncolors
+
+        ! try with one more color
+        if(nb_tries_of_Droux_1993 < MAX_NB_TRIES_OF_DROUX_1993) then
+          nb_colors_inner_elements = nb_colors_inner_elements + 1
+          deallocate(nb_elems_in_this_color)
+          deallocate(icolor_conflict_found)
+          nb_tries_of_Droux_1993 = nb_tries_of_Droux_1993 + 1
+          goto 765
+        else
+          ! fail-safe mechanism: if Droux 1993 still fails after all the tries with one more color,
+          ! then go back to my original simple and fast coloring algorithm
+          fail_safe = .true.
+          return
+        endif
+      endif
+
+      ! loop on all the colors to determine the color with the smallest number
+      ! of elements and for which there is no conflict
+      nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
+      do icolor = icolormin,icolormax
+        if (.not. icolor_conflict_found(icolor) .and. nb_elems_in_this_color(icolor) < nb_elems_in_color_chosen) then
+          icolor_chosen = icolor
+          nb_elems_in_color_chosen = nb_elems_in_this_color(icolor)
+        endif
+      enddo
+
+      ! store the color finally chosen
+      color(ispec) = icolor_chosen
+      nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
+
+    endif ! domain elements
+  enddo
+
+  ! debug output
+  if(myrank == 0) then
+    write(IMAIN,*) '     created a total of ',maxval(color),' colors in this domain' ! 'for all the domain elements of the mesh'
+    if( nb_colors_outer_elements > 0 ) &
+      write(IMAIN,*) '     typical nb of elements per color for outer elements should be ', &
+        nspec_outer / nb_colors_outer_elements
+    if( nb_colors_inner_elements > 0 ) &
+      write(IMAIN,*) '     typical nb of elements per color for inner elements should be ', &
+        nspec_inner / nb_colors_inner_elements
+  endif
+
+
+  end subroutine balance_colors_Droux
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine balance_colors_simple(ibool,is_on_a_slice_edge,ispec_is_d, &
+                                  myrank, nspec, nglob, &
+                                  color, nb_colors_outer_elements, nb_colors_inner_elements, &
+                                  nspec_outer,nspec_inner,mask_ibool)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+  logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
+  logical, dimension(nglob) :: mask_ibool
+
+  integer, dimension(nspec) :: color
+
+  integer :: myrank
+  integer :: nb_colors_outer_elements,nb_colors_inner_elements
+
+  integer :: nspec_outer,nspec_inner
+
+  ! local parameters
+  logical, dimension(:), allocatable :: icolor_conflict_found
+  integer, dimension(:), allocatable :: nb_elems_in_this_color
+  integer :: ispec,ispec2,icolor,ncolors,icolormin,icolormax,icolor_chosen,nb_elems_in_color_chosen
+  integer :: last_ispec_studied
+  integer :: target_nb_elems_per_color,icolor_target
+  integer :: ier
+    
+  ! debug outupt
+  if( myrank == 0 ) then
+    write(IMAIN,*) '     balancing colors: simple algorithm'
+    write(IMAIN,*) '       number of outer element colors = ',nb_colors_outer_elements
+    write(IMAIN,*) '       number of inner element colors = ',nb_colors_inner_elements
+    write(IMAIN,*) '       number of total colors = ',nb_colors_outer_elements + nb_colors_inner_elements
+  endif
+
+  ! balances colors in postprocess if Droux (1993) is not used
+  ncolors = nb_colors_outer_elements + nb_colors_inner_elements
+
+  ! allocates temporary arrays
+  allocate(nb_elems_in_this_color(ncolors), &
+          icolor_conflict_found(ncolors),stat=ier)
+  if( ier /= 0 ) stop 'error allocating nb_elems_in_this_color arrays'
+
+  !! DK DK do it for outer elements
+  icolormin = 1
+  icolormax = nb_colors_outer_elements
+
+  ! ideal value if all colors are perfectly balanced
+  if( nb_colors_outer_elements > 0 ) then
+    target_nb_elems_per_color = nspec_outer / nb_colors_outer_elements + 1
+  else
+    target_nb_elems_per_color = 1  
+  endif
+  
+  ! print *,'nspec_outer,target_nb_elems_per_color = ',nspec_outer,target_nb_elems_per_color
+
+  ! count the initial number of elements in each color
+  nb_elems_in_this_color(:) = 0
+  do icolor = icolormin,icolormax
+    nb_elems_in_this_color(icolor) = count(color == icolor)
+  enddo
+
+  ! do not balance the last one, because it will be balanced automatically by the others
+  do icolor = icolormin,icolormax-1 
+
+    ! if color is already balanced, do nothing
+    ! (this works because in the initial set the number of elements per color decreases when the color number increases)
+    if(nb_elems_in_this_color(icolor) <= target_nb_elems_per_color) cycle
+
     mask_ibool(:) = .false.
     last_ispec_studied = -1
 
@@ -450,9 +795,15 @@
       ! domain elements only
       if(ispec_is_d(ispec)) then
 
-        ! only inner elements
-        if (is_on_a_slice_edge(ispec)) cycle
+        ! only outer elements
+        if (.not. is_on_a_slice_edge(ispec)) cycle
 
+        ! only elements of this color
+        if (color(ispec) /= icolor) cycle
+
+        ! if color is now balanced because we have moved enough elements then stop searching
+        if(nb_elems_in_this_color(icolor) <= target_nb_elems_per_color) exit
+
         ! unmark the eight corners of the previously marked element
         if(last_ispec_studied > 0) then
           mask_ibool(ibool(1,1,1,last_ispec_studied)) = .false.
@@ -465,6 +816,7 @@
           mask_ibool(ibool(1,NGLLY,NGLLZ,last_ispec_studied)) = .false.
         endif
         icolor_conflict_found(icolormin:icolormax) = .false.
+        icolor_conflict_found(icolor) = .true. ! cannot move element to the color it already has
 
         ! mark the eight corners of the current element
         mask_ibool(ibool(1,1,1,ispec)) = .true.
@@ -477,141 +829,198 @@
         mask_ibool(ibool(1,NGLLY,NGLLZ,ispec)) = .true.
         last_ispec_studied = ispec
 
-        if(ispec > 1) then
-          do ispec2 = 1,ispec - 1
-            ! domain elements only
-            if(ispec_is_d(ispec)) then
+        ! test if we can move this element to another color
+        do ispec2 = 1,nspec
+          ! domain elements only
+          if(ispec_is_d(ispec2)) then
 
-              ! only inner elements
-              if (is_on_a_slice_edge(ispec2)) cycle
+            ! do not test that element itself
+            if (ispec2 == ispec) cycle
 
-              ! if conflict already found previously with this color, no need to test again
-              if (icolor_conflict_found(color(ispec2))) cycle
+            ! only outer elements
+            if (.not. is_on_a_slice_edge(ispec2)) cycle
 
-              ! test the eight corners of the current element for a common point with element under study
-              if (mask_ibool(ibool(1,1,1,ispec2)) .or. &
-                  mask_ibool(ibool(NGLLX,1,1,ispec2)) .or. &
-                  mask_ibool(ibool(NGLLX,NGLLY,1,ispec2)) .or. &
-                  mask_ibool(ibool(1,NGLLY,1,ispec2)) .or. &
-                  mask_ibool(ibool(1,1,NGLLZ,ispec2)) .or. &
-                  mask_ibool(ibool(NGLLX,1,NGLLZ,ispec2)) .or. &
-                  mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
-                  mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
+            ! if conflict already found previously with this color, no need to test again
+            if (icolor_conflict_found(color(ispec2))) cycle
+
+            ! test the eight corners of the current element for a common point with element under study
+            if (mask_ibool(ibool(1,1,1,ispec2)) .or. &
+              mask_ibool(ibool(NGLLX,1,1,ispec2)) .or. &
+              mask_ibool(ibool(NGLLX,NGLLY,1,ispec2)) .or. &
+              mask_ibool(ibool(1,NGLLY,1,ispec2)) .or. &
+              mask_ibool(ibool(1,1,NGLLZ,ispec2)) .or. &
+              mask_ibool(ibool(NGLLX,1,NGLLZ,ispec2)) .or. &
+              mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
+              mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
                 icolor_conflict_found(color(ispec2)) = .true.
-            endif ! domain elements
-          enddo
-        endif
+          endif ! domain elements
+        enddo
 
-        ! check if the Droux 1993 algorithm found a solution
-        if (all(icolor_conflict_found(icolormin:icolormax))) then
-          ! user output
-          !if(myrank == 0) write(IMAIN,*) '     Droux 1993 algorithm did not find any solution for ncolors = ',ncolors
+        ! if color is already above target size for a balanced set, do not move to it
+        do icolor_target = icolormin,icolormax
+          if(nb_elems_in_this_color(icolor_target) >= target_nb_elems_per_color) &
+            icolor_conflict_found(icolor_target) = .true.
+        enddo
 
-          ! try with one more color
-          if(nb_tries_of_Droux_1993 < MAX_NB_TRIES_OF_DROUX_1993) then
-            nb_colors_inner_elements = nb_colors_inner_elements + 1
-            deallocate(nb_elems_in_this_color)
-            deallocate(icolor_conflict_found)
-            nb_tries_of_Droux_1993 = nb_tries_of_Droux_1993 + 1
-            goto 765
-          else
-            ! fail-safe mechanism: if Droux 1993 still fails after all the tries with one more color,
-            ! then go back to my original simple and fast coloring algorithm
-            try_Droux_coloring = .false.
-            if(myrank == 0) write(IMAIN,*) '     giving up on Droux 1993 algorithm, calling fail-safe mechanism'
-            goto 999
-          endif
-        endif
+        ! if cannot find any other color to move this element to
+        if (all(icolor_conflict_found(icolormin:icolormax))) cycle
 
-        ! loop on all the colors to determine the color with the smallest number
-        ! of elements and for which there is no conflict
+        ! loop on all the colors to determine the color with the smallest number of elements 
+        ! and for which there is no conflict
         nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
-        do icolor = icolormin,icolormax
-          if (.not. icolor_conflict_found(icolor) .and. nb_elems_in_this_color(icolor) < nb_elems_in_color_chosen) then
-            icolor_chosen = icolor
-            nb_elems_in_color_chosen = nb_elems_in_this_color(icolor)
+        do icolor_target = icolormin,icolormax
+          if (.not. icolor_conflict_found(icolor_target) .and. &
+             nb_elems_in_this_color(icolor_target) < nb_elems_in_color_chosen) then
+            icolor_chosen = icolor_target
+            nb_elems_in_color_chosen = nb_elems_in_this_color(icolor_target)
           endif
         enddo
 
-        ! store the color finally chosen
+        ! move the element to that new color
+        ! remove element from its current color
+        nb_elems_in_this_color(color(ispec)) = nb_elems_in_this_color(color(ispec)) - 1 
         color(ispec) = icolor_chosen
-        nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
-
+        ! and add it to the new color
+        nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1 
+        
       endif ! domain elements
     enddo
 
-    ! debug output
-    if(myrank == 0) then
-      write(IMAIN,*) '     created a total of ',maxval(color),' colors in this domain' ! 'for all the domain elements of the mesh'
-      if( nb_colors_outer_elements > 0 ) &
-        write(IMAIN,*) '     typical nb of elements per color for outer elements should be ', &
-          nspec_outer / nb_colors_outer_elements
-      if( nb_colors_inner_elements > 0 ) &
-        write(IMAIN,*) '     typical nb of elements per color for inner elements should be ', &
-          nspec_inner / nb_colors_inner_elements
-    endif
+  enddo ! icolor
 
-  endif ! of if(try_Droux_coloring)
+!! DK DK do it for inner elements
 
-!!!!!!!!!! DK DK Nov 2011 added this again to create more balanced colors according to JJ Droux (1993)
+  icolormin = nb_colors_outer_elements + 1
+  icolormax = ncolors
 
-  !!!!!!!! DK DK now check that all the color sets are independent
-  do icolor = 1,maxval(color)
+  ! ideal value if all colors are perfectly balanced
+  if( nb_colors_inner_elements > 0 ) then
+    target_nb_elems_per_color = nspec_inner / nb_colors_inner_elements + 1
+  else
+    target_nb_elems_per_color = 1
+  endif
+  ! print *,'nspec_inner,target_nb_elems_per_color = ',nspec_inner,target_nb_elems_per_color
+
+  ! count the initial number of elements in each color
+  nb_elems_in_this_color(:) = 0
+  do icolor = icolormin,icolormax
+    nb_elems_in_this_color(icolor) = count(color == icolor)
+  enddo
+
+  ! do not balance the last one, because it will be balanced automatically by the others
+  do icolor = icolormin,icolormax-1 
+
+    ! if color is already balanced, do nothing
+    ! (this works because in the initial set the number of elements per color decreases when the color number increases)
+    if(nb_elems_in_this_color(icolor) <= target_nb_elems_per_color) cycle
+
     mask_ibool(:) = .false.
+    last_ispec_studied = -1
+
     do ispec = 1,nspec
       ! domain elements only
       if(ispec_is_d(ispec)) then
-        if(color(ispec) == icolor ) then
-          ! the eight corners of the current element
-          iglob1=ibool(1,1,1,ispec)
-          iglob2=ibool(NGLLX,1,1,ispec)
-          iglob3=ibool(NGLLX,NGLLY,1,ispec)
-          iglob4=ibool(1,NGLLY,1,ispec)
-          iglob5=ibool(1,1,NGLLZ,ispec)
-          iglob6=ibool(NGLLX,1,NGLLZ,ispec)
-          iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
-          iglob8=ibool(1,NGLLY,NGLLZ,ispec)
 
-          if(mask_ibool(iglob1) .or. mask_ibool(iglob2) .or. mask_ibool(iglob3) .or. mask_ibool(iglob4) .or. &
-             mask_ibool(iglob5) .or. mask_ibool(iglob6) .or. mask_ibool(iglob7) .or. mask_ibool(iglob8)) then
-            ! if element of this color has a common point with another element of that same color
-            ! then there is a problem, the color set is not correct
-            print*,'error check color:',icolor
-            stop 'error detected: found a common point inside a color set'
-          else
-            mask_ibool(iglob1) = .true.
-            mask_ibool(iglob2) = .true.
-            mask_ibool(iglob3) = .true.
-            mask_ibool(iglob4) = .true.
-            mask_ibool(iglob5) = .true.
-            mask_ibool(iglob6) = .true.
-            mask_ibool(iglob7) = .true.
-            mask_ibool(iglob8) = .true.
-          endif
+        ! only inner elements
+        if (is_on_a_slice_edge(ispec)) cycle
+
+        ! only elements of this color
+        if (color(ispec) /= icolor) cycle
+
+        ! if color is now balanced because we have moved enough elements then stop searching
+        if(nb_elems_in_this_color(icolor) <= target_nb_elems_per_color) exit
+
+        ! unmark the eight corners of the previously marked element
+        if(last_ispec_studied > 0) then
+          mask_ibool(ibool(1,1,1,last_ispec_studied)) = .false.
+          mask_ibool(ibool(NGLLX,1,1,last_ispec_studied)) = .false.
+          mask_ibool(ibool(NGLLX,NGLLY,1,last_ispec_studied)) = .false.
+          mask_ibool(ibool(1,NGLLY,1,last_ispec_studied)) = .false.
+          mask_ibool(ibool(1,1,NGLLZ,last_ispec_studied)) = .false.
+          mask_ibool(ibool(NGLLX,1,NGLLZ,last_ispec_studied)) = .false.
+          mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,last_ispec_studied)) = .false.
+          mask_ibool(ibool(1,NGLLY,NGLLZ,last_ispec_studied)) = .false.
         endif
-      endif
-    enddo
+        icolor_conflict_found(icolormin:icolormax) = .false.
+        icolor_conflict_found(icolor) = .true. ! cannot move element to the color it already has
 
-    !debug output
-    !if(myrank == 0) print *,'  color ',icolor,' has disjoint elements only and is therefore OK'
-    !if(myrank == 0) print *,'  color ',icolor,' contains ',count(color == icolor),' elements'
-  enddo
+        ! mark the eight corners of the current element
+        mask_ibool(ibool(1,1,1,ispec)) = .true.
+        mask_ibool(ibool(NGLLX,1,1,ispec)) = .true.
+        mask_ibool(ibool(NGLLX,NGLLY,1,ispec)) = .true.
+        mask_ibool(ibool(1,NGLLY,1,ispec)) = .true.
+        mask_ibool(ibool(1,1,NGLLZ,ispec)) = .true.
+        mask_ibool(ibool(NGLLX,1,NGLLZ,ispec)) = .true.
+        mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec)) = .true.
+        mask_ibool(ibool(1,NGLLY,NGLLZ,ispec)) = .true.
+        last_ispec_studied = ispec
 
-  ! debug output
-  if(myrank == 0) then
-    !print*, '     the ',maxval(color),' color sets are OK'
-  endif
+        ! test if we can move this element to another color
+        do ispec2 = 1,nspec
+          ! domain elements only
+          if(ispec_is_d(ispec2)) then
 
-  deallocate(mask_ibool)
+            ! do not test that element itself
+            if (ispec2 == ispec) cycle
 
-  end subroutine get_color_faster
+            ! only inner elements
+            if (is_on_a_slice_edge(ispec2)) cycle
 
+            ! if conflict already found previously with this color, no need to test again
+            if (icolor_conflict_found(color(ispec2))) cycle
 
+            ! test the eight corners of the current element for a common point with element under study
+            if (mask_ibool(ibool(1,1,1,ispec2)) .or. &
+              mask_ibool(ibool(NGLLX,1,1,ispec2)) .or. &
+              mask_ibool(ibool(NGLLX,NGLLY,1,ispec2)) .or. &
+              mask_ibool(ibool(1,NGLLY,1,ispec2)) .or. &
+              mask_ibool(ibool(1,1,NGLLZ,ispec2)) .or. &
+              mask_ibool(ibool(NGLLX,1,NGLLZ,ispec2)) .or. &
+              mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
+              mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
+                icolor_conflict_found(color(ispec2)) = .true.
+  
+          endif ! domain elements
+        enddo
+
+        ! if color is already above target size for a balanced set, do not move to it
+        do icolor_target = icolormin,icolormax
+          if(nb_elems_in_this_color(icolor_target) >= target_nb_elems_per_color) &
+            icolor_conflict_found(icolor_target) = .true.
+        enddo
+
+        ! if cannot find any other color to move this element to
+        if (all(icolor_conflict_found(icolormin:icolormax))) cycle 
+
+        ! loops on all the colors to determine the color with the smallest number of elements 
+        ! and for which there is no conflict
+        nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
+        do icolor_target = icolormin,icolormax
+          if (.not. icolor_conflict_found(icolor_target) .and. &
+            nb_elems_in_this_color(icolor_target) < nb_elems_in_color_chosen) then
+            icolor_chosen = icolor_target
+            nb_elems_in_color_chosen = nb_elems_in_this_color(icolor_target)
+          endif
+        enddo
+
+        ! moves the element to that new color
+        ! remove element from its current color
+        nb_elems_in_this_color(color(ispec)) = nb_elems_in_this_color(color(ispec)) - 1
+        color(ispec) = icolor_chosen
+        ! and add it to the new color
+        nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1 
+      
+      endif ! domain elements
+    enddo
+
+  enddo ! icolor
+
+  end subroutine balance_colors_simple
+
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-
   subroutine get_final_perm(color,perm,first_elem_number_in_this_color, &
                             nspec,nb_colors,nb_colors_outer_elements, &
                             ispec_is_d,nspec_domain)
@@ -673,501 +1082,10 @@
 
   end subroutine get_final_perm
 
-!
-!-------------------------------------------------------------------------------------------------
-!
 
-! unused...
-!  subroutine get_perm_color(is_on_a_slice_edge,ispec_is_d, &
-!                            ibool,perm,nspec,nglob, &
-!                            nb_colors_outer_elements,nb_colors_inner_elements, &
-!                            nspec_outer,first_elem_number_in_this_color,myrank)
 !
-!  implicit none
-!
-!  include "constants.h"
-!  integer, parameter :: NGNOD_HEXAHEDRA = 8
-!
-!  logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
-!
-!  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-!  integer, dimension(nspec) :: perm
-!  integer, dimension(nspec) :: color
-!  integer, dimension(MAX_NUMBER_OF_COLORS+1) :: first_elem_number_in_this_color
-!  integer :: nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer,myrank
-!
-!  ! local variables
-!  integer nspec,nglob_GLL_full
-!
-!  ! a neighbor of a hexahedral node is a hexahedron that shares a face with it -> max degree of a node = 6
-!  integer, parameter :: MAX_NUMBER_OF_NEIGHBORS = 100
-!
-!  ! global corner numbers that need to be created
-!  integer, dimension(nglob) :: global_corner_number
-!
-!  integer mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1)
-!  integer, dimension(:), allocatable :: ne,np,adj
-!  integer xadj(nspec+1)
-!
-!  !logical maskel(nspec)
-!
-!  integer i,istart,istop,number_of_neighbors
-!
-!  integer nglob_eight_corners_only,nglob
-!
-!  ! only count the total size of the array that will be created, or actually create it
-!  logical count_only
-!  integer total_size_ne,total_size_adj
-!
-!
-!  ! total number of points in the mesh
-!  nglob_GLL_full = nglob
-!
-!  !---- call Charbel Farhat's routines
-!  if(myrank == 0) &
-!    write(IMAIN,*) 'calling form_elt_connectivity_foelco to perform mesh coloring and inner/outer element splitting'
-!  call form_elt_connectivity_foelco(mn,mp,nspec,global_corner_number,nglob_GLL_full,ibool,nglob_eight_corners_only)
-!  do i=1,nspec
-!    istart = mp(i)
-!    istop = mp(i+1) - 1
-!  enddo
-!
-!  ! count only, to determine the size needed for the array
-!  allocate(np(nglob_eight_corners_only+1))
-!  count_only = .true.
-!  total_size_ne = 1
-!  if(myrank == 0) write(IMAIN,*) 'calling form_node_connectivity_fonoco to determine the size of the table'
-!  allocate(ne(total_size_ne))
-!  call form_node_connectivity_fonoco(mn,mp,ne,np,nglob_eight_corners_only,nspec,count_only,total_size_ne)
-!  deallocate(ne)
-!
-!  !print *, 'nglob_eight_corners_only'
-!  !print *, nglob_eight_corners_only
-!
-!  ! allocate the array with the right size
-!  allocate(ne(total_size_ne))
-!
-!  ! now actually generate the array
-!  count_only = .false.
-!  if(myrank == 0) write(IMAIN,*) 'calling form_node_connectivity_fonoco to actually create the table'
-!  call form_node_connectivity_fonoco(mn,mp,ne,np,nglob_eight_corners_only,nspec,count_only,total_size_ne)
-!  do i=1,nglob_eight_corners_only
-!    istart = np(i)
-!    istop = np(i+1) - 1
-!  enddo
-!
-!  !print *, 'total_size_ne'
-!  !print *, total_size_ne
-!
-!  ! count only, to determine the size needed for the array
-!  count_only = .true.
-!  total_size_adj = 1
-!  if(myrank == 0) write(IMAIN,*) 'calling create_adjacency_table_adjncy to determine the size of the table'
-!  allocate(adj(total_size_adj))
-!  !call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,maskel,nspec,nglob_eight_corners_only,&
-!  !count_only,total_size_ne,total_size_adj,.false.)
-!  call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,nspec,nglob_eight_corners_only,&
-!  count_only,total_size_ne,total_size_adj,.false.)
-!  deallocate(adj)
-!
-!  ! allocate the array with the right size
-!  allocate(adj(total_size_adj))
-!
-!  ! now actually generate the array
-!  count_only = .false.
-!  if(myrank == 0) write(IMAIN,*) 'calling create_adjacency_table_adjncy again to actually create the table'
-!  !call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,maskel,nspec,nglob_eight_corners_only,&
-!  !count_only,total_size_ne,total_size_adj,.false.)
-!  call create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,nspec,nglob_eight_corners_only,&
-!  count_only,total_size_ne,total_size_adj,.false.)
-!
-!  do i=1,nspec
-!    istart = xadj(i)
-!    istop = xadj(i+1) - 1
-!    number_of_neighbors = istop-istart+1
-!    if(number_of_neighbors < 1 .or. number_of_neighbors > MAX_NUMBER_OF_NEIGHBORS) stop 'incorrect number of neighbors'
-!  enddo
-!
-!  deallocate(ne,np)
-!
-!  call get_color(adj,xadj,color,nspec,total_size_adj, &
-!                is_on_a_slice_edge,ispec_is_d, &
-!                nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer)
-!
-!  if(myrank == 0) then
-!    write(IMAIN,*) '  number of colors of the graph for inner elements = ',nb_colors_inner_elements
-!    write(IMAIN,*) '  number of colors of the graph for outer elements = ',nb_colors_outer_elements
-!    write(IMAIN,*) '  total number of colors of the graph (sum of both) = ', nb_colors_inner_elements + nb_colors_outer_elements
-!    write(IMAIN,*) '  number of elements of the graph for outer elements = ',nspec_outer
-!  endif
-!
-!  deallocate(adj)
-!
-!  if(myrank == 0) write(IMAIN,*) 'generating the final colors'
-!  first_elem_number_in_this_color(:) = -1
-!  call get_final_perm(color,perm,first_elem_number_in_this_color,nspec,nb_colors_inner_elements+nb_colors_outer_elements)
-!
-!  if(myrank == 0) write(IMAIN,*) 'done with mesh coloring and inner/outer element splitting'
-!
-!  end subroutine get_perm_color
-
-!
 !-------------------------------------------------------------------------------------------------
 !
-
-!unused...
-!  subroutine get_color(adj,xadj,color,nspec,total_size_adj, &
-!                      is_on_a_slice_edge,ispec_is_d, &
-!                      nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer)
-!
-!  integer, intent(in) :: nspec,total_size_adj
-!  integer, intent(in) :: adj(total_size_adj),xadj(nspec+1)
-!  integer :: color(nspec)
-!  integer :: this_color,nb_already_colored,ispec,ixadj,ok
-!  logical, dimension(nspec) :: is_on_a_slice_edge,ispec_is_d
-!  integer :: nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer
-!  logical :: is_outer_element(nspec)
-!
-!  nspec_outer = 0
-!
-!  is_outer_element(:) = .false.
-!
-!  do ispec=1,nspec
-!    if(ispec_is_d(ispec)) then
-!      if (is_on_a_slice_edge(ispec)) then
-!        is_outer_element(ispec) = .true.
-!        nspec_outer=nspec_outer+1
-!      endif
-!    endif
-!  enddo
-!
-!  ! outer elements
-!  color(:) = 0
-!  this_color = 0
-!  nb_already_colored = 0
-!  do while(nb_already_colored<nspec_outer)
-!    this_color = this_color + 1
-!    do ispec = 1, nspec
-!      if(ispec_is_d(ispec)) then
-!        if (is_outer_element(ispec)) then
-!          if (color(ispec) == 0) then
-!            ok = 1
-!            do ixadj = xadj(ispec), (xadj(ispec+1)-1)
-!              if (is_outer_element(adj(ixadj)) .and. color(adj(ixadj)) == this_color) ok = 0
-!            enddo
-!            if (ok /= 0) then
-!              color(ispec) = this_color
-!              nb_already_colored = nb_already_colored + 1
-!            endif
-!          endif
-!        endif
-!      endif
-!    enddo
-!  enddo
-!  nb_colors_outer_elements = this_color
-!
-!  ! inner elements
-!  do while(nb_already_colored<nspec)
-!    this_color = this_color + 1
-!    do ispec = 1, nspec
-!      if(ispec_is_d(ispec)) then
-!        if (.not. is_outer_element(ispec)) then
-!          if (color(ispec) == 0) then
-!            ok = 1
-!            do ixadj = xadj(ispec), (xadj(ispec+1)-1)
-!              if (.not. is_outer_element(adj(ixadj)) .and. color(adj(ixadj)) == this_color) ok = 0
-!            enddo
-!            if (ok /= 0) then
-!              color(ispec) = this_color
-!              nb_already_colored = nb_already_colored + 1
-!            endif
-!          endif
-!        endif
-!      endif
-!    enddo
-!  enddo
-!
-!  nb_colors_inner_elements = this_color - nb_colors_outer_elements
-!
-!end subroutine get_color
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-!unused...
-!
-!!=======================================================================
-!!
-!!  Charbel Farhat's FEM topology routines
-!!
-!!  Dimitri Komatitsch, February 1996 - Code based on Farhat's original version from 1987
-!!
-!!  modified and adapted by Dimitri Komatitsch, May 2006
-!!
-!!=======================================================================
-!
-!  subroutine form_elt_connectivity_foelco(mn,mp,nspec,global_corner_number,&
-!                                          nglob_GLL_full,ibool,nglob_eight_corners_only)
-!
-!!-----------------------------------------------------------------------
-!!
-!!   Forms the MN and MP arrays
-!!
-!!     Input :
-!!     -------
-!!           ibool    Array needed to build the element connectivity table
-!!           nspec    Number of elements in the domain
-!!           NGNOD_HEXAHEDRA    number of nodes per hexahedron (brick with 8 corners)
-!!
-!!     Output :
-!!     --------
-!!           MN, MP   This is the element connectivity array pair.
-!!                    Array MN contains the list of the element
-!!                    connectivity, that is, the nodes contained in each
-!!                    element. They are stored in a stacked fashion.
-!!
-!!                    Pointer array MP stores the location of each
-!!                    element list. Its length is equal to the number
-!!                    of elements plus one.
-!!
-!!-----------------------------------------------------------------------
-!
-!  implicit none
-!
-!  include "constants.h"
-!  integer, parameter :: NGNOD_HEXAHEDRA = 8
-!
-!  integer nspec,nglob_GLL_full
-!
-!  ! arrays with mesh parameters per slice
-!  integer, intent(in), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-!
-!  ! global corner numbers that need to be created
-!  integer, intent(out), dimension(nglob_GLL_full) :: global_corner_number
-!  integer, intent(out) :: mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1)
-!  integer, intent(out) :: nglob_eight_corners_only
-!
-!  integer ninter,nsum,ispec,node,k,inumcorner,ix,iy,iz
-!
-!  ninter = 1
-!  nsum = 1
-!  mp(1) = 1
-!
-!  !---- define topology of the elements in the mesh
-!  !---- we need to define adjacent numbers from the sub-mesh consisting of the corners only
-!  nglob_eight_corners_only = 0
-!  global_corner_number(:) = -1
-!
-!  do ispec=1,nspec
-!
-!    inumcorner = 0
-!    do iz = 1,NGLLZ,NGLLZ-1
-!      do iy = 1,NGLLY,NGLLY-1
-!        do ix = 1,NGLLX,NGLLX-1
-!
-!          inumcorner = inumcorner + 1
-!          if(inumcorner > NGNOD_HEXAHEDRA) stop 'corner number too large'
-!
-!          ! check if this point was already assigned a number previously, otherwise create one and store it
-!          if(global_corner_number(ibool(ix,iy,iz,ispec)) == -1) then
-!            nglob_eight_corners_only = nglob_eight_corners_only + 1
-!            global_corner_number(ibool(ix,iy,iz,ispec)) = nglob_eight_corners_only
-!          endif
-!
-!          node = global_corner_number(ibool(ix,iy,iz,ispec))
-!            do k=nsum,ninter-1
-!              if(node == mn(k)) goto 200
-!            enddo
-!
-!            mn(ninter) = node
-!            ninter = ninter + 1
-!  200 continue
-!
-!        enddo
-!      enddo
-!    enddo
-!
-!      nsum = ninter
-!      mp(ispec + 1) = nsum
-!
-!  enddo
-!
-!  end subroutine form_elt_connectivity_foelco
-!
-!!
-!!-------------------------------------------------------------------------------------------------
-!!
-!
-!  subroutine form_node_connectivity_fonoco(mn,mp,ne,np,nglob_eight_corners_only,&
-!                                           nspec,count_only,total_size_ne)
-!
-!!-----------------------------------------------------------------------
-!!
-!!   Forms the NE and NP arrays
-!!
-!!     Input :
-!!     -------
-!!           MN, MP, nspec
-!!           nglob_eight_corners_only    Number of nodes in the domain
-!!
-!!     Output :
-!!     --------
-!!           NE, NP   This is the node-connected element array pair.
-!!                    Integer array NE contains a list of the
-!!                    elements connected to each node, stored in stacked fashion.
-!!
-!!                    Array NP is the pointer array for the
-!!                    location of a node's element list in the NE array.
-!!                    Its length is equal to the number of points plus one.
-!!
-!!-----------------------------------------------------------------------
-!
-!  implicit none
-!
-!  include "constants.h"
-!  integer, parameter :: NGNOD_HEXAHEDRA = 8
-!
-!  ! only count the total size of the array that will be created, or actually create it
-!  logical count_only
-!  integer total_size_ne
-!
-!  integer nglob_eight_corners_only,nspec
-!
-!  integer, intent(in) ::  mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1)
-!
-!  integer, intent(out) ::  ne(total_size_ne),np(nglob_eight_corners_only+1)
-!
-!  integer nsum,inode,ispec,j
-!
-!  nsum = 1
-!  np(1) = 1
-!
-!  do inode=1,nglob_eight_corners_only
-!      do 200 ispec=1,nspec
-!
-!            do j=mp(ispec),mp(ispec + 1) - 1
-!                  if (mn(j) == inode) then
-!                        if(count_only) then
-!                          total_size_ne = nsum
-!                        else
-!                          ne(nsum) = ispec
-!                        endif
-!                        nsum = nsum + 1
-!                        goto 200
-!                  endif
-!            enddo
-!  200 continue
-!
-!      np(inode + 1) = nsum
-!
-!  enddo
-!
-!  end subroutine form_node_connectivity_fonoco
-!
-!!
-!!-------------------------------------------------------------------------------------------------
-!!
-!
-!  !subroutine create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,maskel,nspec,nglob_eight_corners_only,&
-!  !                                         count_only,total_size_ne,total_size_adj,face)
-!
-!  subroutine create_adjacency_table_adjncy(mn,mp,ne,np,adj,xadj,nspec,nglob_eight_corners_only,&
-!                                           count_only,total_size_ne,total_size_adj,face)
-!
-!!-----------------------------------------------------------------------
-!!
-!!   Establishes the element adjacency information of the mesh
-!!   Two elements are considered adjacent if they share a face.
-!!
-!!     Input :
-!!     -------
-!!           MN, MP, NE, NP, nspec
-!!           MASKEL    logical mask (length = nspec)
-!!
-!!     Output :
-!!     --------
-!!           ADJ, XADJ This is the element adjacency array pair. Array
-!!                     ADJ contains the list of the elements adjacent to
-!!                     element i. They are stored in a stacked fashion.
-!!                     Pointer array XADJ stores the location of each element list.
-!!
-!!-----------------------------------------------------------------------
-!
-!  implicit none
-!
-!  include "constants.h"
-!  integer, parameter :: NGNOD_HEXAHEDRA = 8
-!
-!  ! only count the total size of the array that will be created, or actually create it
-!  logical count_only,face
-!  integer total_size_ne,total_size_adj
-!
-!  integer nglob_eight_corners_only
-!
-!  integer, intent(in) :: mn(nspec*NGNOD_HEXAHEDRA),mp(nspec+1),ne(total_size_ne),np(nglob_eight_corners_only+1)
-!
-!  integer, intent(out) :: adj(total_size_adj),xadj(nspec+1)
-!
-!  logical maskel(nspec)
-!  integer countel(nspec)
-!
-!  integer nspec,iad,ispec,istart,istop,ino,node,jstart,jstop,nelem,jel
-!
-!  xadj(1) = 1
-!  iad = 1
-!
-!  do ispec=1,nspec
-!
-!  ! reset mask
-!  maskel(:) = .false.
-!
-!  ! mask current element
-!  maskel(ispec) = .true.
-!  if (face) countel(:) = 0
-!
-!  istart = mp(ispec)
-!  istop = mp(ispec+1) - 1
-!    do ino=istart,istop
-!      node = mn(ino)
-!      jstart = np(node)
-!      jstop = np(node + 1) - 1
-!        do 120 jel=jstart,jstop
-!            nelem = ne(jel)
-!            if(maskel(nelem)) goto 120
-!            if (face) then
-!              ! if 2 elements share at least 3 corners, therefore they share a face
-!              countel(nelem) = countel(nelem) + 1
-!              if (countel(nelem)>=3) then
-!                if(count_only) then
-!                  total_size_adj = iad
-!                else
-!                  adj(iad) = nelem
-!                endif
-!                maskel(nelem) = .true.
-!                iad = iad + 1
-!              endif
-!            else
-!              if(count_only) then
-!                total_size_adj = iad
-!              else
-!                adj(iad) = nelem
-!              endif
-!              maskel(nelem) = .true.
-!              iad = iad + 1
-!            endif
-!  120   continue
-!    enddo
-!
-!    xadj(ispec+1) = iad
-!
-!  enddo
-!
-!  end subroutine create_adjacency_table_adjncy
-
-!
-!-------------------------------------------------------------------------------------------------
-!
 ! PERMUTATIONS
 !
 !-------------------------------------------------------------------------------------------------

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in	2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in	2011-11-29 04:28:42 UTC (rev 19246)
@@ -258,10 +258,15 @@
   logical, parameter :: USE_MESH_COLORING_GPU = .false.
   integer, parameter :: MAX_NUMBER_OF_COLORS = 10000
 
-! enhanced coloring using Droux algorithm
+! enhanced coloring:
+!
+! using Droux algorithm
 ! try several times with one more color before giving up
   logical, parameter :: USE_DROUX_OPTIMIZATION = .false.
   integer, parameter :: MAX_NB_TRIES_OF_DROUX_1993 = 15
+!  
+! postprocess the colors to balance them if Droux (1993) algorithm is not used
+  logical, parameter :: BALANCE_COLORS_SIMPLE_ALGO = .true.
 
 !------------------------------------------------------
 !----------- do not modify anything below -------------

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90	2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90	2011-11-29 04:28:42 UTC (rev 19246)
@@ -703,10 +703,11 @@
   real(kind=CUSTOM_REAL), dimension(nglob) :: xstore,ystore,zstore
 
 !local parameters
-  real(kind=CUSTOM_REAL) :: mindist
+  real(kind=CUSTOM_REAL) :: mindist,distance
   integer, dimension(:), allocatable :: valence_external_mesh
   integer :: ispec,i,j,k,iglob,ier,count
-
+  real(kind=CUSTOM_REAL),parameter :: TOLERANCE_DISTANCE = 0.9
+ 
 ! detecting surface points/elements (based on valence check on NGLL points) for external mesh
   allocate(valence_external_mesh(nglob),stat=ier)
   if( ier /= 0 ) stop 'error allocate valence array'
@@ -722,7 +723,8 @@
                   + (ystore(ibool(1,1,1,:)) - ystore(ibool(2,1,1,:)))**2 &
                   + (zstore(ibool(1,1,1,:)) - zstore(ibool(2,1,1,:)))**2 )
   mindist = sqrt(mindist)
-
+  distance = TOLERANCE_DISTANCE*mindist
+  
 ! sets valence value to one corresponding to process rank  for points on cross-sections
   do ispec = 1, nspec
     do k = 1, NGLLZ
@@ -732,7 +734,7 @@
 
           ! chooses points close to cross-section
           if( abs((xstore(iglob)-section_xorg)*section_nx + (ystore(iglob)-section_yorg)*section_ny &
-                 + (zstore(iglob)-section_zorg)*section_nz )  < 0.8*mindist ) then
+                 + (zstore(iglob)-section_zorg)*section_nz )  < distance ) then
             ! sets valence to 1 for points on cross-sections
             valence_external_mesh(iglob) = myrank+1
           endif

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90	2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90	2011-11-29 04:28:42 UTC (rev 19246)
@@ -118,6 +118,7 @@
   real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: dist_pixel_image,dist_pixel_recv
   real(kind=CUSTOM_REAL):: pixel_midpoint_x,pixel_midpoint_z,x_loc,z_loc,xtmp,ztmp
   real(kind=CUSTOM_REAL):: ratio
+  real(kind=CUSTOM_REAL):: distance_x1,distance_x2,distance_z1,distance_z2
   integer:: npgeo,npgeo_glob
   integer:: i,j,k,iproc,iglob,ispec,ier
   ! data from mesh
@@ -263,11 +264,20 @@
     NZ_IMAGE_color = NZ_IMAGE_color * zoom_factor
     zoom = .true.
   endif
-
+  
   ! create all the pixels
-  size_pixel_horizontal = (xmax_color_image - xmin_color_image) / dble(NX_IMAGE_color)
-  size_pixel_vertical = (zmax_color_image - zmin_color_image) / dble(NZ_IMAGE_color)
-
+  if( NX_IMAGE_color /= 0 ) then
+    size_pixel_horizontal = (xmax_color_image - xmin_color_image) / dble(NX_IMAGE_color)
+  else
+    size_pixel_horizontal = 0.0
+  endif
+  
+  if( NZ_IMAGE_color /= 0 ) then
+    size_pixel_vertical = (zmax_color_image - zmin_color_image) / dble(NZ_IMAGE_color)
+  else
+    size_pixel_vertical = 0.0
+  endif
+  
   if (myrank == 0) then
     write(IMAIN,*) '  image points: ',npgeo_glob
     write(IMAIN,*) '  xmin/xmax: ',xmin_color_image,'/',xmax_color_image
@@ -287,6 +297,19 @@
   iglob_image_color(:,:) = -1
   ispec_image_color(:,:) = 0
   dist_pixel_image(:,:) = HUGEVAL
+
+  if( zoom ) then
+    distance_x1 = zoom_factor*size_pixel_horizontal
+    distance_x2 = (zoom_factor+1)*size_pixel_horizontal
+    distance_z1 = zoom_factor*size_pixel_vertical
+    distance_z2 = (zoom_factor+1)*size_pixel_vertical
+  else
+    distance_x1 = 0.0
+    distance_x2 = 2.0*size_pixel_horizontal
+    distance_z1 = 0.0
+    distance_z2 = 2.0*size_pixel_vertical
+  endif
+  
   do j=1,NZ_IMAGE_color
     do i=1,NX_IMAGE_color
       ! calculates midpoint of pixel
@@ -309,15 +332,8 @@
         z_loc = zcoord(iglob)
 
         ! checks if inside pixel range for larger numbers of points, minimizing computation time
-        if( zoom ) then
-          if( x_loc < xtmp-zoom_factor*size_pixel_horizontal .or. &
-             x_loc > xtmp + (zoom_factor+1)*size_pixel_horizontal ) cycle
-          if( z_loc < ztmp-zoom_factor*size_pixel_vertical .or. &
-             z_loc > ztmp + (zoom_factor+1)*size_pixel_vertical ) cycle
-        else
-          if( x_loc < xtmp .or. x_loc > xtmp + size_pixel_horizontal ) cycle
-          if( z_loc < ztmp .or. z_loc > ztmp + size_pixel_vertical ) cycle
-        endif
+        if( x_loc < xtmp - distance_x1 .or. x_loc > xtmp + distance_x2 ) cycle
+        if( z_loc < ztmp - distance_z1 .or. z_loc > ztmp + distance_z2 ) cycle
 
         ! stores closest iglob
         x_loc = pixel_midpoint_x - x_loc

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90	2011-11-29 04:02:32 UTC (rev 19245)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90	2011-11-29 04:28:42 UTC (rev 19246)
@@ -196,7 +196,7 @@
         where( mustore > TINYVAL ) 
           rhostore = (rho_vs*rho_vs) / mustore
         elsewhere
-          rhostore(:,:,:,:) = 0.0_CUSTOM_REAL
+          rhostore = 0.0_CUSTOM_REAL
         endwhere
       endif
     endif

Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/Cluster/pbs/valgrind_go_solver_pbs.bash
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/Cluster/pbs/valgrind_go_solver_pbs.bash	                        (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/Cluster/pbs/valgrind_go_solver_pbs.bash	2011-11-29 04:28:42 UTC (rev 19246)
@@ -0,0 +1,77 @@
+#!/bin/bash
+#
+# Valgrind, a suite of tools for debugging and profiling
+# http://valgrind.org/
+#
+
+# bash script
+#PBS -S /bin/bash
+
+# job name 
+#PBS -N valgrind_go_solver
+
+# joins output and error information
+#PBS -j oe
+
+# job output file
+#PBS -o in_out_files/OUTPUT_FILES/job.o
+
+###########################################################
+# USER PARAMETERS
+
+# Queue
+#PBS -q tromp
+
+# 150 CPUs ( 18*8+6 ), walltime 15 hour
+#PBS -l nodes=18:ppn=8+1:ppn=6,walltime=15:00:00
+
+# valgrind mpi library 
+PRELOAD_LIB=/my_valgrind_path/valgrind/lib/valgrind/libmpiwrap-x86-linux.so
+
+###########################################################
+
+cd $PBS_O_WORKDIR
+
+# script to run the mesher and the solver
+# read Par_file to get information about the run
+# compute total number of nodes needed
+NPROC=`grep NPROC in_data_files/Par_file | cut -d = -f 2 `
+
+# total number of nodes is the product of the values read
+numnodes=$NPROC
+
+mkdir -p in_out_files/OUTPUT_FILES
+
+# backup files used for this simulation
+cp in_data_files/Par_file in_out_files/OUTPUT_FILES/
+cp in_data_files/STATIONS in_out_files/OUTPUT_FILES/
+cp in_data_files/CMTSOLUTION in_out_files/OUTPUT_FILES/
+
+# obtain job information
+cat $PBS_NODEFILE > in_out_files/OUTPUT_FILES/compute_nodes
+echo "$PBS_JOBID" > in_out_files/OUTPUT_FILES/jobid
+
+echo starting run in current directory $PWD
+cd bin/
+
+echo " "
+echo "run: memory leaks"
+echo " "
+sleep 2
+
+# memory leaks
+LD_PRELOAD=$PRELOAD_LIB mpiexec -np $numnodes valgrind --leak-check=full ./xspecfem3D >& ../in_out_files/OUTPUT_FILES/output.memory-leaks.log
+
+sleep 2
+echo " "
+echo "run: cache misses"
+echo " "
+
+# cache misses
+LD_PRELOAD=$PRELOAD_LIB mpiexec -np $numnodes valgrind --tool=cachegrind ./xspecfem3D >& ../in_out_files/OUTPUT_FILES/output.cache-misses.log
+
+cp cachegrind.out.* ../in_out_files/OUTPUT_FILES/
+
+echo " "
+echo "finished successfully"
+


Property changes on: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/Cluster/pbs/valgrind_go_solver_pbs.bash
___________________________________________________________________
Name: svn:executable
   + *

Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/locate_partition.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/locate_partition.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/locate_partition.f90	2011-11-29 04:28:42 UTC (rev 19246)
@@ -0,0 +1,212 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! utility to locate partition which is closest to given point location
+!
+! compile with:
+! > gfortran -I src/shared -o bin/xlocate_partition utils/locate_partition.f90
+!
+! run with:
+! > ../../bin/xlocate_partition -300000.0 11000.0 -3000.0 in_out_files/DATABASES_MPI/
+
+  program locate_partition
+
+! works for external, unregular meshes
+
+  implicit none
+
+  include 'constants.h'
+
+  ! mesh coordinates
+  real(kind=CUSTOM_REAL),dimension(:),allocatable :: xstore, ystore, zstore
+  integer, dimension(:,:,:,:),allocatable :: ibool
+  integer :: NSPEC_AB, NGLOB_AB
+
+  integer :: i,ios,ier
+  integer :: iproc
+  character(len=256) :: arg(4)
+  character(len=256) :: LOCAL_PATH
+  character(len=256) :: prname_lp
+
+  real(kind=CUSTOM_REAL) :: x_found,y_found,z_found,distance
+  real(kind=CUSTOM_REAL) :: target_x,target_y,target_z
+  real(kind=CUSTOM_REAL) :: total_x,total_y,total_z
+  real(kind=CUSTOM_REAL) :: total_distance
+  integer :: total_partition
+
+! checks given arguments
+  print *
+  print *,'locate partition'
+  print *,'----------------------------'
+  
+  do i = 1, 4
+    call getarg(i,arg(i))
+    if (i <= 4 .and. trim(arg(i)) == '') then
+      print *, 'Usage: '
+      print *, '        xlocate_partition x y z Databases_directory'
+      stop ' Reenter command line options'
+    endif
+  enddo
+  read(arg(1),*) target_x
+  read(arg(2),*) target_y
+  read(arg(3),*) target_z
+  LOCAL_PATH = arg(4)
+
+  print *,'search location: '
+  print *,'  x = ',target_x
+  print *,'  y = ',target_y
+  print *,'  z = ',target_z
+  print *,'in directory: ',trim(LOCAL_PATH)
+  print *,'----------------------------'
+  print *
+  
+  ! loops over slices (process partitions)
+  total_distance = HUGEVAL
+  total_partition = -1
+  total_x = 0.0
+  total_y = 0.0
+  total_z = 0.0
+  iproc = -1
+  do while( iproc < 10000000 )
+    ! starts with 0  
+    iproc = iproc + 1
+
+    ! gets number of elements and global points for this partition
+    write(prname_lp,'(a,i6.6,a)') trim(LOCAL_PATH)//'/proc',iproc,'_'
+    open(unit=27,file=prname_lp(1:len_trim(prname_lp))//'external_mesh.bin',&
+          status='old',action='read',form='unformatted',iostat=ios)
+    if( ios /= 0 ) exit
+    
+    read(27) NSPEC_AB
+    read(27) NGLOB_AB
+
+    ! ibool file
+    allocate(ibool(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array ibool'
+    read(27) ibool
+
+    ! global point arrays
+    allocate(xstore(NGLOB_AB),ystore(NGLOB_AB),zstore(NGLOB_AB),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array xstore etc.'
+    read(27) xstore
+    read(27) ystore
+    read(27) zstore
+    close(27)
+
+    print*,'partition: ',iproc
+    print*,'  min/max x = ',minval(xstore),maxval(xstore)
+    print*,'  min/max y = ',minval(ystore),maxval(ystore)
+    print*,'  min/max z = ',minval(zstore),maxval(zstore)
+    print*
+    
+    ! gets distance to target location
+    call get_closest_point(target_x,target_y,target_z, &
+                         NGLOB_AB,NSPEC_AB,xstore,ystore,zstore,ibool, &
+                         distance,x_found,y_found,z_found)
+
+    if( distance < total_distance ) then
+      total_distance = distance
+      total_partition = iproc
+      total_x = x_found
+      total_y = y_found
+      total_z = z_found
+    endif
+
+    ! cleans up memory allocations
+    deallocate(ibool,xstore,ystore,zstore)
+
+  enddo  ! all slices for points
+  
+  ! checks
+  if (total_partition < 0 ) then
+    print*,'Error: partition not found among ',iproc,'partitions searched'
+    stop 'Error: partition not found'
+  endif
+  
+  ! output
+  print*,'number of partitions searched: ',iproc
+  print*
+  print*,'closest grid point location found:'
+  print*,'  x = ',total_x
+  print*,'  y = ',total_y
+  print*,'  z = ',total_z
+  print*,'  distance to search location = ',sqrt(total_distance)
+  print*,'closest partition: '
+  print*,'  partition = ',total_partition
+  print*
+
+  end program locate_partition
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine get_closest_point(target_x,target_y,target_z, &
+                         NGLOB_AB,NSPEC_AB,xstore,ystore,zstore,ibool, &
+                         distance,x_found,y_found,z_found)
+
+  implicit none
+  include 'constants.h'
+  
+  real(kind=CUSTOM_REAL),intent(in) :: target_x,target_y,target_z
+
+  integer,intent(in) :: NSPEC_AB,NGLOB_AB
+  real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: xstore, ystore, zstore
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
+
+  real(kind=CUSTOM_REAL),intent(out) :: distance
+  real(kind=CUSTOM_REAL),intent(out) :: x_found,y_found,z_found
+
+  ! local parameters
+  integer :: i,j,k,ispec,iglob
+  real(kind=CUSTOM_REAL) :: dist
+
+  distance = HUGEVAL
+  x_found = 0.0
+  y_found = 0.0
+  z_found = 0.0
+  
+  do ispec=1,NSPEC_AB
+    do k=1,NGLLZ
+      do j=1,NGLLY 
+        do i=1,NGLLX
+          iglob = ibool(i,j,k,ispec)
+          dist =  (target_x - xstore(iglob))*(target_x - xstore(iglob)) &
+                + (target_y - ystore(iglob))*(target_y - ystore(iglob)) &
+                + (target_z - zstore(iglob))*(target_z - zstore(iglob)) 
+                
+          if( dist < distance ) then
+            distance = dist
+            x_found = xstore(iglob)
+            y_found = ystore(iglob)
+            z_found = zstore(iglob)
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+
+  end subroutine



More information about the CIG-COMMITS mailing list