[cig-commits] r12448 - seismo/2D/SPECFEM2D/trunk

dkomati1 at geodynamics.org dkomati1 at geodynamics.org
Mon Jul 21 05:01:01 PDT 2008


Author: dkomati1
Date: 2008-07-21 05:01:00 -0700 (Mon, 21 Jul 2008)
New Revision: 12448

Modified:
   seismo/2D/SPECFEM2D/trunk/checkgrid.F90
   seismo/2D/SPECFEM2D/trunk/compute_forces_acoustic.f90
   seismo/2D/SPECFEM2D/trunk/compute_forces_elastic.f90
   seismo/2D/SPECFEM2D/trunk/constants.h
   seismo/2D/SPECFEM2D/trunk/meshfem2D.F90
   seismo/2D/SPECFEM2D/trunk/plotpost.F90
   seismo/2D/SPECFEM2D/trunk/specfem2D.F90
Log:
committed version 2 of the improved code developed in Barcelona based on the ParaVer analysis performed with Jesus Labarta


Modified: seismo/2D/SPECFEM2D/trunk/checkgrid.F90
===================================================================
--- seismo/2D/SPECFEM2D/trunk/checkgrid.F90	2008-07-21 00:41:48 UTC (rev 12447)
+++ seismo/2D/SPECFEM2D/trunk/checkgrid.F90	2008-07-21 12:01:00 UTC (rev 12448)
@@ -42,7 +42,8 @@
 
   subroutine checkgrid(vpext,vsext,rhoext,density,elastcoef,ibool,kmato,coord,npoin,vpmin,vpmax, &
                  assign_external_model,nspec,numat,deltat,f0,t0,initialfield,time_function_type, &
-                 coorg,xinterp,zinterp,shapeint,knods,simulation_title,npgeo,pointsdisp,ngnod,any_elastic,myrank,nproc)
+                 coorg,xinterp,zinterp,shapeint,knods,simulation_title,npgeo,pointsdisp,ngnod,any_elastic,myrank,nproc, &
+                 nspec_outer,nspec_inner)
 
 ! check the mesh, stability and number of points per wavelength
 
@@ -53,6 +54,9 @@
   include 'mpif.h'
 #endif
 
+!! DK DK to analyze Cuthill-McKee display
+  integer :: UPPER_LIMIT_DISPLAY,nspec_outer,nspec_inner
+
 ! color palette
   integer, parameter :: NUM_COLORS = 236
   double precision, dimension(NUM_COLORS) :: red,green,blue
@@ -123,6 +127,14 @@
 ! title of the plot
   character(len=60) simulation_title
 
+!! DK DK to analyze Cuthill-McKee display
+! UPPER_LIMIT_DISPLAY = nspec
+  UPPER_LIMIT_DISPLAY = nspec_outer
+! UPPER_LIMIT_DISPLAY = nspec_inner
+! UPPER_LIMIT_DISPLAY = 2300
+
+  if(UPPER_LIMIT_DISPLAY > nspec) stop 'cannot have UPPER_LIMIT_DISPLAY > nspec in checkgrid.F90'
+
 #ifndef USE_MPI
   allocate(coorg_recv(1,1))
   allocate(RGB_recv(1))
@@ -1684,7 +1696,7 @@
 
      if ( myrank == 0 ) then
         num_ispec = num_ispec + 1
-        write(24,*) '% elem ',ispec
+        write(24,*) '% elem ',num_ispec
      end if
 
   do i=1,pointsdisp
@@ -2014,7 +2026,7 @@
   do ispec = 1, nspec
      if ( myrank == 0 ) then
         num_ispec = num_ispec + 1
-        write(24,*) '% elem ',ispec
+        write(24,*) '% elem ',num_ispec
      end if
 
   do i=1,pointsdisp
@@ -2396,10 +2408,10 @@
   num_ispec = 0
 end if
 
-  do ispec = 1, nspec
+  do ispec = 1, UPPER_LIMIT_DISPLAY
      if ( myrank == 0 ) then
         num_ispec = num_ispec + 1
-        write(24,*) '% elem ',ispec
+        write(24,*) '% elem ',num_ispec
      end if
   do i=1,pointsdisp
   do j=1,pointsdisp
@@ -2541,10 +2553,9 @@
      end do
 
   else
-     call MPI_SEND (nspec, 1, MPI_INTEGER, 0, 42, MPI_COMM_WORLD, ier)
-     call MPI_SEND (coorg_send, nspec*5*2, MPI_DOUBLE_PRECISION, 0, 42, MPI_COMM_WORLD, ier)
-     call MPI_SEND (greyscale_send, nspec, MPI_INTEGER, 0, 42, MPI_COMM_WORLD, ier)
-
+     call MPI_SEND (UPPER_LIMIT_DISPLAY, 1, MPI_INTEGER, 0, 42, MPI_COMM_WORLD, ier)
+     call MPI_SEND (coorg_send, UPPER_LIMIT_DISPLAY*5*2, MPI_DOUBLE_PRECISION, 0, 42, MPI_COMM_WORLD, ier)
+     call MPI_SEND (greyscale_send, UPPER_LIMIT_DISPLAY, MPI_INTEGER, 0, 42, MPI_COMM_WORLD, ier)
   end if
 #endif
 
@@ -2641,7 +2652,7 @@
   write(24,*) '24.35 CM 18.9 CM MV'
   write(24,*) usoffset,' CM 2 div neg 0 MR'
   write(24,*) 'currentpoint gsave translate -90 rotate 0 0 moveto'
-  write(24,*) '(Mesh partitioning) show'
+  write(24,*) '(Mesh stability condition \(red = bad\)) show'
   write(24,*) 'grestore'
   write(24,*) '25.35 CM 18.9 CM MV'
   write(24,*) usoffset,' CM 2 div neg 0 MR'
@@ -2669,11 +2680,11 @@
   num_ispec = 0
   end if
 
-  do ispec = 1, nspec
+  do ispec = 1, UPPER_LIMIT_DISPLAY
 
      if ( myrank == 0 ) then
         num_ispec = num_ispec + 1
-        write(24,*) '% elem ',ispec
+        write(24,*) '% elem ',num_ispec
      end if
 
   do i=1,pointsdisp
@@ -2794,8 +2805,8 @@
      end do
 
   else
-     call MPI_SEND (nspec, 1, MPI_INTEGER, 0, 42, MPI_COMM_WORLD, ier)
-     call MPI_SEND (coorg_send, nspec*5*2, MPI_DOUBLE_PRECISION, 0, 42, MPI_COMM_WORLD, ier)
+     call MPI_SEND (UPPER_LIMIT_DISPLAY, 1, MPI_INTEGER, 0, 42, MPI_COMM_WORLD, ier)
+     call MPI_SEND (coorg_send, UPPER_LIMIT_DISPLAY*5*2, MPI_DOUBLE_PRECISION, 0, 42, MPI_COMM_WORLD, ier)
 
   end if
 #endif

Modified: seismo/2D/SPECFEM2D/trunk/compute_forces_acoustic.f90
===================================================================
--- seismo/2D/SPECFEM2D/trunk/compute_forces_acoustic.f90	2008-07-21 00:41:48 UTC (rev 12447)
+++ seismo/2D/SPECFEM2D/trunk/compute_forces_acoustic.f90	2008-07-21 12:01:00 UTC (rev 12448)
@@ -48,7 +48,7 @@
                hprime_zz,hprimewgll_zz,wxgll,wzgll, &
                ibegin_bottom,iend_bottom,ibegin_top,iend_top, &
                jbegin_left,jend_left,jbegin_right,jend_right, &
-               nspec_inner_outer, ispec_inner_outer_to_glob, num_phase_inner_outer)
+               nspec_outer, we_are_in_phase_outer)
 
 ! compute forces for the acoustic elements
 
@@ -83,15 +83,15 @@
   real(kind=CUSTOM_REAL), dimension(NGLLZ) :: wzgll
 
 ! for overlapping MPI communications with computation
-  integer, intent(in)  :: nspec_inner_outer
-  integer, dimension(max(1,nspec_inner_outer)), intent(in)  :: ispec_inner_outer_to_glob
-  logical, intent(in)  :: num_phase_inner_outer
+  integer, intent(in)  :: nspec_outer
+!!!!!!!  integer, dimension(max(1,nspec_outer)), intent(in)  :: ispec_inner_outer_to_glob
+  logical, intent(in)  :: we_are_in_phase_outer
 
 !---
 !--- local variables
 !---
 
-  integer :: ispec,ispec_inner_outer,i,j,k,iglob,ispecabs,ibegin,iend,jbegin,jend
+  integer :: ispec,i,j,k,iglob,ispecabs,ibegin,iend,jbegin,jend
 
 ! spatial derivatives
   real(kind=CUSTOM_REAL) :: dux_dxi,dux_dgamma,dux_dxl,dux_dzl
@@ -106,10 +106,22 @@
   real(kind=CUSTOM_REAL) :: mul_relaxed,lambdal_relaxed,kappal,cpl,rhol
 
 ! loop over spectral elements
-  do ispec_inner_outer = 1,nspec_inner_outer
+! do ispec_inner_outer = 1,nspec_outer
 
-    ispec = ispec_inner_outer_to_glob(ispec_inner_outer)
+!   ispec = ispec_inner_outer_to_glob(ispec_inner_outer)
 
+  integer :: ifirstelem,ilastelem
+
+  if(we_are_in_phase_outer) then
+    ifirstelem = 1
+    ilastelem = nspec_outer
+  else
+    ifirstelem = nspec_outer + 1
+    ilastelem = nspec
+  endif
+
+  do ispec = ifirstelem,ilastelem
+
 !---
 !--- acoustic spectral element
 !---
@@ -177,7 +189,7 @@
     enddo ! end of loop over all spectral elements
 
 ! only for the first call to compute_forces_acoustic (during computation on outer elements)
-  if ( num_phase_inner_outer ) then
+  if ( we_are_in_phase_outer ) then
 
 !
 !--- absorbing boundaries

Modified: seismo/2D/SPECFEM2D/trunk/compute_forces_elastic.f90
===================================================================
--- seismo/2D/SPECFEM2D/trunk/compute_forces_elastic.f90	2008-07-21 00:41:48 UTC (rev 12447)
+++ seismo/2D/SPECFEM2D/trunk/compute_forces_elastic.f90	2008-07-21 12:01:00 UTC (rev 12448)
@@ -49,7 +49,7 @@
        e13,dux_dxl_n,duz_dzl_n,duz_dxl_n,dux_dzl_n, &
        dux_dxl_np1,duz_dzl_np1,duz_dxl_np1,dux_dzl_np1,hprime_xx,hprimewgll_xx, &
        hprime_zz,hprimewgll_zz,wxgll,wzgll,inv_tau_sigma_nu1,phi_nu1,inv_tau_sigma_nu2,phi_nu2,Mu_nu1,Mu_nu2,N_SLS, &
-       nspec_inner_outer,ispec_inner_outer_to_glob,num_phase_inner_outer,deltat,coord,add_Bielak_conditions, &
+       nspec_outer,we_are_in_phase_outer,deltat,coord,add_Bielak_conditions, &
        x0_source, z0_source, A_plane, B_plane, C_plane, angleforce_refl, c_inc, c_refl, time_offset,f0, &
        v0x_left,v0z_left,v0x_right,v0z_right,v0x_bot,v0z_bot,t0x_left,t0z_left,t0x_right,t0z_right,t0x_bot,t0z_bot,&
        nleft,nright,nbot,over_critical_angle)
@@ -100,15 +100,15 @@
   real(kind=CUSTOM_REAL), dimension(NGLLZ) :: wzgll
 
 ! for overlapping MPI communications with computation
-  integer, intent(in) :: nspec_inner_outer
-  integer, dimension(max(1,nspec_inner_outer)), intent(in) :: ispec_inner_outer_to_glob
-  logical, intent(in) :: num_phase_inner_outer
+  integer, intent(in) :: nspec_outer
+!!!!!!!!!!!!!!!!!  integer, dimension(max(1,nspec_outer)), intent(in) :: ispec_inner_outer_to_glob
+  logical, intent(in) :: we_are_in_phase_outer
 
 !---
 !--- local variables
 !---
 
-  integer :: ispec,ispec_inner_outer,i,j,k,iglob,ispecabs,ibegin,iend
+  integer :: ispec,i,j,k,iglob,ispecabs,ibegin,iend
 
 ! spatial derivatives
   real(kind=CUSTOM_REAL) :: dux_dxi,dux_dgamma,duz_dxi,duz_dgamma
@@ -141,19 +141,31 @@
   double precision, dimension(nbot) :: v0x_bot,v0z_bot,t0x_bot,t0z_bot
   integer count_left,count_right,count_bot
 
+  integer :: ifirstelem,ilastelem
+
 ! only for the first call to compute_forces_elastic (during computation on outer elements)
-  if ( num_phase_inner_outer ) then
+  if ( we_are_in_phase_outer ) then
 ! compute Grad(displ_elastic) at time step n for attenuation
   if(TURN_ATTENUATION_ON) call compute_gradient_attenuation(displ_elastic,dux_dxl_n,duz_dxl_n, &
       dux_dzl_n,duz_dzl_n,xix,xiz,gammax,gammaz,ibool,elastic,hprime_xx,hprime_zz,nspec,npoin)
   endif
 
 ! loop over spectral elements
-  do ispec_inner_outer = 1,nspec_inner_outer
+! do ispec_inner_outer = 1,nspec_outer
 
 ! get global numbering for inner or outer elements
-    ispec = ispec_inner_outer_to_glob(ispec_inner_outer)
+!   ispec = ispec_inner_outer_to_glob(ispec_inner_outer)
 
+  if(we_are_in_phase_outer) then
+    ifirstelem = 1
+    ilastelem = nspec_outer
+  else
+    ifirstelem = nspec_outer + 1
+    ilastelem = nspec
+  endif
+
+  do ispec = ifirstelem,ilastelem
+
 !---
 !--- elastic spectral element
 !---
@@ -298,7 +310,7 @@
     enddo ! end of loop over all spectral elements
 
 ! only for the first call to compute_forces_elastic (during computation on outer elements)
-  if ( num_phase_inner_outer ) then
+  if ( we_are_in_phase_outer ) then
 
 !
 !--- absorbing boundaries

Modified: seismo/2D/SPECFEM2D/trunk/constants.h
===================================================================
--- seismo/2D/SPECFEM2D/trunk/constants.h	2008-07-21 00:41:48 UTC (rev 12447)
+++ seismo/2D/SPECFEM2D/trunk/constants.h	2008-07-21 12:01:00 UTC (rev 12448)
@@ -16,6 +16,16 @@
 ! the code does NOT work if NGLLZ /= NGLLX because it then cannot handle a non-structured mesh
   integer, parameter :: NGLLZ = NGLLX
 
+! for Cuthill-McKee (1969) permutation
+  logical, parameter :: PERFORM_CUTHILL_MCKEE = .true.
+  logical, parameter :: INVERSE = .true.
+  logical, parameter :: FACE = .false.
+  integer, parameter :: NGNOD_QUADRANGLE = 4
+! perform classical or multi-level Cuthill-McKee ordering
+  logical, parameter :: CMcK_MULTI = .false.
+! maximum size if multi-level Cuthill-McKee ordering
+  integer, parameter :: LIMIT_MULTI_CUTHILL = 50
+
 ! compute and output acoustic and elastic energy (slows down the code significantly)
   logical, parameter :: OUTPUT_ENERGY = .false.
 

Modified: seismo/2D/SPECFEM2D/trunk/meshfem2D.F90
===================================================================
--- seismo/2D/SPECFEM2D/trunk/meshfem2D.F90	2008-07-21 00:41:48 UTC (rev 12447)
+++ seismo/2D/SPECFEM2D/trunk/meshfem2D.F90	2008-07-21 12:01:00 UTC (rev 12448)
@@ -984,13 +984,16 @@
 
 ! partitioning
      select case (partitioning_method)
+
      case(1)
+
         do iproc = 0, nproc-2
            part(iproc*floor(real(nelmnts)/real(nproc)):(iproc+1)*floor(real(nelmnts)/real(nproc))-1) = iproc
         end do
         part(floor(real(nelmnts)/real(nproc))*(nproc-1):nelmnts-1) = nproc - 1
 
      case(2)
+
 #ifdef USE_METIS
         call Part_metis(nelmnts, xadj, adjncy, vwgt, adjwgt, nproc, nb_edges, edgecut, part, metis_options)
 #else
@@ -1000,6 +1003,7 @@
 #endif
 
      case(3)
+
 #ifdef USE_SCOTCH
         call Part_scotch(nelmnts, xadj, adjncy, vwgt, adjwgt, nproc, nb_edges, edgecut, part, scotch_strategy)
 #else

Modified: seismo/2D/SPECFEM2D/trunk/plotpost.F90
===================================================================
--- seismo/2D/SPECFEM2D/trunk/plotpost.F90	2008-07-21 00:41:48 UTC (rev 12447)
+++ seismo/2D/SPECFEM2D/trunk/plotpost.F90	2008-07-21 12:01:00 UTC (rev 12448)
@@ -49,7 +49,7 @@
           boundvect,assign_external_model,cutsnaps,sizemax_arrows,nelemabs,numat,pointsdisp, &
           nspec,ngnod,coupled_acoustic_elastic,any_acoustic,plot_lowerleft_corner_only, &
           fluid_solid_acoustic_ispec,fluid_solid_acoustic_iedge,num_fluid_solid_edges, &
-          myrank, nproc)
+          myrank,nproc)
 
 !
 ! PostScript display routine
@@ -1377,7 +1377,7 @@
   if ( myrank == 0 ) then
      write(IOUT,*) 'X min, max = ',xmin,xmax
      write(IOUT,*) 'Z min, max = ',zmin,zmax
-  end if
+  endif
 
 ! ratio of physical page size/size of the domain meshed
   ratio_page = min(rpercentz*sizez/(zmax-zmin),rpercentx*sizex/(xmax-xmin)) / 100.d0
@@ -1390,7 +1390,7 @@
 #endif
   if ( myrank == 0 ) then
      write(IOUT,*) 'Max norm = ',dispmax
-  end if
+  endif
 
 !
 !---- open PostScript file
@@ -1552,8 +1552,7 @@
 !---- print the spectral elements mesh in PostScript
 !
 
-  write(IOUT,*) 'Shape functions based on ',ngnod,' control nodes'
-  end if
+  endif
 
 
   convert = PI / 180.d0
@@ -1566,7 +1565,7 @@
   if ( myrank /= 0 ) then
      allocate(coorg_send(2,nspec*((NGLLX-subsamp)/subsamp)*((NGLLX-subsamp)/subsamp)*4))
      allocate(RGB_send(1,nspec*((NGLLX-subsamp)/subsamp)*((NGLLX-subsamp)/subsamp)))
-  end if
+  endif
   buffer_offset = 0
   RGB_offset = 0
 
@@ -1609,7 +1608,7 @@
      buffer_offset = buffer_offset + 1
      coorg_send(1,buffer_offset) = xw
      coorg_send(2,buffer_offset) = zw
-  end if
+  endif
 
   xw = coord(1,ibool(i+subsamp,j,ispec))
   zw = coord(2,ibool(i+subsamp,j,ispec))
@@ -1623,7 +1622,7 @@
      buffer_offset = buffer_offset + 1
      coorg_send(1,buffer_offset) = xw
      coorg_send(2,buffer_offset) = zw
-  end if
+  endif
 
   xw = coord(1,ibool(i+subsamp,j+subsamp,ispec))
   zw = coord(2,ibool(i+subsamp,j+subsamp,ispec))
@@ -1637,7 +1636,7 @@
      buffer_offset = buffer_offset + 1
      coorg_send(1,buffer_offset) = xw
      coorg_send(2,buffer_offset) = zw
-  end if
+  endif
 
   xw = coord(1,ibool(i,j+subsamp,ispec))
   zw = coord(2,ibool(i,j+subsamp,ispec))
@@ -1651,7 +1650,7 @@
      buffer_offset = buffer_offset + 1
      coorg_send(1,buffer_offset) = xw
      coorg_send(2,buffer_offset) = zw
-  end if
+  endif
 
 ! display P-velocity model using gray levels
   if ( myrank == 0 ) then
@@ -1659,13 +1658,12 @@
   else
      RGB_offset = RGB_offset + 1
      RGB_send(1,RGB_offset) = x1
-  end if
+  endif
 
           enddo
     enddo
   enddo
 
-
 #ifdef USE_MPI
   if (myrank == 0 ) then
 
@@ -1693,14 +1691,14 @@
                  write(24,499) coorg_recv(1,buffer_offset), coorg_recv(2,buffer_offset)
                  RGB_offset = RGB_offset + 1
                  write(24,604) RGB_recv(1,RGB_offset)
-              end do
-           end do
-        end do
+              enddo
+           enddo
+        enddo
 
         deallocate(coorg_recv)
         deallocate(RGB_recv)
 
-     end do
+     enddo
   else
      call MPI_SEND (nspec, 1, MPI_INTEGER, 0, 42, MPI_COMM_WORLD, ier)
      call MPI_SEND (coorg_send(1,1), 2*nspec*((NGLLX-subsamp)/subsamp)*((NGLLX-subsamp)/subsamp)*4, &
@@ -1711,7 +1709,7 @@
      deallocate(coorg_send)
      deallocate(RGB_send)
 
-  end if
+  endif
 
 
 #endif
@@ -1727,7 +1725,7 @@
      write(24,*) '%'
      write(24,*) '% spectral element mesh'
      write(24,*) '%'
-  end if
+  endif
 
   if ( myrank /= 0 ) then
 
@@ -1738,13 +1736,13 @@
               allocate(color_send(2*nspec))
            else
               allocate(color_send(1*nspec))
-           end if
+           endif
         else
            allocate(coorg_send(2,nspec*6))
            if ( colors == 1 ) then
               allocate(color_send(1*nspec))
-           end if
-        end if
+           endif
+        endif
      else
         if ( numbers == 1 ) then
            allocate(coorg_send(2,nspec*((pointsdisp-1)*3+max(0,pointsdisp-2)+1+1)))
@@ -1752,16 +1750,16 @@
               allocate(color_send(2*nspec))
            else
               allocate(color_send(1*nspec))
-           end if
+           endif
         else
            allocate(coorg_send(2,nspec*((pointsdisp-1)*3+max(0,pointsdisp-2)+1)))
            if ( colors == 1 ) then
               allocate(color_send(1*nspec))
-           end if
-        end if
-     end if
+           endif
+        endif
+     endif
 
-  end if
+  endif
   buffer_offset = 0
   RGB_offset = 0
 
@@ -1769,7 +1767,7 @@
 
   if ( myrank == 0 ) then
      write(24,*) '% elem ',ispec
-  end if
+  endif
 
   do i=1,pointsdisp
   do j=1,pointsdisp
@@ -1796,7 +1794,7 @@
      buffer_offset = buffer_offset + 1
      coorg_send(1,buffer_offset) = x1
      coorg_send(2,buffer_offset) = z1
-  end if
+  endif
 
   if(ngnod == 4) then
 
@@ -1813,7 +1811,7 @@
      buffer_offset = buffer_offset + 1
      coorg_send(1,buffer_offset) = x2
      coorg_send(2,buffer_offset) = z2
-  end if
+  endif
 
   ir=pointsdisp
   is=pointsdisp
@@ -1827,7 +1825,7 @@
      buffer_offset = buffer_offset + 1
      coorg_send(1,buffer_offset) = x2
      coorg_send(2,buffer_offset) = z2
-  end if
+  endif
 
   is=pointsdisp
   ir=1
@@ -1841,7 +1839,7 @@
      buffer_offset = buffer_offset + 1
      coorg_send(1,buffer_offset) = x2
      coorg_send(2,buffer_offset) = z2
-  end if
+  endif
 
   ir=1
   is=2
@@ -1855,7 +1853,7 @@
      buffer_offset = buffer_offset + 1
      coorg_send(1,buffer_offset) = x2
      coorg_send(2,buffer_offset) = z2
-  end if
+  endif
 
   else
 
@@ -1871,7 +1869,7 @@
        buffer_offset = buffer_offset + 1
        coorg_send(1,buffer_offset) = x2
        coorg_send(2,buffer_offset) = z2
-    end if
+    endif
   enddo
 
   ir=pointsdisp
@@ -1886,7 +1884,7 @@
        buffer_offset = buffer_offset + 1
        coorg_send(1,buffer_offset) = x2
        coorg_send(2,buffer_offset) = z2
-    end if
+    endif
   enddo
 
   is=pointsdisp
@@ -1901,7 +1899,7 @@
        buffer_offset = buffer_offset + 1
        coorg_send(1,buffer_offset) = x2
        coorg_send(2,buffer_offset) = z2
-    end if
+    endif
   enddo
 
   ir=1
@@ -1916,14 +1914,14 @@
        buffer_offset = buffer_offset + 1
        coorg_send(1,buffer_offset) = x2
        coorg_send(2,buffer_offset) = z2
-    end if
+    endif
   enddo
 
   endif
 
   if ( myrank == 0 ) then
      write(24,*) 'CO'
-  end if
+  endif
 
   if(colors == 1) then
 
@@ -1940,7 +1938,7 @@
   else
      RGB_offset = RGB_offset + 1
      color_send(RGB_offset) = icol
-  end if
+  endif
 
   endif
 
@@ -1952,7 +1950,7 @@
       write(24,*) '0 setgray ST'
     endif
   endif
-  end if
+  endif
 
 ! write the element number, the group number and the material number inside the element
   if(numbers == 1) then
@@ -1966,7 +1964,7 @@
 
   if ( myrank == 0 ) then
   if(colors == 1) write(24,*) '1 setgray'
-  end if
+  endif
 
   if ( myrank == 0 ) then
      write(24,500) xw,zw
@@ -1974,7 +1972,7 @@
      buffer_offset = buffer_offset + 1
      coorg_send(1,buffer_offset) = x2
      coorg_send(2,buffer_offset) = z2
-  end if
+  endif
 
 ! write spectral element number
   if ( myrank == 0 ) then
@@ -1982,13 +1980,12 @@
   else
      RGB_offset = RGB_offset + 1
      color_send(RGB_offset) = ispec
-  end if
+  endif
 
   endif
 
   enddo
 
-
 #ifdef USE_MPI
   if (myrank == 0 ) then
 
@@ -1997,24 +1994,24 @@
         nb_coorg_per_elem = 1
         if ( numbers == 1 ) then
            nb_coorg_per_elem = nb_coorg_per_elem + 1
-        end if
+        endif
         if ( ngnod == 4 ) then
            nb_coorg_per_elem = nb_coorg_per_elem + 4
         else
            nb_coorg_per_elem = nb_coorg_per_elem + 3*(pointsdisp-1)+(pointsdisp-2)
-        end if
+        endif
         nb_color_per_elem = 0
         if ( colors == 1 ) then
            nb_color_per_elem = nb_color_per_elem + 1
-        end if
+        endif
         if ( numbers == 1 ) then
            nb_color_per_elem = nb_color_per_elem + 1
-        end if
+        endif
 
         allocate(coorg_recv(2,nspec_recv*nb_coorg_per_elem))
         if ( nb_color_per_elem > 0 ) then
            allocate(color_recv(nspec_recv*nb_color_per_elem))
-        end if
+        endif
         call MPI_RECV (coorg_recv(1,1), 2*nspec_recv*nb_coorg_per_elem, &
              MPI_DOUBLE_PRECISION, iproc, 43, MPI_COMM_WORLD, request_mpi_status, ier)
         call MPI_RECV (color_recv(1), nspec_recv*nb_coorg_per_elem, &
@@ -2043,21 +2040,21 @@
               do ir=2,pointsdisp
                  buffer_offset = buffer_offset + 1
                  write(24,681) coorg_recv(1,buffer_offset), coorg_recv(2,buffer_offset)
-              end do
+              enddo
               do is=2,pointsdisp
                  buffer_offset = buffer_offset + 1
                  write(24,681) coorg_recv(1,buffer_offset), coorg_recv(2,buffer_offset)
-              end do
+              enddo
               do ir=pointsdisp-1,1,-1
                  buffer_offset = buffer_offset + 1
                  write(24,681) coorg_recv(1,buffer_offset), coorg_recv(2,buffer_offset)
-              end do
+              enddo
               do is=pointsdisp-1,2,-1
                  buffer_offset = buffer_offset + 1
                  write(24,681) coorg_recv(1,buffer_offset), coorg_recv(2,buffer_offset)
-              end do
+              enddo
 
-           end if
+           endif
 
            write(24,*) 'CO'
            if ( colors == 1 ) then
@@ -2068,7 +2065,7 @@
                  RGB_offset = RGB_offset + 1
                  write(24,679) red(color_recv(RGB_offset)), green(color_recv(RGB_offset)), blue(color_recv(RGB_offset))
               endif
-           end if
+           endif
            if(meshvect) then
               if(modelvect) then
                  write(24,*) 'Colmesh ST'
@@ -2082,48 +2079,46 @@
               write(24,500) coorg_recv(1,buffer_offset), coorg_recv(2,buffer_offset)
               RGB_offset = RGB_offset + 1
               write(24,502) color_recv(RGB_offset)
-           end if
+           endif
 
-        end do
+        enddo
 
         deallocate(coorg_recv)
         deallocate(color_recv)
 
-     end do
+     enddo
   else
      call MPI_SEND (nspec, 1, MPI_INTEGER, 0, 43, MPI_COMM_WORLD, ier)
      nb_coorg_per_elem = 1
      if ( numbers == 1 ) then
         nb_coorg_per_elem = nb_coorg_per_elem + 1
-     end if
+     endif
      if ( ngnod == 4 ) then
         nb_coorg_per_elem = nb_coorg_per_elem + 4
      else
         nb_coorg_per_elem = nb_coorg_per_elem + 3*(pointsdisp-1)+(pointsdisp-2)
-     end if
+     endif
      nb_color_per_elem = 0
      if ( colors == 1 ) then
         nb_color_per_elem = nb_color_per_elem + 1
-     end if
+     endif
      if ( numbers == 1 ) then
         nb_color_per_elem = nb_color_per_elem + 1
-     end if
+     endif
      call MPI_SEND (coorg_send(1,1), 2*nspec*nb_coorg_per_elem, &
           MPI_DOUBLE_PRECISION, 0, 43, MPI_COMM_WORLD, ier)
      if ( nb_color_per_elem > 0 ) then
         call MPI_SEND (color_send(1), nspec*nb_color_per_elem, &
              MPI_INTEGER, 0, 43, MPI_COMM_WORLD, ier)
-     end if
+     endif
 
      deallocate(coorg_send)
      deallocate(color_send)
 
-  end if
+  endif
 
-
 #endif
 
-
 !
 !--- draw absorbing boundaries with a thick color line
 !
@@ -2144,11 +2139,11 @@
   write(24,*) '0.10 CM setlinewidth'
   write(24,*) '% uncomment this when zooming on parts of the mesh'
   write(24,*) '% 0.02 CM setlinewidth'
-  end if
+  endif
 
   if ( myrank /= 0 .and. anyabs ) then
      allocate(coorg_send(4,4*nelemabs))
-  end if
+  endif
   buffer_offset = 0
 
   if ( anyabs ) then
@@ -2191,15 +2186,14 @@
      coorg_send(2,buffer_offset) = z1
      coorg_send(3,buffer_offset) = x2
      coorg_send(4,buffer_offset) = z2
-  end if
+  endif
 
   endif
   enddo
 
   enddo
-  end if
+  endif
 
-
 #ifdef USE_MPI
   if (myrank == 0 ) then
 
@@ -2215,31 +2209,29 @@
            buffer_offset = buffer_offset + 1
            write(24,602) coorg_recv(1,buffer_offset), coorg_recv(2,buffer_offset), &
                 coorg_recv(3,buffer_offset), coorg_recv(4,buffer_offset)
-        end do
+        enddo
         deallocate(coorg_recv)
-        end if
-     end do
+        endif
+     enddo
   else
      call MPI_SEND (buffer_offset, 1, MPI_INTEGER, 0, 44, MPI_COMM_WORLD, ier)
      if ( buffer_offset > 0 ) then
      call MPI_SEND (coorg_send(1,1), 4*buffer_offset, &
           MPI_DOUBLE_PRECISION, 0, 44, MPI_COMM_WORLD, ier)
      deallocate(coorg_send)
-     end if
+     endif
 
-  end if
+  endif
 
 #endif
 
-
   if ( myrank == 0 ) then
   write(24,*) '0 setgray'
   write(24,*) '0.01 CM setlinewidth'
-  end if
+  endif
 
   endif
 
-
 !
 !--- draw free surface with a thick color line
 !
@@ -2255,11 +2247,11 @@
   write(24,*) '0.10 CM setlinewidth'
   write(24,*) '% uncomment this when zooming on parts of the mesh'
   write(24,*) '% 0.02 CM setlinewidth'
-  end if
+  endif
 
   if ( myrank /= 0 .and. nelem_acoustic_surface > 0 ) then
      allocate(coorg_send(4,4*nelem_acoustic_surface))
-  end if
+  endif
   buffer_offset = 0
 
   if ( nelem_acoustic_surface > 0 ) then
@@ -2282,12 +2274,11 @@
      coorg_send(2,buffer_offset) = z1
      coorg_send(3,buffer_offset) = x2
      coorg_send(4,buffer_offset) = z2
-  end if
+  endif
 
   enddo
-  end if
+  endif
 
-
 #ifdef USE_MPI
   if (myrank == 0 ) then
 
@@ -2303,30 +2294,27 @@
            buffer_offset = buffer_offset + 1
            write(24,602) coorg_recv(1,buffer_offset), coorg_recv(2,buffer_offset), &
                 coorg_recv(3,buffer_offset), coorg_recv(4,buffer_offset)
-        end do
+        enddo
         deallocate(coorg_recv)
-        end if
-     end do
+        endif
+     enddo
   else
      call MPI_SEND (buffer_offset, 1, MPI_INTEGER, 0, 44, MPI_COMM_WORLD, ier)
      if ( buffer_offset > 0 ) then
      call MPI_SEND (coorg_send(1,1), 4*buffer_offset, &
           MPI_DOUBLE_PRECISION, 0, 44, MPI_COMM_WORLD, ier)
      deallocate(coorg_send)
-     end if
+     endif
 
-  end if
+  endif
 
 #endif
 
-
   if ( myrank == 0 ) then
-  write(24,*) '0 setgray'
-  write(24,*) '0.01 CM setlinewidth'
-  end if
+    write(24,*) '0 setgray'
+    write(24,*) '0.01 CM setlinewidth'
+  endif
 
-
-
 !
 !----  draw the fluid-solid coupling edges with a thick color line
 !
@@ -2345,11 +2333,9 @@
   write(24,*) '0.10 CM setlinewidth'
   write(24,*) '% uncomment this when zooming on parts of the mesh'
   write(24,*) '% 0.02 CM setlinewidth'
-  end if
+  endif
 
-  if ( myrank /= 0 .and. num_fluid_solid_edges > 0 ) then
-     allocate(coorg_send(4,num_fluid_solid_edges))
-  end if
+  if ( myrank /= 0 .and. num_fluid_solid_edges > 0 ) allocate(coorg_send(4,num_fluid_solid_edges))
   buffer_offset = 0
 
 ! loop on all the coupling edges
@@ -2362,7 +2348,7 @@
 ! use pink color
   if ( myrank == 0 ) then
   write(24,*) '1 0.75 0.8 RG'
-  end if
+  endif
 
   if(iedge == ITOP) then
     ideb = 3
@@ -2396,11 +2382,10 @@
      coorg_send(2,buffer_offset) = z1
      coorg_send(3,buffer_offset) = x2
      coorg_send(4,buffer_offset) = z2
-  end if
+  endif
 
   enddo
 
-
 #ifdef USE_MPI
   if (myrank == 0 ) then
 
@@ -2417,30 +2402,28 @@
            write(24,*) '1 0.75 0.8 RG'
            write(24,602) coorg_recv(1,buffer_offset), coorg_recv(2,buffer_offset), &
                 coorg_recv(3,buffer_offset), coorg_recv(4,buffer_offset)
-        end do
+        enddo
         deallocate(coorg_recv)
-        end if
-     end do
+        endif
+     enddo
   else
      call MPI_SEND (buffer_offset, 1, MPI_INTEGER, 0, 45, MPI_COMM_WORLD, ier)
      if ( buffer_offset > 0 ) then
      call MPI_SEND (coorg_send(1,1), 4*buffer_offset, &
           MPI_DOUBLE_PRECISION, 0, 45, MPI_COMM_WORLD, ier)
      deallocate(coorg_send)
-     end if
-  end if
+     endif
+  endif
 
 #endif
 
-
   if ( myrank == 0 ) then
-  write(24,*) '0 setgray'
-  write(24,*) '0.01 CM setlinewidth'
-  end if
+    write(24,*) '0 setgray'
+    write(24,*) '0.01 CM setlinewidth'
+  endif
 
   endif
 
-
 !
 !----  draw the normalized vector field
 !
@@ -2462,13 +2445,11 @@
   else
         write(24,*) '0 setgray'
   endif
-  end if
+  endif
 
   if(interpol) then
 
-  if ( myrank == 0 ) then
-  write(IOUT,*) 'Interpolating the vector field...'
-  end if
+  if (myrank == 0) write(IOUT,*) 'Interpolating the vector field...'
 
 ! option to plot only lowerleft corner value to avoid very large files if dense meshes
   if(plot_lowerleft_corner_only) then
@@ -2480,7 +2461,7 @@
   if ( myrank /= 0 ) then
      allocate(coorg_send(8,nspec*pointsdisp_loop*pointsdisp_loop))
 
-  end if
+  endif
   buffer_offset = 0
 
   do ispec=1,nspec
@@ -2580,7 +2561,7 @@
      coorg_send(6,buffer_offset) = z2
      coorg_send(7,buffer_offset) = x1
      coorg_send(8,buffer_offset) = z1
-  end if
+  endif
 
   endif
 
@@ -2588,7 +2569,6 @@
   enddo
   enddo
 
-
 #ifdef USE_MPI
   if (myrank == 0 ) then
 
@@ -2625,19 +2605,19 @@
              enddo
              ch2(index_char) = ch1(line_length)
              write(24,"(100(a1))") (ch2(ii), ii=1,index_char)
-          end do
+          enddo
           deallocate(coorg_recv)
-          end if
-       end do
+          endif
+       enddo
     else
        call MPI_SEND (buffer_offset, 1, MPI_INTEGER, 0, 46, MPI_COMM_WORLD, ier)
        if ( buffer_offset > 0 ) then
        call MPI_SEND (coorg_send(1,1), 8*buffer_offset, &
             MPI_DOUBLE_PRECISION, 0, 46, MPI_COMM_WORLD, ier)
        deallocate(coorg_send)
-       end if
+       endif
 
-  end if
+  endif
 
 #endif
 
@@ -2648,7 +2628,7 @@
   if ( myrank /= 0 ) then
      allocate(coorg_send(8,npoin))
 
-  end if
+  endif
   buffer_offset = 0
 
   do ipoin=1,npoin
@@ -2722,12 +2702,11 @@
      coorg_send(6,buffer_offset) = z2
      coorg_send(7,buffer_offset) = x1
      coorg_send(8,buffer_offset) = z1
-  end if
   endif
+  endif
 
   enddo
 
-
 #ifdef USE_MPI
   if (myrank == 0 ) then
 
@@ -2764,18 +2743,18 @@
              enddo
              ch2(index_char) = ch1(line_length)
              write(24,"(100(a1))") (ch2(ii), ii=1,index_char)
-          end do
+          enddo
           deallocate(coorg_recv)
-          end if
-       end do
+          endif
+       enddo
     else
        call MPI_SEND (buffer_offset, 1, MPI_INTEGER, 0, 47, MPI_COMM_WORLD, ier)
        if ( buffer_offset > 0 ) then
        call MPI_SEND (coorg_send(1,1), 8*buffer_offset, &
             MPI_DOUBLE_PRECISION, 0, 47, MPI_COMM_WORLD, ier)
        deallocate(coorg_send)
-       end if
-  end if
+       endif
+  endif
 
 #endif
 
@@ -2827,7 +2806,7 @@
   write(24,*) 'showpage'
 
   close(24)
-  end if
+  endif
 
  10  format('%!PS-Adobe-2.0',/,'%%',/,'%% Title: ',a50,/,'%% Created by: Specfem2D',/,'%% Author: Dimitri Komatitsch',/,'%%')
  600 format(f6.3,' neg CM 0 MR (Time =',f8.3,' s) show')

Modified: seismo/2D/SPECFEM2D/trunk/specfem2D.F90
===================================================================
--- seismo/2D/SPECFEM2D/trunk/specfem2D.F90	2008-07-21 00:41:48 UTC (rev 12447)
+++ seismo/2D/SPECFEM2D/trunk/specfem2D.F90	2008-07-21 12:01:00 UTC (rev 12448)
@@ -144,6 +144,10 @@
 
   implicit none
 
+! implement Cuthill-McKee or replace with identity permutation
+  logical, parameter :: ACTUALLY_IMPLEMENT_PERM_OUT = .false.
+  logical, parameter :: ACTUALLY_IMPLEMENT_PERM_INN = .false.
+
   include "constants.h"
 #ifdef USE_MPI
   include "mpif.h"
@@ -219,7 +223,7 @@
 
   double precision, dimension(:,:,:,:), allocatable :: dershape2D,dershape2D_display
 
-  integer, dimension(:,:,:), allocatable :: ibool
+  integer, dimension(:,:,:), allocatable :: ibool,ibool_outer,ibool_inner
   integer, dimension(:,:), allocatable  :: knods
   integer, dimension(:), allocatable :: kmato,numabs, &
      ibegin_bottom,iend_bottom,ibegin_top,iend_top,jbegin_left,jend_left,jbegin_right,jend_right
@@ -384,7 +388,12 @@
   logical :: over_critical_angle
 
 ! further reduce cache misses inner/outer in two passes in the case of an MPI simulation
-  integer :: ipass,ispec_inner,ispec_outer,NUMBER_OF_PASSES
+  integer :: ipass,ispec_inner,ispec_outer,NUMBER_OF_PASSES,kmato_read,my_interfaces_read
+!!!!!!!!!!!!!!!!!!!  integer :: ioldvalue,inewvalue
+  integer :: npoin_outer,npoin_inner
+!!!!!!!!!!!!!!!!!!!  logical :: foundthisvalue
+  integer, dimension(:), allocatable :: knods_read
+  integer, dimension(:), allocatable :: perm,antecedent_list,check_perm
 
 !***********************************************************************
 !
@@ -412,17 +421,27 @@
   iproc = 0
   ispec_inner = 0
   ispec_outer = 0
-  NUMBER_OF_PASSES = 1
 
+!! DK DK changed this
+  if(PERFORM_CUTHILL_MCKEE) then
+    NUMBER_OF_PASSES = 2
+  else
+    NUMBER_OF_PASSES = 1
+  endif
+
 #endif
 
+! determine if we write to file instead of standard output
+  if(IOUT /= ISTANDARD_OUTPUT) open(IOUT,file='simulation_results.txt',status='unknown')
+
+! reduction of cache misses inner/outer in two passes
+!! DK DK added this for third pass but included in first for simplicity
+  do ipass = 1,NUMBER_OF_PASSES
+
   write(prname,230) myrank
  230 format('./OUTPUT_FILES/Database',i5.5)
   open(unit=IIN,file=prname,status='old',action='read')
 
-! determine if we write to file instead of standard output
-  if(IOUT /= ISTANDARD_OUTPUT) open(IOUT,file='simulation_results.txt',status='unknown')
-
 !
 !---  read job title and skip remaining titles of the input file
 !
@@ -543,7 +562,7 @@
 !
 !---- read the spectral macrobloc nodal coordinates
 !
-  allocate(coorg(NDIM,npgeo))
+  if(ipass == 1) allocate(coorg(NDIM,npgeo))
 
   ipoin = 0
   read(IIN,"(a80)") datlin
@@ -566,6 +585,7 @@
 !
 !---- allocate arrays
 !
+if(ipass == 1) then
   allocate(shape2D(ngnod,NGLLX,NGLLZ))
   allocate(dershape2D(NDIM,ngnod,NGLLX,NGLLZ))
   allocate(shape2D_display(ngnod,pointsdisp,pointsdisp))
@@ -590,6 +610,7 @@
   allocate(inv_tau_sigma_nu2(N_SLS))
   allocate(phi_nu1(N_SLS))
   allocate(phi_nu2(N_SLS))
+endif
 
 ! --- allocate arrays for absorbing boundary conditions
   if(nelemabs <= 0) then
@@ -598,6 +619,7 @@
   else
     anyabs = .true.
   endif
+if(ipass == 1) then
   allocate(numabs(nelemabs))
   allocate(codeabs(4,nelemabs))
 
@@ -610,6 +632,7 @@
   allocate(jend_left(nelemabs))
   allocate(jbegin_right(nelemabs))
   allocate(jend_right(nelemabs))
+endif
 
 !
 !---- print element group main parameters
@@ -630,9 +653,22 @@
 !
   n = 0
   read(IIN,"(a80)") datlin
+!! DK DK changed
+  allocate(knods_read(ngnod))
   do ispec = 1,nspec
-    read(IIN,*) n,kmato(n),(knods(k,n), k=1,ngnod)
+!! DK DK changed    read(IIN,*) n,kmato(n),(knods(k,n), k=1,ngnod)
+    read(IIN,*) n,kmato_read,(knods_read(k), k=1,ngnod)
+if(ipass == 1) then
+  kmato(n) = kmato_read
+  knods(:,n)= knods_read(:)
+else if(ipass == 2) then
+  kmato(perm(antecedent_list(n))) = kmato_read
+  knods(:,perm(antecedent_list(n)))= knods_read(:)
+else
+  stop 'error: maximum is 2 passes'
+endif
   enddo
+  deallocate(knods_read)
 
 !
 !----  determine if each spectral element is elastic or acoustic
@@ -657,6 +693,7 @@
   endif
 
 ! allocate memory variables for attenuation
+if(ipass == 1) then
   allocate(e1(NGLLX,NGLLZ,nspec_allocate,N_SLS))
   allocate(e11(NGLLX,NGLLZ,nspec_allocate,N_SLS))
   allocate(e13(NGLLX,NGLLZ,nspec_allocate,N_SLS))
@@ -672,6 +709,7 @@
   allocate(duz_dzl_np1(NGLLX,NGLLZ,nspec_allocate))
   allocate(duz_dxl_np1(NGLLX,NGLLZ,nspec_allocate))
   allocate(dux_dzl_np1(NGLLX,NGLLZ,nspec_allocate))
+endif
 
 ! define the attenuation constants
   call attenuation_model(N_SLS,Qp_attenuation,Qs_attenuation,f0_attenuation, &
@@ -683,6 +721,7 @@
   read(IIN,"(a80)") datlin
   read(IIN,*) ninterface, max_interface_size
   if ( ninterface > 0 ) then
+if(ipass == 1) then
      allocate(my_neighbours(ninterface))
      allocate(my_nelmnts_neighbours(ninterface))
      allocate(my_interfaces(4,max_interface_size,ninterface))
@@ -692,13 +731,23 @@
      allocate(nibool_interfaces_elastic(ninterface))
      allocate(inum_interfaces_acoustic(ninterface))
      allocate(inum_interfaces_elastic(ninterface))
+endif
 
      do num_interface = 1, ninterface
         read(IIN,*) my_neighbours(num_interface), my_nelmnts_neighbours(num_interface)
         do ie = 1, my_nelmnts_neighbours(num_interface)
-           read(IIN,*) my_interfaces(1,ie,num_interface), my_interfaces(2,ie,num_interface), &
+           read(IIN,*) my_interfaces_read, my_interfaces(2,ie,num_interface), &
                 my_interfaces(3,ie,num_interface), my_interfaces(4,ie,num_interface)
 
+!! DK DK added this
+           if(ipass == 1) then
+             my_interfaces(1,ie,num_interface) = my_interfaces_read
+           else if(ipass == 2) then
+             my_interfaces(1,ie,num_interface) = perm(antecedent_list(my_interfaces_read))
+           else
+             stop 'error: maximum number of passes is 2'
+           endif
+
         enddo
      enddo
   endif
@@ -728,19 +777,19 @@
 !
   read(IIN,"(a80)") datlin
   if(nelem_acoustic_surface > 0) then
-     allocate(acoustic_edges(4,nelem_acoustic_surface))
+     if(ipass == 1) allocate(acoustic_edges(4,nelem_acoustic_surface))
       do inum = 1,nelem_acoustic_surface
         read(IIN,*) acoustic_edges(1,inum), acoustic_edges(2,inum), acoustic_edges(3,inum), &
              acoustic_edges(4,inum)
      enddo
-     allocate(acoustic_surface(5,nelem_acoustic_surface))
+     if(ipass == 1) allocate(acoustic_surface(5,nelem_acoustic_surface))
      call construct_acoustic_surface ( nspec, ngnod, knods, nelem_acoustic_surface, &
           acoustic_edges, acoustic_surface)
     write(IOUT,*)
     write(IOUT,*) 'Number of free surface elements: ',nelem_acoustic_surface
   else
-    allocate(acoustic_edges(4,1))
-    allocate(acoustic_surface(5,1))
+    if(ipass == 1) allocate(acoustic_edges(4,1))
+    if(ipass == 1) allocate(acoustic_surface(5,1))
   endif
 
 !
@@ -748,19 +797,22 @@
 !
   read(IIN,"(a80)") datlin
   if ( num_fluid_solid_edges > 0 ) then
+if(ipass == 1) then
      allocate(fluid_solid_acoustic_ispec(num_fluid_solid_edges))
      allocate(fluid_solid_acoustic_iedge(num_fluid_solid_edges))
      allocate(fluid_solid_elastic_ispec(num_fluid_solid_edges))
      allocate(fluid_solid_elastic_iedge(num_fluid_solid_edges))
+endif
      do inum = 1, num_fluid_solid_edges
         read(IIN,*) fluid_solid_acoustic_ispec(inum), fluid_solid_elastic_ispec(inum)
      enddo
   else
+if(ipass == 1) then
      allocate(fluid_solid_acoustic_ispec(1))
      allocate(fluid_solid_acoustic_iedge(1))
      allocate(fluid_solid_elastic_ispec(1))
      allocate(fluid_solid_elastic_iedge(1))
-
+endif
   endif
 
 !
@@ -789,41 +841,56 @@
   endif
 
 ! reduction of cache misses inner/outer in two passes
-  do ipass = 1,NUMBER_OF_PASSES
+!!!!!!!!!! DK DK moved this above      do ipass = 1,NUMBER_OF_PASSES
 
 ! create a new indirect addressing array to reduce cache misses in memory access in the solver
   if(ipass == 1) then
 
-  allocate(copy_ibool_ori(NGLLX,NGLLZ,nspec))
-  allocate(mask_ibool(npoin))
+! allocate(copy_ibool_ori(NGLLX,NGLLZ,nspec))
+! allocate(mask_ibool(npoin))
 
-  mask_ibool(:) = -1
-  copy_ibool_ori(:,:,:) = ibool(:,:,:)
+! mask_ibool(:) = -1
+! copy_ibool_ori(:,:,:) = ibool(:,:,:)
 
-  inumber = 0
-  do ispec=1,nspec
-    do j=1,NGLLZ
-      do i=1,NGLLX
-        if(mask_ibool(copy_ibool_ori(i,j,ispec)) == -1) then
+! inumber = 0
+! do ispec=1,nspec
+!   do j=1,NGLLZ
+!     do i=1,NGLLX
+!       if(mask_ibool(copy_ibool_ori(i,j,ispec)) == -1) then
 ! create a new point
-          inumber = inumber + 1
-          ibool(i,j,ispec) = inumber
-          mask_ibool(copy_ibool_ori(i,j,ispec)) = inumber
-        else
+!         inumber = inumber + 1
+!         ibool(i,j,ispec) = inumber
+!         mask_ibool(copy_ibool_ori(i,j,ispec)) = inumber
+!       else
 ! use an existing point created previously
-          ibool(i,j,ispec) = mask_ibool(copy_ibool_ori(i,j,ispec))
-        endif
-      enddo
-    enddo
-  enddo
+!         ibool(i,j,ispec) = mask_ibool(copy_ibool_ori(i,j,ispec))
+!       endif
+!     enddo
+!   enddo
+! enddo
 
-  if(NUMBER_OF_PASSES == 1) then
-    deallocate(copy_ibool_ori)
-    deallocate(mask_ibool)
-  endif
+!! DK DK added call to Cuthill-McKee
+!!!!!!!!!  allocate(perm(nspec))
+!!!!!!!!!  call get_perm(ibool,perm,LIMIT_MULTI_CUTHILL,nspec,npoin)
 
+!! DK DK debug perm identite
+! do ispec = 1,nspec
+!   perm(ispec) = ispec
+! enddo
+
+! if(NUMBER_OF_PASSES == 1) then
+!   deallocate(copy_ibool_ori)
+!   deallocate(mask_ibool)
+! endif
+
   else if(ipass == 2) then
 
+!! DK DK added this
+  deallocate(perm)
+
+  allocate(copy_ibool_ori(NGLLX,NGLLZ,nspec))
+  allocate(mask_ibool(npoin))
+
   mask_ibool(:) = -1
   copy_ibool_ori(:,:,:) = ibool(:,:,:)
 
@@ -832,11 +899,12 @@
 ! first reduce cache misses in outer elements, since they are taken first
 
 ! loop over spectral elements
-  do ispec_outer = 1,nspec_outer
+!!!!!!!!!!!!!!!  do ispec_outer = 1,nspec_outer
 
 ! get global numbering for inner or outer elements
-    ispec = ispec_outer_to_glob(ispec_outer)
+!!!!!!!!!!!!!!!    ispec = perm(ispec_outer_to_glob(ispec_outer))
 
+  do ispec = 1,nspec_outer
     do j=1,NGLLZ
       do i=1,NGLLX
         if(mask_ibool(copy_ibool_ori(i,j,ispec)) == -1) then
@@ -855,11 +923,12 @@
 ! then reduce cache misses in inner elements, since they are taken second
 
 ! loop over spectral elements
-  do ispec_inner = 1,nspec_inner
+!!!!!!!!!!!!!!!!!!  do ispec_inner = 1,nspec_inner
 
 ! get global numbering for inner or outer elements
-    ispec = ispec_inner_to_glob(ispec_inner)
+!!!!!!!!!!!!!!!!!!    ispec = perm(ispec_inner_to_glob(ispec_inner))
 
+  do ispec = nspec_outer+1,nspec
     do j=1,NGLLZ
       do i=1,NGLLX
         if(mask_ibool(copy_ibool_ori(i,j,ispec)) == -1) then
@@ -1244,17 +1313,12 @@
     if(ipass == 1) allocate(mask_ispec_inner_outer(nspec))
     mask_ispec_inner_outer(:) = .false.
 
-    call prepare_assemble_MPI (nspec,ibool, &
-          knods, ngnod, &
-          npoin, elastic, &
-          ninterface, max_interface_size, &
-          my_nelmnts_neighbours, my_interfaces, &
+    call prepare_assemble_MPI (nspec,ibool,knods, ngnod,npoin,elastic, &
+          ninterface, max_interface_size,my_nelmnts_neighbours, my_interfaces, &
           ibool_interfaces_acoustic, ibool_interfaces_elastic, &
           nibool_interfaces_acoustic, nibool_interfaces_elastic, &
           inum_interfaces_acoustic, inum_interfaces_elastic, &
-          ninterface_acoustic, ninterface_elastic, &
-          mask_ispec_inner_outer &
-          )
+          ninterface_acoustic, ninterface_elastic,mask_ispec_inner_outer)
 
     nspec_outer = count(mask_ispec_inner_outer)
     nspec_inner = nspec - nspec_outer
@@ -1263,6 +1327,7 @@
     if(ipass == 1) allocate(ispec_inner_to_glob(nspec_inner))
 
 ! building of corresponding arrays between inner/outer elements and their global number
+if(ipass == 1) then
     num_ispec_outer = 0
     num_ispec_inner = 0
     do ispec = 1, nspec
@@ -1272,9 +1337,9 @@
       else
         num_ispec_inner = num_ispec_inner + 1
         ispec_inner_to_glob(num_ispec_inner) = ispec
-
       endif
     enddo
+endif
 
   max_ibool_interfaces_size_ac = maxval(nibool_interfaces_acoustic(:))
   max_ibool_interfaces_size_el = NDIM*maxval(nibool_interfaces_elastic(:))
@@ -1342,6 +1407,197 @@
 
 #endif
 
+!! DK DK added call to Cuthill-McKee
+!! DK DK 3333333333333333333333333333333333333333333333333
+
+if(ipass == 1) then
+
+  allocate(antecedent_list(nspec))
+
+! loop over spectral elements
+  do ispec_outer = 1,nspec_outer
+! get global numbering for inner or outer elements
+    ispec = ispec_outer_to_glob(ispec_outer)
+    antecedent_list(ispec) = ispec_outer
+  enddo
+
+! loop over spectral elements
+  do ispec_inner = 1,nspec_inner
+! get global numbering for inner or outer elements
+    ispec = ispec_inner_to_glob(ispec_inner)
+    antecedent_list(ispec) = nspec_outer + ispec_inner
+  enddo
+
+  allocate(ibool_outer(NGLLX,NGLLZ,nspec_outer))
+  allocate(ibool_inner(NGLLX,NGLLZ,nspec_inner))
+
+! loop over spectral elements
+  do ispec_outer = 1,nspec_outer
+! get global numbering for inner or outer elements
+    ispec = ispec_outer_to_glob(ispec_outer)
+    ibool_outer(:,:,ispec_outer) = ibool(:,:,ispec)
+  enddo
+
+! loop over spectral elements
+  do ispec_inner = 1,nspec_inner
+! get global numbering for inner or outer elements
+    ispec = ispec_inner_to_glob(ispec_inner)
+    ibool_inner(:,:,ispec_inner) = ibool(:,:,ispec)
+  enddo
+
+!! DK DK re-add renumbering of new ibool to use consecutive values only for outer
+!! DK DK this too slow, better algorithm below
+! inewvalue = 1
+! do ioldvalue = minval(ibool_outer),maxval(ibool_outer)
+!   foundthisvalue = .false.
+!   do ispec_outer = 1,nspec_outer
+!     do j = 1,NGLLZ
+!       do i = 1,NGLLX
+!         if (ibool_outer(i,j,ispec_outer) == ioldvalue) then
+!           ibool_outer(i,j,ispec_outer) = inewvalue
+!           foundthisvalue = .true.
+!         endif
+!       enddo
+!     enddo
+!   enddo
+!   if(foundthisvalue) inewvalue = inewvalue + 1
+! enddo
+
+  allocate(copy_ibool_ori(NGLLX,NGLLZ,nspec_outer))
+  allocate(mask_ibool(npoin))
+
+  mask_ibool(:) = -1
+  copy_ibool_ori(:,:,:) = ibool_outer(:,:,:)
+
+  inumber = 0
+
+  do ispec = 1,nspec_outer
+    do j=1,NGLLZ
+      do i=1,NGLLX
+        if(mask_ibool(copy_ibool_ori(i,j,ispec)) == -1) then
+! create a new point
+          inumber = inumber + 1
+          ibool_outer(i,j,ispec) = inumber
+          mask_ibool(copy_ibool_ori(i,j,ispec)) = inumber
+        else
+! use an existing point created previously
+          ibool_outer(i,j,ispec) = mask_ibool(copy_ibool_ori(i,j,ispec))
+        endif
+      enddo
+    enddo
+  enddo
+
+  deallocate(copy_ibool_ori)
+  deallocate(mask_ibool)
+
+! the total number of points without multiples in this region is now known
+  npoin_outer = maxval(ibool_outer)
+
+!! DK DK re-add renumbering of new ibool to use consecutive values only for inner
+!! DK DK this too slow, better algorithm below
+! inewvalue = 1
+! do ioldvalue = minval(ibool_inner),maxval(ibool_inner)
+!   foundthisvalue = .false.
+!   do ispec_inner = 1,nspec_inner
+!     do j = 1,NGLLZ
+!       do i = 1,NGLLX
+!         if (ibool_inner(i,j,ispec_inner) == ioldvalue) then
+!           ibool_inner(i,j,ispec_inner) = inewvalue
+!           foundthisvalue = .true.
+!         endif
+!       enddo
+!     enddo
+!   enddo
+!   if(foundthisvalue) inewvalue = inewvalue + 1
+! enddo
+
+  allocate(copy_ibool_ori(NGLLX,NGLLZ,nspec_inner))
+  allocate(mask_ibool(npoin))
+
+  mask_ibool(:) = -1
+  copy_ibool_ori(:,:,:) = ibool_inner(:,:,:)
+
+  inumber = 0
+
+  do ispec = 1,nspec_inner
+    do j=1,NGLLZ
+      do i=1,NGLLX
+        if(mask_ibool(copy_ibool_ori(i,j,ispec)) == -1) then
+! create a new point
+          inumber = inumber + 1
+          ibool_inner(i,j,ispec) = inumber
+          mask_ibool(copy_ibool_ori(i,j,ispec)) = inumber
+        else
+! use an existing point created previously
+          ibool_inner(i,j,ispec) = mask_ibool(copy_ibool_ori(i,j,ispec))
+        endif
+      enddo
+    enddo
+  enddo
+
+  deallocate(copy_ibool_ori)
+  deallocate(mask_ibool)
+
+! the total number of points without multiples in this region is now known
+  npoin_inner = maxval(ibool_inner)
+
+!!! 4444444444444444444444444444444444
+
+  allocate(perm(nspec))
+
+  if(ACTUALLY_IMPLEMENT_PERM_OUT) then
+
+  allocate(check_perm(nspec_outer))
+  call get_perm(ibool_outer,perm(1:nspec_outer),LIMIT_MULTI_CUTHILL,nspec_outer,npoin_outer)
+!! DK DK check that the permutation obtained is bijective
+  check_perm(:) = -1
+  do ispec = 1,nspec_outer
+    check_perm(perm(ispec)) = ispec
+  enddo
+  if(minval(check_perm) /= 1) stop 'minval check_perm is incorrect for outer'
+  if(maxval(check_perm) /= nspec_outer) stop 'maxval check_perm is incorrect for outer'
+  deallocate(check_perm)
+  deallocate(ibool_outer)
+
+!!!!!!!! DK DK 3333333333333333 YYYYYYYYYYY identity perm for now
+  else
+    do ispec = 1,nspec_outer
+      perm(ispec) = ispec
+    enddo
+  endif
+
+  if(ACTUALLY_IMPLEMENT_PERM_INN) then
+
+  allocate(check_perm(nspec_inner))
+  call get_perm(ibool_inner,perm(nspec_outer+1:nspec),LIMIT_MULTI_CUTHILL,nspec_inner,npoin_inner)
+!! DK DK check that the permutation obtained is bijective
+  check_perm(:) = -1
+  do ispec = 1,nspec_inner
+    check_perm(perm(nspec_outer+ispec)) = ispec
+  enddo
+  if(minval(check_perm) /= 1) stop 'minval check_perm is incorrect for inner'
+  if(maxval(check_perm) /= nspec_inner) stop 'maxval check_perm is incorrect for inner'
+  deallocate(check_perm)
+!! DK DK add the right offset
+  perm(nspec_outer+1:nspec) = perm(nspec_outer+1:nspec) + nspec_outer
+
+!!!!!!!!!!!!! call get_perm(ibool,perm,LIMIT_MULTI_CUTHILL,nspec,npoin)
+
+  deallocate(ibool_inner)
+!!!!!!!!!!!!!!!!!!!!!  deallocate(antecedent_list)
+
+!!!!!!!! DK DK 3333333333333333 YYYYYYYYYYY identity perm for now
+  else
+    do ispec = nspec_outer+1,nspec
+      perm(ispec) = ispec
+    enddo
+  endif
+
+endif
+
+!! DK DK for ParaVer tests
+ call MPI_BARRIER(MPI_COMM_WORLD,ier)
+
   enddo ! end of further reduction of cache misses inner/outer in two passes
 
 ! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
@@ -1356,7 +1612,8 @@
 ! check the mesh, stability and number of points per wavelength
   call checkgrid(vpext,vsext,rhoext,density,elastcoef,ibool,kmato,coord,npoin,vpmin,vpmax, &
                  assign_external_model,nspec,numat,deltat,f0,t0,initialfield,time_function_type, &
-                 coorg,xinterp,zinterp,shape2D_display,knods,simulation_title,npgeo,pointsdisp,ngnod,any_elastic,myrank,nproc)
+                 coorg,xinterp,zinterp,shape2D_display,knods,simulation_title,npgeo,pointsdisp,ngnod,any_elastic,myrank,nproc, &
+                 nspec_outer,nspec_inner)
 
 ! convert receiver angle to radians
   anglerec = anglerec * pi / 180.d0
@@ -2184,6 +2441,9 @@
 ! ************* MAIN LOOP OVER THE TIME STEPS *************
 ! *********************************************************
 
+!! DK DK for ParaVer tests
+ call MPI_BARRIER(MPI_COMM_WORLD,ier)
+
   do it = 1,NSTEP
 
 ! update position in seismograms
@@ -2224,7 +2484,7 @@
                hprime_zz,hprimewgll_zz,wxgll,wzgll, &
                ibegin_bottom,iend_bottom,ibegin_top,iend_top, &
                jbegin_left,jend_left,jbegin_right,jend_right, &
-               nspec_outer, ispec_outer_to_glob, .true.)
+               nspec_outer, .true.)
 
  endif ! end of test if any acoustic element
 
@@ -2315,7 +2575,7 @@
                hprime_zz,hprimewgll_zz,wxgll,wzgll, &
                ibegin_bottom,iend_bottom,ibegin_top,iend_top, &
                jbegin_left,jend_left,jbegin_right,jend_right, &
-               nspec_inner, ispec_inner_to_glob, .false.)
+               nspec_outer, .false.)
    endif
 
 ! assembling potential_dot_dot for acoustic elements (receive)
@@ -2379,10 +2639,10 @@
                e1,e11,e13,dux_dxl_n,duz_dzl_n,duz_dxl_n,dux_dzl_n, &
                dux_dxl_np1,duz_dzl_np1,duz_dxl_np1,dux_dzl_np1,hprime_xx,hprimewgll_xx, &
                hprime_zz,hprimewgll_zz,wxgll,wzgll,inv_tau_sigma_nu1,phi_nu1,inv_tau_sigma_nu2,phi_nu2,Mu_nu1,Mu_nu2,N_SLS, &
-               nspec_outer, ispec_outer_to_glob,.true.,deltat,coord,add_Bielak_conditions, x0_source, z0_source, &
+               nspec_outer, .true.,deltat,coord,add_Bielak_conditions, x0_source, z0_source, &
                A_plane, B_plane, C_plane, angleforce_refl, c_inc, c_refl, time_offset, f0,&
-               v0x_left(:,it),v0z_left(:,it),v0x_right(:,it),v0z_right(:,it),v0x_bot(:,it),v0z_bot(:,it), &
-               t0x_left(:,it),t0z_left(:,it),t0x_right(:,it),t0z_right(:,it),t0x_bot(:,it),t0z_bot(:,it), &
+               v0x_left(1,it),v0z_left(1,it),v0x_right(1,it),v0z_right(1,it),v0x_bot(1,it),v0z_bot(1,it), &
+               t0x_left(1,it),t0z_left(1,it),t0x_right(1,it),t0z_right(1,it),t0x_bot(1,it),t0z_bot(1,it), &
                count_left,count_right,count_bot,over_critical_angle)
 
 ! *********************************************************
@@ -2471,10 +2731,10 @@
                e1,e11,e13,dux_dxl_n,duz_dzl_n,duz_dxl_n,dux_dzl_n, &
                dux_dxl_np1,duz_dzl_np1,duz_dxl_np1,dux_dzl_np1,hprime_xx,hprimewgll_xx, &
                hprime_zz,hprimewgll_zz,wxgll,wzgll,inv_tau_sigma_nu1,phi_nu1,inv_tau_sigma_nu2,phi_nu2,Mu_nu1,Mu_nu2,N_SLS, &
-               nspec_inner, ispec_inner_to_glob,.false.,deltat,coord,add_Bielak_conditions, x0_source, z0_source, &
+               nspec_outer,.false.,deltat,coord,add_Bielak_conditions, x0_source, z0_source, &
                A_plane, B_plane, C_plane, angleforce_refl, c_inc, c_refl, time_offset, f0,&
-               v0x_left(:,it),v0z_left(:,it),v0x_right(:,it),v0z_right(:,it),v0x_bot(:,it),v0z_bot(:,it), &
-               t0x_left(:,it),t0z_left(:,it),t0x_right(:,it),t0z_right(:,it),t0x_bot(:,it),t0z_bot(:,it), &
+               v0x_left(1,it),v0z_left(1,it),v0x_right(1,it),v0z_right(1,it),v0x_bot(1,it),v0z_bot(1,it), &
+               t0x_left(1,it),t0z_left(1,it),t0x_right(1,it),t0z_right(1,it),t0x_bot(1,it),t0z_bot(1,it), &
                count_left,count_right,count_bot,over_critical_angle)
 
 
@@ -2707,7 +2967,7 @@
           colors,numbers,subsamp,imagetype,interpol,meshvect,modelvect, &
           boundvect,assign_external_model,cutsnaps,sizemax_arrows,nelemabs,numat,pointsdisp, &
           nspec,ngnod,coupled_acoustic_elastic,any_acoustic,plot_lowerleft_corner_only, &
-          fluid_solid_acoustic_ispec,fluid_solid_acoustic_iedge,num_fluid_solid_edges,myrank,nproc)
+          fluid_solid_acoustic_ispec,fluid_solid_acoustic_iedge,num_fluid_solid_edges,myrank,nproc,ier)
 
   else if(imagetype == 2) then
 
@@ -2725,7 +2985,7 @@
           colors,numbers,subsamp,imagetype,interpol,meshvect,modelvect, &
           boundvect,assign_external_model,cutsnaps,sizemax_arrows,nelemabs,numat,pointsdisp, &
           nspec,ngnod,coupled_acoustic_elastic,any_acoustic,plot_lowerleft_corner_only, &
-          fluid_solid_acoustic_ispec,fluid_solid_acoustic_iedge,num_fluid_solid_edges,myrank,nproc)
+          fluid_solid_acoustic_ispec,fluid_solid_acoustic_iedge,num_fluid_solid_edges,myrank,nproc,ier)
 
   else if(imagetype == 3) then
 
@@ -2743,7 +3003,7 @@
           colors,numbers,subsamp,imagetype,interpol,meshvect,modelvect, &
           boundvect,assign_external_model,cutsnaps,sizemax_arrows,nelemabs,numat,pointsdisp, &
           nspec,ngnod,coupled_acoustic_elastic,any_acoustic,plot_lowerleft_corner_only, &
-          fluid_solid_acoustic_ispec,fluid_solid_acoustic_iedge,num_fluid_solid_edges,myrank,nproc)
+          fluid_solid_acoustic_ispec,fluid_solid_acoustic_iedge,num_fluid_solid_edges,myrank,nproc,ier)
 
   else if(imagetype == 4) then
 
@@ -2877,6 +3137,9 @@
 
   endif
 
+!! DK DK for ParaVer tests
+ call MPI_BARRIER(MPI_COMM_WORLD,ier)
+
   enddo ! end of the main time loop
 
   deallocate(v0x_left)



More information about the cig-commits mailing list