[cig-commits] r22193 - seismo/3D/SPECFEM3D/trunk/src/specfem3D

xie.zhinan at geodynamics.org xie.zhinan at geodynamics.org
Sat Jun 8 05:12:16 PDT 2013


Author: xie.zhinan
Date: 2013-06-08 05:12:16 -0700 (Sat, 08 Jun 2013)
New Revision: 22193

Modified:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_acoustic.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_viscoelastic.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_acoustic.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_viscoelastic.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/noise_tomography.f90
Log:
finish splitting the GPU part in stage of compute_forces_acoustic and compute_forces_viscoelastic


Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_acoustic.f90	2013-06-08 10:25:46 UTC (rev 22192)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_acoustic.f90	2013-06-08 12:12:16 UTC (rev 22193)
@@ -35,8 +35,7 @@
                                   SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
                                   nrec,islice_selected_rec,ispec_selected_rec, &
                                   nadj_rec_local,adj_sourcearrays,b_potential_dot_dot_acoustic, &
-                                  NTSTEP_BETWEEN_READ_ADJSRC, &
-                                  GPU_MODE, Mesh_pointer )
+                                  NTSTEP_BETWEEN_READ_ADJSRC)
 
   use specfem_par,only: PRINT_SOURCE_TIME_FUNCTION,stf_used_total, &
                         xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,&
@@ -76,8 +75,6 @@
 
 !adjoint simulations
   integer:: SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT
-  logical:: GPU_MODE
-  integer(kind=8) :: Mesh_pointer
   integer:: nrec
   integer,dimension(nrec) :: islice_selected_rec,ispec_selected_rec
   integer:: nadj_rec_local
@@ -94,7 +91,6 @@
   real(kind=CUSTOM_REAL) stf_used,stf_used_total_all,time_source
   integer :: isource,iglob,ispec,i,j,k,ier
   integer :: irec_local,irec
-  double precision, dimension(NSOURCES) :: stf_pre_compute
 
 ! adjoint sources in SU format
   integer :: it_start,it_end
@@ -118,134 +114,100 @@
   if (SIMULATION_TYPE == 1 .and. nsources_local > 0) then
 
 !way 2
-     if(GPU_MODE) then
-        if( NSOURCES > 0 ) then
-           do isource = 1,NSOURCES
-              ! precomputes source time function factor
-              if(USE_FORCE_POINT_SOURCE) then
-                 if( USE_RICKER_TIME_FUNCTION ) then
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
-                 else
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_src(isource),hdur_tiny(isource))
-                 endif
-              else
-                 if( USE_RICKER_TIME_FUNCTION ) then
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
-                 else
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
-                 endif
-              endif
-           enddo
-           stf_used_total = stf_used_total + sum(stf_pre_compute(:))
-           ! only implements SIMTYPE=1 and NOISE_TOM=0
-           ! write(*,*) "fortran dt = ", dt
-           ! change dt -> DT
-           call compute_add_sources_ac_cuda(Mesh_pointer, phase_is_inner, &
-                NSOURCES, stf_pre_compute, myrank)
-        endif
+     ! adds acoustic sources
+     do isource = 1,NSOURCES
 
-     else ! .NOT. GPU_MODE
+        !   add the source (only if this proc carries the source)
+        if(myrank == islice_selected_source(isource)) then
 
-        ! adds acoustic sources
-        do isource = 1,NSOURCES
+           ispec = ispec_selected_source(isource)
 
-           !   add the source (only if this proc carries the source)
-           if(myrank == islice_selected_source(isource)) then
+           if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
 
-              ispec = ispec_selected_source(isource)
+              if( ispec_is_acoustic(ispec) ) then
 
-              if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+                 if(USE_FORCE_POINT_SOURCE) then
 
-                 if( ispec_is_acoustic(ispec) ) then
+                    ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+                    iglob = ibool(nint(xi_source(isource)), &
+                                  nint(eta_source(isource)), &
+                                  nint(gamma_source(isource)), &
+                                  ispec)
 
-                    if(USE_FORCE_POINT_SOURCE) then
+                    f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing FORCESOLUTION file format
 
-                       ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
-                       iglob = ibool(nint(xi_source(isource)), &
-                            nint(eta_source(isource)), &
-                            nint(gamma_source(isource)), &
-                            ispec)
+                    !if (it == 1 .and. myrank == 0) then
+                    !  write(IMAIN,*) 'using a source of dominant frequency ',f0
+                    !  write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+                    !  write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+                    !endif
 
-                       f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing FORCESOLUTION file format
+                    if( USE_RICKER_TIME_FUNCTION ) then
+                       stf_used = factor_force_source(isource) * sourcearrays(isource,1,i,j,k) * &
+                            comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),f0)
+                    else
+                       stf_used = factor_force_source(isource) * sourcearrays(isource,1,i,j,k) * &
+                            comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_src(isource),hdur_tiny(isource))
+                    endif
 
-                       !if (it == 1 .and. myrank == 0) then
-                       !  write(IMAIN,*) 'using a source of dominant frequency ',f0
-                       !  write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
-                       !  write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
-                       !endif
-
-                       if( USE_RICKER_TIME_FUNCTION ) then
-                          stf_used = factor_force_source(isource) * sourcearrays(isource,1,i,j,k) * &
-                               comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),f0)
-                       else
-                          stf_used = factor_force_source(isource) * sourcearrays(isource,1,i,j,k) * &
-                               comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_src(isource),hdur_tiny(isource))
-                       endif
-
-                       ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
-                       ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
-                       ! to add minus the source to Chi_dot_dot to get plus the source in pressure:
-
-                       ! acoustic source for pressure gets divided by kappa
-                       ! source contribution
-                       potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
+                    ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
+                    ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
+                    ! to add minus the source to Chi_dot_dot to get plus the source in pressure:
+                       
+                    ! acoustic source for pressure gets divided by kappa
+                    ! source contribution
+                    potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
                             - stf_used / kappastore(nint(xi_source(isource)), &
-                            nint(eta_source(isource)), &
-                            nint(gamma_source(isource)),ispec)
+                                                    nint(eta_source(isource)), &
+                                                    nint(gamma_source(isource)),ispec)
 
+                 else
+
+                    if( USE_RICKER_TIME_FUNCTION ) then
+                       stf = comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
                     else
+                       ! gaussian source time
+                       stf = comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
+                    endif
 
-                       if( USE_RICKER_TIME_FUNCTION ) then
-                          stf = comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
-                       else
-                          ! gaussian source time
-                          stf = comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
-                       endif
+                    ! quasi-Heaviside
+                    !stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
 
-                       ! quasi-Heaviside
-                       !stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
+                    ! source encoding
+                    stf = stf * pm1_source_encoding(isource)
 
-                       ! source encoding
-                       stf = stf * pm1_source_encoding(isource)
+                    ! distinguishes between single and double precision for reals
+                    if(CUSTOM_REAL == SIZE_REAL) then
+                       stf_used = sngl(stf)
+                    else
+                       stf_used = stf
+                    endif
 
-                       ! distinguishes between single and double precision for reals
-                       if(CUSTOM_REAL == SIZE_REAL) then
-                          stf_used = sngl(stf)
-                       else
-                          stf_used = stf
-                       endif
+                    ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
+                    ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
+                    ! to add minus the source to Chi_dot_dot to get plus the source in pressure
 
-                       ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
-                       ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
-                       ! to add minus the source to Chi_dot_dot to get plus the source in pressure
-
-                       !     add source array
-                       do k=1,NGLLZ
-                          do j=1,NGLLY
-                             do i=1,NGLLX
-                                ! adds source contribution
-                                ! note: acoustic source for pressure gets divided by kappa
-                                iglob = ibool(i,j,k,ispec)
-                                potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
+                    !     add source array
+                    do k=1,NGLLZ
+                       do j=1,NGLLY
+                          do i=1,NGLLX
+                             ! adds source contribution
+                             ! note: acoustic source for pressure gets divided by kappa
+                             iglob = ibool(i,j,k,ispec)
+                             potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
                                      - sourcearrays(isource,1,i,j,k) * stf_used / kappastore(i,j,k,ispec)
-                             enddo
                           enddo
                        enddo
+                    enddo
 
-                    endif ! USE_FORCE_POINT_SOURCE
+                 endif ! USE_FORCE_POINT_SOURCE
 
-                    stf_used_total = stf_used_total + stf_used
+                 stf_used_total = stf_used_total + stf_used
 
-                 endif ! ispec_is_acoustic
-              endif ! ispec_is_inner
-           endif ! myrank
-
-        enddo ! NSOURCES
-     endif ! GPU_MODE
+              endif ! ispec_is_acoustic
+           endif ! ispec_is_inner
+        endif ! myrank
+     enddo ! NSOURCES
   endif
 
 ! NOTE: adjoint sources and backward wavefield timing:
@@ -360,54 +322,43 @@
 
       if( it < NSTEP ) then
         ! receivers act as sources
-        if( .NOT. GPU_MODE ) then
-          irec_local = 0
-          do irec = 1,nrec
-            ! add the source (only if this proc carries the source)
-            if (myrank == islice_selected_rec(irec)) then
-              irec_local = irec_local + 1
+        irec_local = 0
+        do irec = 1,nrec
+          ! add the source (only if this proc carries the source)
+          if (myrank == islice_selected_rec(irec)) then
+            irec_local = irec_local + 1
 
-              ! adds source array
-              ispec = ispec_selected_rec(irec)
-              if( ispec_is_acoustic(ispec) ) then
+            ! adds source array
+            ispec = ispec_selected_rec(irec)
+            if( ispec_is_acoustic(ispec) ) then
 
-                ! checks if element is in phase_is_inner run
-                if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
-                  do k = 1,NGLLZ
-                    do j = 1,NGLLY
-                      do i = 1,NGLLX
-                        iglob = ibool(i,j,k,ispec)
-                        ! beware, for acoustic medium, a pressure source would be taking the negative
-                        ! and divide by Kappa of the fluid;
-                        ! this would have to be done when constructing the adjoint source.
-                        !
-                        ! note: we take the first component of the adj_sourcearrays
-                        !          the idea is to have e.g. a pressure source, where all 3 components would be the same
-                        potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
-                              + adj_sourcearrays(irec_local, &
-                                NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC), &
-                                1,i,j,k)
-                      enddo
+              ! checks if element is in phase_is_inner run
+              if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+                do k = 1,NGLLZ
+                  do j = 1,NGLLY
+                    do i = 1,NGLLX
+                      iglob = ibool(i,j,k,ispec)
+                      ! beware, for acoustic medium, a pressure source would be taking the negative
+                      ! and divide by Kappa of the fluid;
+                      ! this would have to be done when constructing the adjoint source.
+                      !
+                      ! note: we take the first component of the adj_sourcearrays
+                      !          the idea is to have e.g. a pressure source, where all 3 components would be the same
+                      potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
+                                 + adj_sourcearrays(irec_local, &
+                                                    NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC), &
+                                                    1,i,j,k)
                     enddo
                   enddo
-                endif ! phase_is_inner
-              endif
+                enddo
+              endif ! phase_is_inner
             endif
-          enddo ! nrec
-        else
-          ! on GPU
-          call add_sources_ac_sim_2_or_3_cuda(Mesh_pointer,adj_sourcearrays,phase_is_inner, &
-                                             ispec_is_inner,ispec_is_acoustic, &
-                                             ispec_selected_rec,myrank,nrec, &
-                                             NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC), &
-                                             islice_selected_rec,nadj_rec_local, &
-                                             NTSTEP_BETWEEN_READ_ADJSRC)
+          endif
+        enddo ! nrec
+    endif ! it
+  endif ! nadj_rec_local > 0
+endif
 
-        endif ! GPU_MODE
-      endif ! it
-    endif ! nadj_rec_local > 0
-  endif
-
 ! note:  b_potential() is read in after Newmark time scheme, thus
 !           b_potential(it=1) corresponds to -t0 + (NSTEP-1)*DT.
 !           thus indexing is NSTEP - it , instead of NSTEP - it - 1
@@ -415,135 +366,101 @@
 ! adjoint simulations
   if (SIMULATION_TYPE == 3 .and. nsources_local > 0) then
 
-     ! on GPU
-     if(GPU_MODE) then
-        if( NSOURCES > 0 ) then
-           do isource = 1,NSOURCES
-              ! precomputes source time function factors
-              if(USE_FORCE_POINT_SOURCE) then
-                 if( USE_RICKER_TIME_FUNCTION ) then
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur(isource))
-                 else
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_gauss(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_tiny(isource))
-                 endif
-              else
-                 if( USE_RICKER_TIME_FUNCTION ) then
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur(isource))
-                 else
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_gauss(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
-                 endif
-              endif
-           enddo
-           stf_used_total = stf_used_total + sum(stf_pre_compute(:))
-           ! only implements SIMTYPE=3
-           call compute_add_sources_ac_s3_cuda(Mesh_pointer, phase_is_inner, &
-                NSOURCES,stf_pre_compute, myrank)
-        endif
+     ! adds acoustic sources
+     do isource = 1,NSOURCES
 
-     else ! .NOT. GPU_MODE
+        !   add the source (only if this proc carries the source)
+        if(myrank == islice_selected_source(isource)) then
 
-        ! adds acoustic sources
-        do isource = 1,NSOURCES
+           ispec = ispec_selected_source(isource)
 
-           !   add the source (only if this proc carries the source)
-           if(myrank == islice_selected_source(isource)) then
+           if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
 
-              ispec = ispec_selected_source(isource)
+              if( ispec_is_acoustic(ispec) ) then
 
-              if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+                 if(USE_FORCE_POINT_SOURCE) then
 
-                 if( ispec_is_acoustic(ispec) ) then
+                    ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+                    iglob = ibool(nint(xi_source(isource)), &
+                                  nint(eta_source(isource)), &
+                                  nint(gamma_source(isource)), &
+                                  ispec)
 
-                    if(USE_FORCE_POINT_SOURCE) then
+                    f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
 
-                       ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
-                       iglob = ibool(nint(xi_source(isource)), &
-                            nint(eta_source(isource)), &
-                            nint(gamma_source(isource)), &
-                            ispec)
+                    !if (it == 1 .and. myrank == 0) then
+                    !  write(IMAIN,*) 'using a source of dominant frequency ',f0
+                    !  write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+                    !  write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+                    !endif
 
-                       f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
+                    if( USE_RICKER_TIME_FUNCTION ) then
+                       stf_used = factor_force_source(isource) * sourcearrays(isource,1,i,j,k) * &
+                            comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_src(isource),f0)
+                    else
+                       stf_used = factor_force_source(isource) * sourcearrays(isource,1,i,j,k) * &
+                            comp_source_time_function_gauss(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_tiny(isource))
+                    endif
 
-                       !if (it == 1 .and. myrank == 0) then
-                       !  write(IMAIN,*) 'using a source of dominant frequency ',f0
-                       !  write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
-                       !  write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
-                       !endif
+                    ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
+                    ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
+                    ! to add minus the source to Chi_dot_dot to get plus the source in pressure:
 
-                       if( USE_RICKER_TIME_FUNCTION ) then
-                          stf_used = factor_force_source(isource) * sourcearrays(isource,1,i,j,k) * &
-                               comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_src(isource),f0)
-                       else
-                          stf_used = factor_force_source(isource) * sourcearrays(isource,1,i,j,k) * &
-                               comp_source_time_function_gauss(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_tiny(isource))
-                       endif
+                    ! acoustic source for pressure gets divided by kappa
+                    ! source contribution
+                    b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
+                                               - stf_used / kappastore(nint(xi_source(isource)), &
+                                                                       nint(eta_source(isource)), &
+                                                                       nint(gamma_source(isource)),ispec)
 
-                       ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
-                       ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
-                       ! to add minus the source to Chi_dot_dot to get plus the source in pressure:
-
-                       ! acoustic source for pressure gets divided by kappa
-                       ! source contribution
-                       b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
-                            - stf_used / kappastore(nint(xi_source(isource)), &
-                            nint(eta_source(isource)), &
-                            nint(gamma_source(isource)),ispec)
-
+                 else
+                    if( USE_RICKER_TIME_FUNCTION ) then
+                       stf = comp_source_time_function_rickr( &
+                             dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur(isource))
                     else
+                       ! gaussian source time
+                       stf = comp_source_time_function_gauss( &
+                             dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
+                    endif
 
-                       if( USE_RICKER_TIME_FUNCTION ) then
-                          stf = comp_source_time_function_rickr( &
-                               dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur(isource))
-                       else
-                          ! gaussian source time
-                          stf = comp_source_time_function_gauss( &
-                               dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
-                       endif
+                    ! quasi-Heaviside
+                    !stf = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
 
-                       ! quasi-Heaviside
-                       !stf = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
+                    ! source encoding
+                    stf = stf * pm1_source_encoding(isource)
 
-                       ! source encoding
-                       stf = stf * pm1_source_encoding(isource)
+                    ! distinguishes between single and double precision for reals
+                    if(CUSTOM_REAL == SIZE_REAL) then
+                       stf_used = sngl(stf)
+                    else
+                       stf_used = stf
+                    endif
 
-                       ! distinguishes between single and double precision for reals
-                       if(CUSTOM_REAL == SIZE_REAL) then
-                          stf_used = sngl(stf)
-                       else
-                          stf_used = stf
-                       endif
+                    ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
+                    ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
+                    ! to add minus the source to Chi_dot_dot to get plus the source in pressure
 
-                       ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
-                       ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
-                       ! to add minus the source to Chi_dot_dot to get plus the source in pressure
-
-                       !     add source array
-                       do k=1,NGLLZ
-                          do j=1,NGLLY
-                             do i=1,NGLLX
-                                ! adds source contribution
-                                ! note: acoustic source for pressure gets divided by kappa
-                                iglob = ibool(i,j,k,ispec)
-                                b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
+                    !     add source array
+                    do k=1,NGLLZ
+                       do j=1,NGLLY
+                          do i=1,NGLLX
+                             ! adds source contribution
+                             ! note: acoustic source for pressure gets divided by kappa
+                             iglob = ibool(i,j,k,ispec)
+                             b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
                                      - sourcearrays(isource,1,i,j,k) * stf_used / kappastore(i,j,k,ispec)
-                             enddo
                           enddo
                        enddo
+                    enddo
 
-                    endif ! USE_FORCE_POINT_SOURCE
+                 endif ! USE_FORCE_POINT_SOURCE
 
-                    stf_used_total = stf_used_total + stf_used
+                 stf_used_total = stf_used_total + stf_used
 
-                 endif ! ispec_is_elastic
-              endif ! ispec_is_inner
-           endif ! myrank
-
-        enddo ! NSOURCES
-     endif ! GPU_MODE
+              endif ! ispec_is_elastic
+           endif ! ispec_is_inner
+        endif ! myrank
+     enddo ! NSOURCES
   endif
 
   ! master prints out source time function to file

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_viscoelastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_viscoelastic.f90	2013-06-08 10:25:46 UTC (rev 22192)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_add_sources_viscoelastic.f90	2013-06-08 12:12:16 UTC (rev 22193)
@@ -33,7 +33,7 @@
                         ispec_is_elastic,SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
                         nrec,islice_selected_rec,ispec_selected_rec, &
                         nadj_rec_local,adj_sourcearrays,b_accel, &
-                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY,GPU_MODE,Mesh_pointer  )
+                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
 
   use specfem_par,only: PRINT_SOURCE_TIME_FUNCTION,stf_used_total, &
                         xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,&
@@ -76,8 +76,6 @@
 
 !adjoint simulations
   integer:: SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT
-  logical:: GPU_MODE
-  integer(kind=8) :: Mesh_pointer
   integer:: nrec
   integer,dimension(nrec) :: islice_selected_rec,ispec_selected_rec
   integer:: nadj_rec_local
@@ -91,8 +89,6 @@
   double precision :: stf
   real(kind=CUSTOM_REAL),dimension(:,:,:,:,:),allocatable:: adj_sourcearray
   real(kind=CUSTOM_REAL) stf_used,stf_used_total_all,time_source
-  ! for GPU_MODE
-  double precision, dimension(NSOURCES) :: stf_pre_compute
   integer :: isource,iglob,i,j,k,ispec
   integer :: irec_local,irec, ier
 
@@ -117,116 +113,82 @@
   ! forward simulations
   if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then
 
-    if(GPU_MODE) then
-       if( NSOURCES > 0 ) then
-          do isource = 1,NSOURCES
-             ! precomputes source time function factor
-             if(USE_FORCE_POINT_SOURCE) then
-                if( USE_RICKER_TIME_FUNCTION ) then
-                   stf_pre_compute(isource) = &
-                        comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
-                else
-                   stf_pre_compute(isource) = &
-                        comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_src(isource),hdur_tiny(isource))
-                endif
-             else
-                if( USE_RICKER_TIME_FUNCTION ) then
-                   stf_pre_compute(isource) = &
-                        comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
-                else
-                   stf_pre_compute(isource) = &
-                        comp_source_time_function(dble(it-1)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
-                endif
-             endif
-          enddo
-          ! only implements SIMTYPE=1 and NOISE_TOM=0
-          ! write(*,*) "fortran dt = ", dt
-          ! change dt -> DT
-          call compute_add_sources_el_cuda(Mesh_pointer, phase_is_inner, &
-                                          NSOURCES, stf_pre_compute, myrank)
+    do isource = 1,NSOURCES
 
-       endif
+       !   add the source (only if this proc carries the source)
+       if(myrank == islice_selected_source(isource)) then
 
-    else ! .NOT. GPU_MODE
+          ispec = ispec_selected_source(isource)
 
-      do isource = 1,NSOURCES
+          if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
 
-         !   add the source (only if this proc carries the source)
-         if(myrank == islice_selected_source(isource)) then
+             if( ispec_is_elastic(ispec) ) then
 
-            ispec = ispec_selected_source(isource)
+                if(USE_FORCE_POINT_SOURCE) then
 
-            if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+                  !f0 = hdur(isource) !! using hdur as a FREQUENCY
+                  !if (it == 1 .and. myrank == 0) then
+                  !  write(IMAIN,*) 'using a source of dominant frequency ',f0
+                  !  write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+                  !  write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+                  !endif
 
-               if( ispec_is_elastic(ispec) ) then
+                  if( USE_RICKER_TIME_FUNCTION) then
+                     stf = comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
+                  else
+                     stf = comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_src(isource),hdur_tiny(isource))
+                  endif
 
-                  if(USE_FORCE_POINT_SOURCE) then
+                  ! add the inclined force source array
+                  ! distinguish between single and double precision for reals
+                  if(CUSTOM_REAL == SIZE_REAL) then
+                     stf_used = sngl(stf)
+                  else
+                     stf_used = stf
+                  endif
 
-                    !f0 = hdur(isource) !! using hdur as a FREQUENCY
-                    !if (it == 1 .and. myrank == 0) then
-                    !  write(IMAIN,*) 'using a source of dominant frequency ',f0
-                    !  write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
-                    !  write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
-                    !endif
+                  do k=1,NGLLZ
+                     do j=1,NGLLY
+                        do i=1,NGLLX
+                           iglob = ibool(i,j,k,ispec)
+                           accel(:,iglob) = accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
+                        enddo
+                     enddo
+                  enddo
 
-                    if( USE_RICKER_TIME_FUNCTION) then
-                       stf = comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
-                    else
-                       stf = comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_src(isource),hdur_tiny(isource))
-                    endif
+                else
 
-                    ! add the inclined force source array
-                    ! distinguish between single and double precision for reals
-                    if(CUSTOM_REAL == SIZE_REAL) then
-                       stf_used = sngl(stf)
-                    else
-                       stf_used = stf
-                    endif
+                  if( USE_RICKER_TIME_FUNCTION) then
+                    stf = comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
+                  else
+                    stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
+                  endif
 
-                    do k=1,NGLLZ
-                       do j=1,NGLLY
-                          do i=1,NGLLX
-                             iglob = ibool(i,j,k,ispec)
-                             accel(:,iglob) = accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
-                          enddo
-                       enddo
-                    enddo
-
+                  !     distinguish between single and double precision for reals
+                  if(CUSTOM_REAL == SIZE_REAL) then
+                    stf_used = sngl(stf)
                   else
+                    stf_used = stf
+                  endif
 
-                    if( USE_RICKER_TIME_FUNCTION) then
-                      stf = comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
-                    else
-                      stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
-                    endif
-
-                    !     distinguish between single and double precision for reals
-                    if(CUSTOM_REAL == SIZE_REAL) then
-                      stf_used = sngl(stf)
-                    else
-                      stf_used = stf
-                    endif
-
-                    !     add source array
-                    do k=1,NGLLZ
-                      do j=1,NGLLY
-                        do i=1,NGLLX
-                          iglob = ibool(i,j,k,ispec)
-                          accel(:,iglob) = accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
-                        enddo
+                  !     add source array
+                  do k=1,NGLLZ
+                    do j=1,NGLLY
+                      do i=1,NGLLX
+                        iglob = ibool(i,j,k,ispec)
+                        accel(:,iglob) = accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
                       enddo
                     enddo
+                  enddo
 
-                  endif ! USE_FORCE_POINT_SOURCE
+                endif ! USE_FORCE_POINT_SOURCE
 
-                  stf_used_total = stf_used_total + stf_used
+                stf_used_total = stf_used_total + stf_used
 
-               endif ! ispec_is_elastic
-            endif ! ispec_is_inner
-         endif ! myrank
-
-      enddo ! NSOURCES
-    endif ! GPU_MODE
+             endif ! ispec_is_elastic
+          endif ! ispec_is_inner
+       endif ! myrank
+    enddo ! NSOURCES
   endif ! forward
 
 ! NOTE: adjoint sources and backward wavefield timing:
@@ -352,51 +314,40 @@
 
 
       if( it < NSTEP ) then
+        ! receivers act as sources
+        irec_local = 0
+        do irec = 1,nrec
 
-        if(.NOT. GPU_MODE) then
+          ! add the source (only if this proc carries the source)
+          if (myrank == islice_selected_rec(irec)) then
+            irec_local = irec_local + 1
 
-          ! receivers act as sources
-          irec_local = 0
-          do irec = 1,nrec
+            ispec = ispec_selected_rec(irec)
+            if( ispec_is_elastic(ispec) ) then
 
-            ! add the source (only if this proc carries the source)
-            if (myrank == islice_selected_rec(irec)) then
-              irec_local = irec_local + 1
+              ! checks if element is in phase_is_inner run
+              if (ispec_is_inner(ispec_selected_rec(irec)) .eqv. phase_is_inner) then
 
-              ispec = ispec_selected_rec(irec)
-              if( ispec_is_elastic(ispec) ) then
+                ! adds source array
+                do k = 1,NGLLZ
+                  do j = 1,NGLLY
+                    do i = 1,NGLLX
+                      iglob = ibool(i,j,k,ispec_selected_rec(irec))
 
-                ! checks if element is in phase_is_inner run
-                if (ispec_is_inner(ispec_selected_rec(irec)) .eqv. phase_is_inner) then
-
-                  ! adds source array
-                  do k = 1,NGLLZ
-                    do j = 1,NGLLY
-                      do i = 1,NGLLX
-                        iglob = ibool(i,j,k,ispec_selected_rec(irec))
-
-                        accel(:,iglob) = accel(:,iglob)  &
-                                  + adj_sourcearrays(irec_local, &
-                                  NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC), &
-                                  :,i,j,k)
-                      enddo
+                      accel(:,iglob) = accel(:,iglob)  &
+                                + adj_sourcearrays(irec_local, &
+                                NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC), &
+                                :,i,j,k)
                     enddo
                   enddo
-                endif ! phase_is_inner
-              endif ! ispec_is_elastic
-            endif
-          enddo ! nrec
-        else ! GPU_MODE == .true.
-          call add_sources_el_sim_type_2_or_3(Mesh_pointer,adj_sourcearrays,phase_is_inner, &
-                                            ispec_is_inner,ispec_is_elastic, &
-                                            ispec_selected_rec,myrank,nrec, &
-                                            NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC), &
-                                            islice_selected_rec,nadj_rec_local, &
-                                            NTSTEP_BETWEEN_READ_ADJSRC)
-        endif ! GPU_MODE
-      endif ! it
-    endif ! nadj_rec_local
-  endif !adjoint
+                enddo
+              endif ! phase_is_inner
+            endif ! ispec_is_elastic
+          endif
+        enddo ! nrec
+    endif ! it
+  endif ! nadj_rec_local
+endif !adjoint
 
 ! note:  b_displ() is read in after Newmark time scheme, thus
 !           b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT.
@@ -405,118 +356,86 @@
 ! adjoint simulations
   if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then
 
-     if(GPU_MODE) then
-        if( NSOURCES > 0 ) then
-           do isource = 1,NSOURCES
-              ! precomputes source time function factors
-              if(USE_FORCE_POINT_SOURCE) then
-                 if( USE_RICKER_TIME_FUNCTION ) then
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur(isource))
-                 else
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_gauss(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_tiny(isource))
-                 endif
-              else
-                 if( USE_RICKER_TIME_FUNCTION ) then
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur(isource))
-                 else
-                    stf_pre_compute(isource) = &
-                         comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
-                 endif
-              endif
-           enddo
-           ! only implements SIMTYPE=3
-           call compute_add_sources_el_s3_cuda(Mesh_pointer,stf_pre_compute, &
-                NSOURCES,phase_is_inner,myrank)
+    ! backward source reconstruction
+    do isource = 1,NSOURCES
 
-        endif
+       ! add the source (only if this proc carries the source)
+       if(myrank == islice_selected_source(isource)) then
 
-    else ! .NOT. GPU_MODE
+          ispec = ispec_selected_source(isource)
 
-      ! backward source reconstruction
-      do isource = 1,NSOURCES
+          if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
 
-         ! add the source (only if this proc carries the source)
-         if(myrank == islice_selected_source(isource)) then
+             if( ispec_is_elastic(ispec) ) then
 
-            ispec = ispec_selected_source(isource)
+                if(USE_FORCE_POINT_SOURCE) then
 
-            if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+                   !f0 = hdur(isource) !! using hdur as a FREQUENCY
+                   !if (it == 1 .and. myrank == 0) then
+                   !   write(IMAIN,*) 'using a source of dominant frequency ',f0
+                   !   write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+                   !   write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+                   !endif
 
-               if( ispec_is_elastic(ispec) ) then
+                   if( USE_RICKER_TIME_FUNCTION ) then
+                      stf = comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur(isource))
+                   else
+                      stf = comp_source_time_function_gauss(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_tiny(isource))
+                   endif
 
-                  if(USE_FORCE_POINT_SOURCE) then
+                   ! add the inclined force source array
+                   ! distinguish between single and double precision for reals
+                   if(CUSTOM_REAL == SIZE_REAL) then
+                      stf_used = sngl(stf)
+                   else
+                      stf_used = stf
+                   endif
 
-                     !f0 = hdur(isource) !! using hdur as a FREQUENCY
-                     !if (it == 1 .and. myrank == 0) then
-                     !   write(IMAIN,*) 'using a source of dominant frequency ',f0
-                     !   write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
-                     !   write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
-                     !endif
+                   do k=1,NGLLZ
+                      do j=1,NGLLY
+                         do i=1,NGLLX
+                            iglob = ibool(i,j,k,ispec)
+                            b_accel(:,iglob) = b_accel(:,iglob) + sourcearrays(isource,:,i,j,k) * stf_used
+                         enddo
+                      enddo
+                   enddo
 
-                     if( USE_RICKER_TIME_FUNCTION ) then
-                        stf = comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur(isource))
-                     else
-                        stf = comp_source_time_function_gauss(dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_tiny(isource))
-                     endif
+                else
 
-                    ! add the inclined force source array
-                    ! distinguish between single and double precision for reals
-                     if(CUSTOM_REAL == SIZE_REAL) then
-                        stf_used = sngl(stf)
-                     else
-                        stf_used = stf
-                     endif
+                   ! see note above: time step corresponds now to NSTEP-it
+                   ! (also compare to it-1 for forward simulation)
+                   if( USE_RICKER_TIME_FUNCTION ) then
+                     stf = comp_source_time_function_rickr( &
+                                    dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
+                   else
+                     stf = comp_source_time_function( &
+                                    dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
+                   endif
 
-                     do k=1,NGLLZ
-                        do j=1,NGLLY
-                           do i=1,NGLLX
-                              iglob = ibool(i,j,k,ispec)
-                              b_accel(:,iglob) = b_accel(:,iglob) + sourcearrays(isource,:,i,j,k) * stf_used
-                           enddo
-                        enddo
-                     enddo
+                   ! distinguish between single and double precision for reals
+                   if(CUSTOM_REAL == SIZE_REAL) then
+                      stf_used = sngl(stf)
+                   else
+                      stf_used = stf
+                   endif
 
-                  else
+                   !  add source array
+                   do k=1,NGLLZ
+                      do j=1,NGLLY
+                         do i=1,NGLLX
+                            iglob = ibool(i,j,k,ispec_selected_source(isource))
+                            b_accel(:,iglob) = b_accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
+                         enddo
+                      enddo
+                   enddo
+                endif ! USE_FORCE_POINT_SOURCE
 
-                     ! see note above: time step corresponds now to NSTEP-it
-                     ! (also compare to it-1 for forward simulation)
-                     if( USE_RICKER_TIME_FUNCTION ) then
-                       stf = comp_source_time_function_rickr( &
-                                      dble(it-1)*DT-t0-tshift_src(isource),hdur(isource))
-                     else
-                       stf = comp_source_time_function( &
-                                      dble(NSTEP-it)*DT-t0-tshift_src(isource),hdur_gaussian(isource))
-                     endif
+                stf_used_total = stf_used_total + stf_used
 
-                     ! distinguish between single and double precision for reals
-                     if(CUSTOM_REAL == SIZE_REAL) then
-                        stf_used = sngl(stf)
-                     else
-                        stf_used = stf
-                     endif
-
-                     !  add source array
-                     do k=1,NGLLZ
-                        do j=1,NGLLY
-                           do i=1,NGLLX
-                              iglob = ibool(i,j,k,ispec_selected_source(isource))
-                              b_accel(:,iglob) = b_accel(:,iglob) + sourcearrays(isource,:,i,j,k)*stf_used
-                           enddo
-                        enddo
-                     enddo
-                  endif ! USE_FORCE_POINT_SOURCE
-
-                  stf_used_total = stf_used_total + stf_used
-
-               endif ! elastic
-            endif ! phase_inner
-         endif ! myrank
-
-      enddo ! NSOURCES
-    endif ! GPU_MODE
+             endif ! elastic
+          endif ! phase_inner
+       endif ! myrank
+    enddo ! NSOURCES
   endif ! adjoint
 
   ! master prints out source time function to file
@@ -536,27 +455,19 @@
        ! hence, instead of a moment tensor 'sourcearrays', a 'noise_sourcearray' for a point force is needed.
        ! furthermore, the CMTSOLUTION needs to be zero, i.e., no earthquakes.
        ! now this must be manually set in DATA/CMTSOLUTION, by USERS.
-       if(.NOT. GPU_MODE) then
-          call add_source_master_rec_noise(myrank,nrec, &
+        call add_source_master_rec_noise(myrank,nrec, &
                NSTEP,accel,noise_sourcearray, &
                ibool,islice_selected_rec,ispec_selected_rec, &
                it,irec_master_noise, &
                NSPEC_AB,NGLOB_AB)
-       else ! GPU_MODE == .true.
-          call add_source_master_rec_noise_cu(Mesh_pointer, myrank, it, irec_master_noise, islice_selected_rec)
-       endif
     else if ( NOISE_TOMOGRAPHY == 2 ) then
        ! second step of noise tomography, i.e., read the surface movie saved at every timestep
        ! use the movie to drive the ensemble forward wavefield
-       call noise_read_add_surface_movie(NGLLSQUARE*num_free_surface_faces, &
-                              accel, &
+       call noise_read_add_surface_movie(NGLLSQUARE*num_free_surface_faces,accel, &
                               normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
-                              ibool,noise_surface_movie, &
-                              NSTEP-it+1, &
-                              NSPEC_AB,NGLOB_AB, &
+                              ibool,noise_surface_movie,NSTEP-it+1,NSPEC_AB,NGLOB_AB, &
                               num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
-                              free_surface_jacobian2Dw,&
-                              Mesh_pointer,GPU_MODE,NOISE_TOMOGRAPHY)
+                              free_surface_jacobian2Dw)
         ! be careful, since ensemble forward sources are reversals of generating wavefield "eta"
         ! hence the "NSTEP-it+1", i.e., start reading from the last timestep
         ! note the ensemble forward sources are generally distributed on the surface of the earth
@@ -567,15 +478,11 @@
         ! use the movie to reconstruct the ensemble forward wavefield
         ! the ensemble adjoint wavefield is done as usual
         ! note instead of "NSTEP-it+1", now we us "it", since reconstruction is a reversal of reversal
-        call noise_read_add_surface_movie(NGLLSQUARE*num_free_surface_faces, &
-                              b_accel, &
+        call noise_read_add_surface_movie(NGLLSQUARE*num_free_surface_faces,b_accel, &
                               normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
-                              ibool,noise_surface_movie, &
-                              it, &
-                              NSPEC_AB,NGLOB_AB, &
+                              ibool,noise_surface_movie,it,NSPEC_AB,NGLOB_AB, &
                               num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
-                              free_surface_jacobian2Dw,&
-                               Mesh_pointer,GPU_MODE,NOISE_TOMOGRAPHY)
+                              free_surface_jacobian2Dw)
     endif
   endif
 

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90	2013-06-08 10:25:46 UTC (rev 22192)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90	2013-06-08 12:12:16 UTC (rev 22193)
@@ -67,23 +67,17 @@
   logical:: phase_is_inner
 
   ! enforces free surface (zeroes potentials at free surface)
-  if(.NOT. GPU_MODE) then
-    ! on CPU
-    call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB,STACEY_INSTEAD_OF_FREE_SURFACE, &
+  call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB,STACEY_INSTEAD_OF_FREE_SURFACE, &
                         potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
                         ibool,free_surface_ijk,free_surface_ispec, &
                         num_free_surface_faces,ispec_is_acoustic)
 
-    ! adjoint simulations
-    if( SIMULATION_TYPE == 3 ) &
-      call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT,STACEY_INSTEAD_OF_FREE_SURFACE, &
+  ! adjoint simulations
+  if( SIMULATION_TYPE == 3 ) &
+    call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT,STACEY_INSTEAD_OF_FREE_SURFACE, &
                         b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
                         ibool,free_surface_ijk,free_surface_ispec, &
                         num_free_surface_faces,ispec_is_acoustic)
-  else
-    ! on GPU
-    call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE)
-  endif
 
   ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions
   do iphase=1,2
@@ -96,11 +90,9 @@
     endif
 
     ! acoustic pressure term
-    if(.NOT. GPU_MODE) then
-      ! on CPU
-      if(USE_DEVILLE_PRODUCTS) then
-        ! uses Deville (2002) optimizations
-        call compute_forces_acoustic_Dev(iphase,NSPEC_AB,NGLOB_AB, &
+    if(USE_DEVILLE_PRODUCTS) then
+      ! uses Deville (2002) optimizations
+      call compute_forces_acoustic_Dev(iphase,NSPEC_AB,NGLOB_AB, &
                         potential_acoustic,potential_dot_dot_acoustic, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                         hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
@@ -108,8 +100,8 @@
                         rhostore,jacobian,ibool, &
                         num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
                         phase_ispec_inner_acoustic)
-      else
-        call compute_forces_acoustic_noDev(iphase,NSPEC_AB,NGLOB_AB, &
+    else
+      call compute_forces_acoustic_noDev(iphase,NSPEC_AB,NGLOB_AB, &
                         potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                         hprime_xx,hprime_yy,hprime_zz, &
@@ -119,13 +111,13 @@
                         num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
                         phase_ispec_inner_acoustic,ELASTIC_SIMULATION,&
                         .false.,potential_dot_dot_acoustic_interface)  
-      endif
+    endif
 
-      ! adjoint simulations
-      if( SIMULATION_TYPE == 3 ) then
-        if(USE_DEVILLE_PRODUCTS) then
-          ! uses Deville (2002) optimizations
-          call compute_forces_acoustic_Dev(iphase,NSPEC_ADJOINT,NGLOB_ADJOINT, &
+    ! adjoint simulations
+    if( SIMULATION_TYPE == 3 ) then
+      if(USE_DEVILLE_PRODUCTS) then
+        ! uses Deville (2002) optimizations
+        call compute_forces_acoustic_Dev(iphase,NSPEC_ADJOINT,NGLOB_ADJOINT, &
                         b_potential_acoustic,b_potential_dot_dot_acoustic, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                         hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
@@ -133,8 +125,8 @@
                         rhostore,jacobian,ibool, &
                         num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
                         phase_ispec_inner_acoustic)
-        else
-          call compute_forces_acoustic_noDev(iphase,NSPEC_ADJOINT,NGLOB_ADJOINT, &
+      else
+        call compute_forces_acoustic_noDev(iphase,NSPEC_ADJOINT,NGLOB_ADJOINT, &
                         b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                         hprime_xx,hprime_yy,hprime_zz, &
@@ -144,14 +136,7 @@
                         num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
                         phase_ispec_inner_acoustic,ELASTIC_SIMULATION,&
                         .true.,potential_dot_dot_acoustic_interface) 
-        endif
       endif
-
-    else
-      ! on GPU
-      ! includes code for SIMULATION_TYPE==3
-      call compute_forces_acoustic_cuda(Mesh_pointer, iphase, &
-                                        nspec_outer_acoustic, nspec_inner_acoustic)
     endif
 
     ! ! Stacey absorbing boundary conditions
@@ -163,17 +148,15 @@
                          num_abs_boundary_faces,rhostore,kappastore,ispec_is_acoustic, &
                          SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it,NGLOB_ADJOINT, &
                          b_potential_dot_dot_acoustic,b_reclen_potential, &
-                         b_absorb_potential,b_num_abs_boundary_faces, &
-                         GPU_MODE,Mesh_pointer)
+                         b_absorb_potential,b_num_abs_boundary_faces)
     endif
 
     ! elastic coupling
     if(ELASTIC_SIMULATION ) then
       if( num_coupling_ac_el_faces > 0 ) then
-        if( .NOT. GPU_MODE ) then
-          if( SIMULATION_TYPE == 1 ) then
-            ! forward definition: \bfs=\frac{1}{\rho}\bfnabla\phi
-            call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
+        if( SIMULATION_TYPE == 1 ) then
+          ! forward definition: \bfs=\frac{1}{\rho}\bfnabla\phi
+          call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
                               ibool,displ,potential_dot_dot_acoustic, &
                               num_coupling_ac_el_faces, &
                               coupling_ac_el_ispec,coupling_ac_el_ijk, &
@@ -183,10 +166,10 @@
                               PML_CONDITIONS,spec_to_CPML,is_CPML,&
                               potential_dot_dot_acoustic_interface,veloc,rmemory_coupling_ac_el_displ)
 
-          else
-            ! handles adjoint runs coupling between adjoint potential and adjoint elastic wavefield
-            ! adjoint definition: \partial_t^2 \bfs^\dagger=-\frac{1}{\rho}\bfnabla\phi^\dagger
-            call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
+        else
+          ! handles adjoint runs coupling between adjoint potential and adjoint elastic wavefield
+          ! adjoint definition: \partial_t^2 \bfs^\dagger=-\frac{1}{\rho}\bfnabla\phi^\dagger
+          call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
                               ibool,-accel_adj_coupling,potential_dot_dot_acoustic, &
                               num_coupling_ac_el_faces, &
                               coupling_ac_el_ispec,coupling_ac_el_ijk, &
@@ -195,10 +178,10 @@
                               ispec_is_inner,phase_is_inner,& 
                               PML_CONDITIONS,spec_to_CPML,is_CPML,&
                               potential_dot_dot_acoustic_interface,veloc,rmemory_coupling_ac_el_displ)
-          endif
-          ! adjoint/kernel simulations
-          if( SIMULATION_TYPE == 3 ) &
-            call compute_coupling_acoustic_el(NSPEC_ADJOINT,NGLOB_ADJOINT, &
+        endif
+        ! adjoint/kernel simulations
+        if( SIMULATION_TYPE == 3 ) &
+          call compute_coupling_acoustic_el(NSPEC_ADJOINT,NGLOB_ADJOINT, &
                             ibool,b_displ,b_potential_dot_dot_acoustic, &
                             num_coupling_ac_el_faces, &
                             coupling_ac_el_ispec,coupling_ac_el_ijk, &
@@ -207,12 +190,6 @@
                             ispec_is_inner,phase_is_inner,& 
                             PML_CONDITIONS,spec_to_CPML,is_CPML,&
                             potential_dot_dot_acoustic_interface,veloc,rmemory_coupling_ac_el_displ) 
-
-        else
-          ! on GPU
-          call compute_coupling_ac_el_cuda(Mesh_pointer,phase_is_inner, &
-                                              num_coupling_ac_el_faces)
-        endif ! GPU_MODE
       endif
     endif
 
@@ -246,128 +223,62 @@
                         SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
                         nrec,islice_selected_rec,ispec_selected_rec, &
                         nadj_rec_local,adj_sourcearrays,b_potential_dot_dot_acoustic, &
-                        NTSTEP_BETWEEN_READ_ADJSRC, &
-                        GPU_MODE, Mesh_pointer)
+                        NTSTEP_BETWEEN_READ_ADJSRC)
 
     ! assemble all the contributions between slices using MPI
     if( phase_is_inner .eqv. .false. ) then
       ! sends potential_dot_dot_acoustic values to corresponding MPI interface neighbors (non-blocking)
-      if(.NOT. GPU_MODE) then
-        call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
+      call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
                         buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
                         num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                         nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
                         my_neighbours_ext_mesh, &
                         request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
-      else
-        ! on GPU
-        call transfer_boun_pot_from_device(NGLOB_AB, Mesh_pointer, &
-                                            potential_dot_dot_acoustic, &
-                                            buffer_send_scalar_ext_mesh, &
-                                            num_interfaces_ext_mesh, &
-                                            max_nibool_interfaces_ext_mesh, &
-                                            nibool_interfaces_ext_mesh, &
-                                            ibool_interfaces_ext_mesh, &
-                                            1) ! <-- 1 == fwd accel
-        call assemble_MPI_scalar_send_cuda(NPROC, &
-                        buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
-                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,&
-                        my_neighbours_ext_mesh, &
-                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
-      endif
 
       ! adjoint simulations
       if( SIMULATION_TYPE == 3 ) then
-        if(.NOT. GPU_MODE) then
-          call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
+        call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
                         b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, &
                         num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                         nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
                         my_neighbours_ext_mesh, &
                         b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
-        else
-          ! on GPU
-          call transfer_boun_pot_from_device(NGLOB_AB, Mesh_pointer, &
-                                                  b_potential_dot_dot_acoustic, &
-                                                  b_buffer_send_scalar_ext_mesh,&
-                                                  num_interfaces_ext_mesh, &
-                                                  max_nibool_interfaces_ext_mesh, &
-                                                  nibool_interfaces_ext_mesh, &
-                                                  ibool_interfaces_ext_mesh, &
-                                                  3) ! <-- 3 == adjoint b_accel
-
-          call assemble_MPI_scalar_send_cuda(NPROC, &
-                          b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, &
-                          num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                          nibool_interfaces_ext_mesh,&
-                          my_neighbours_ext_mesh, &
-                          b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
-
-        endif
       endif
 
     else
 
       ! waits for send/receive requests to be completed and assembles values
-      if(.NOT. GPU_MODE) then
-        call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
+      call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
                         buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
                         max_nibool_interfaces_ext_mesh, &
                         nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
                         request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
-      else
-        ! on GPU
-        call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
-                        Mesh_pointer,&
-                        buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
-                        max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
-                        1)
-      endif
 
       ! adjoint simulations
       if( SIMULATION_TYPE == 3 ) then
-        if(.NOT. GPU_MODE) then
-          call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
+        call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
                         b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
                         max_nibool_interfaces_ext_mesh, &
                         nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
                         b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
-        else
-          ! on GPU
-          call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, &
-                        Mesh_pointer, &
-                        b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
-                        max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                        b_request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
-                        3)
-        endif
       endif
     endif !phase_is_inner
 
   enddo
 
-  if(.NOT. GPU_MODE) then
-    ! divides pressure with mass matrix
-    potential_dot_dot_acoustic(:) = potential_dot_dot_acoustic(:) * rmass_acoustic(:)
+  ! divides pressure with mass matrix
+  potential_dot_dot_acoustic(:) = potential_dot_dot_acoustic(:) * rmass_acoustic(:)
     
-    if(PML_CONDITIONS)then
-      if(ELASTIC_SIMULATION ) then
-        potential_dot_dot_acoustic_interface(:) = potential_dot_dot_acoustic_interface(:) * & 
+  if(PML_CONDITIONS)then
+    if(ELASTIC_SIMULATION ) then
+      potential_dot_dot_acoustic_interface(:) = potential_dot_dot_acoustic_interface(:) * & 
                                                   rmass_acoustic_interface(:) 
-      endif
     endif
+  endif
 
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) &
+  ! adjoint simulations
+  if (SIMULATION_TYPE == 3) &
       b_potential_dot_dot_acoustic(:) = b_potential_dot_dot_acoustic(:) * rmass_acoustic(:)
-  else
-    ! on GPU
-    call kernel_3_a_acoustic_cuda(Mesh_pointer,NGLOB_AB)
-  endif
 
 ! The outer boundary condition to use for PML elements in fluid layers is Neumann for the potential
 ! because we need Dirichlet conditions for the displacement vector, which means Neumann for the potential.
@@ -422,42 +333,31 @@
 !
 ! corrector:
 !   updates the chi_dot term which requires chi_dot_dot(t+delta)
- if( .NOT. GPU_MODE ) then
-    ! corrector
-    potential_dot_acoustic(:) = potential_dot_acoustic(:) + deltatover2*potential_dot_dot_acoustic(:)
+  ! corrector
+  potential_dot_acoustic(:) = potential_dot_acoustic(:) + deltatover2*potential_dot_dot_acoustic(:)
 
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) &
+  ! adjoint simulations
+  if (SIMULATION_TYPE == 3) &
       b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) + b_deltatover2*b_potential_dot_dot_acoustic(:)
-  else
-    ! on GPU
-    call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,b_deltatover2)
-  endif
 
 ! enforces free surface (zeroes potentials at free surface)
-  if(.NOT. GPU_MODE) then
-    ! on CPU
-    call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB,STACEY_INSTEAD_OF_FREE_SURFACE, &
+  call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB,STACEY_INSTEAD_OF_FREE_SURFACE, &
                         potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
                         ibool,free_surface_ijk,free_surface_ispec, &
                         num_free_surface_faces,ispec_is_acoustic)
 
-    if( SIMULATION_TYPE /= 1 ) then
-      potential_acoustic_adj_coupling(:) = potential_acoustic(:) &
+  if( SIMULATION_TYPE /= 1 ) then
+    potential_acoustic_adj_coupling(:) = potential_acoustic(:) &
                             + deltat * potential_dot_acoustic(:) &
                             + deltatsqover2 * potential_dot_dot_acoustic(:)
-    endif
+  endif
 
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) &
-      call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT,STACEY_INSTEAD_OF_FREE_SURFACE, &
+  ! adjoint simulations
+  if (SIMULATION_TYPE == 3) &
+    call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT,STACEY_INSTEAD_OF_FREE_SURFACE, &
                         b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
                         ibool,free_surface_ijk,free_surface_ispec, &
                         num_free_surface_faces,ispec_is_acoustic)
-  else
-    ! on GPU
-    call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE)
-  endif
 
   if(PML_CONDITIONS)then
     if(SIMULATION_TYPE == 1 .and. SAVE_FORWARD)then

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90	2013-06-08 10:25:46 UTC (rev 22192)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90	2013-06-08 12:12:16 UTC (rev 22193)
@@ -54,18 +54,16 @@
 
 
 ! elastic term
-    if( .NOT. GPU_MODE ) then
-      if(USE_DEVILLE_PRODUCTS) then
-        ! uses Deville (2002) optimizations
-        call compute_forces_viscoelastic_Dev_sim1(iphase)
+    if(USE_DEVILLE_PRODUCTS) then
+      ! uses Deville (2002) optimizations
+      call compute_forces_viscoelastic_Dev_sim1(iphase)
 
-        ! adjoint simulations: backward/reconstructed wavefield
-        if( SIMULATION_TYPE == 3 ) &
-          call compute_forces_viscoelastic_Dev_sim3(iphase)
+      ! adjoint simulations: backward/reconstructed wavefield
+      if( SIMULATION_TYPE == 3 ) call compute_forces_viscoelastic_Dev_sim3(iphase)
 
-      else
-        ! no optimizations used
-        call compute_forces_viscoelastic_noDev(iphase,NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
+    else
+      ! no optimizations used
+      call compute_forces_viscoelastic_noDev(iphase,NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                         hprime_xx,hprime_yy,hprime_zz, &
                         hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
@@ -92,9 +90,9 @@
                         num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic, &
                         phase_ispec_inner_elastic,.false.)
 
-        ! adjoint simulations: backward/reconstructed wavefield
-        if( SIMULATION_TYPE == 3 ) &
-          call compute_forces_viscoelastic_noDev(iphase,NSPEC_AB,NGLOB_AB, &
+      ! adjoint simulations: backward/reconstructed wavefield
+      if( SIMULATION_TYPE == 3 ) &
+        call compute_forces_viscoelastic_noDev(iphase,NSPEC_AB,NGLOB_AB, &
                         b_displ,b_veloc,b_accel, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                         hprime_xx,hprime_yy,hprime_zz, &
@@ -122,41 +120,9 @@
                         num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic, &
                         phase_ispec_inner_elastic,.true.)
 
-      endif
+    endif
 
-    else
-      ! on GPU
-      ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
-      call compute_forces_viscoelastic_cuda(Mesh_pointer, iphase, deltat, &
-                                      nspec_outer_elastic, &
-                                      nspec_inner_elastic, &
-                                      COMPUTE_AND_STORE_STRAIN,ATTENUATION,ANISOTROPY)
 
-      if(phase_is_inner .eqv. .true.) then
-         ! while Inner elements compute "Kernel_2", we wait for MPI to
-         ! finish and transfer the boundary terms to the device
-         ! asynchronously
-
-         !daniel: todo - this avoids calling the fortran vector send from CUDA routine
-         ! wait for asynchronous copy to finish
-         call sync_copy_from_device(Mesh_pointer,iphase,buffer_send_vector_ext_mesh)
-         ! sends mpi buffers
-         call assemble_MPI_vector_send_cuda(NPROC, &
-                  buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
-                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                  nibool_interfaces_ext_mesh,&
-                  my_neighbours_ext_mesh, &
-                  request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
-
-         ! transfers mpi buffers onto GPU
-         call transfer_boundary_to_device(NPROC,Mesh_pointer,buffer_recv_vector_ext_mesh, &
-                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                  request_recv_vector_ext_mesh)
-      endif ! inner elements
-
-   endif ! GPU_MODE
-
-
 ! adds elastic absorbing boundary term to acceleration (Stacey conditions)
     if( STACEY_ABSORBING_CONDITIONS ) then
        call compute_stacey_viscoelastic(NSPEC_AB,NGLOB_AB,accel, &
@@ -168,17 +134,16 @@
                         ispec_is_elastic,SIMULATION_TYPE,SAVE_FORWARD, &
                         NSTEP,it,NGLOB_ADJOINT,b_accel, &
                         b_num_abs_boundary_faces,b_reclen_field,b_absorb_field,&
-                        GPU_MODE,Mesh_pointer,it_dsm,Veloc_dsm_boundary,Tract_dsm_boundary)
+                        it_dsm,Veloc_dsm_boundary,Tract_dsm_boundary)
     endif
 
 
 ! acoustic coupling
     if( ACOUSTIC_SIMULATION ) then
       if( num_coupling_ac_el_faces > 0 ) then
-        if( .NOT. GPU_MODE ) then
-          if( SIMULATION_TYPE == 1 ) then
-            ! forward definition: pressure=-potential_dot_dot
-            call compute_coupling_viscoelastic_ac(NSPEC_AB,NGLOB_AB, &
+         if( SIMULATION_TYPE == 1 ) then
+           ! forward definition: pressure=-potential_dot_dot
+           call compute_coupling_viscoelastic_ac(NSPEC_AB,NGLOB_AB, &
                         ibool,accel,potential_dot_dot_acoustic, &
                         num_coupling_ac_el_faces, &
                         coupling_ac_el_ispec,coupling_ac_el_ijk, &
@@ -186,10 +151,10 @@
                         coupling_ac_el_jacobian2Dw, &
                         ispec_is_inner,phase_is_inner,& 
                         PML_CONDITIONS,is_CPML,potential_dot_dot_acoustic_interface) 
-          else
-            ! handles adjoint runs coupling between adjoint potential and adjoint elastic wavefield
-            ! adoint definition: pressure^\dagger=potential^\dagger
-            call compute_coupling_viscoelastic_ac(NSPEC_AB,NGLOB_AB, &
+         else
+           ! handles adjoint runs coupling between adjoint potential and adjoint elastic wavefield
+           ! adoint definition: pressure^\dagger=potential^\dagger
+           call compute_coupling_viscoelastic_ac(NSPEC_AB,NGLOB_AB, &
                               ibool,accel,-potential_acoustic_adj_coupling, &
                               num_coupling_ac_el_faces, &
                               coupling_ac_el_ispec,coupling_ac_el_ijk, &
@@ -197,11 +162,11 @@
                               coupling_ac_el_jacobian2Dw, &
                               ispec_is_inner,phase_is_inner,& 
                               PML_CONDITIONS,is_CPML,potential_dot_dot_acoustic_interface) 
-          endif
+         endif
 
-        ! adjoint simulations
-        if( SIMULATION_TYPE == 3 ) &
-          call compute_coupling_viscoelastic_ac(NSPEC_ADJOINT,NGLOB_ADJOINT, &
+         ! adjoint simulations
+         if( SIMULATION_TYPE == 3 ) &
+           call compute_coupling_viscoelastic_ac(NSPEC_ADJOINT,NGLOB_ADJOINT, &
                         ibool,b_accel,b_potential_dot_dot_acoustic, &
                         num_coupling_ac_el_faces, &
                         coupling_ac_el_ispec,coupling_ac_el_ijk, &
@@ -210,11 +175,6 @@
                         ispec_is_inner,phase_is_inner,& 
                         PML_CONDITIONS,is_CPML,potential_dot_dot_acoustic_interface) 
 
-        else
-          ! on GPU
-          call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, &
-                                           num_coupling_ac_el_faces)
-        endif ! GPU_MODE
       endif ! num_coupling_ac_el_faces
     endif
 
@@ -250,84 +210,44 @@
                         ispec_is_elastic,SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
                         nrec,islice_selected_rec,ispec_selected_rec, &
                         nadj_rec_local,adj_sourcearrays,b_accel, &
-                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, &
-                        GPU_MODE, Mesh_pointer )
+                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
 
     ! assemble all the contributions between slices using MPI
     if( phase_is_inner .eqv. .false. ) then
        ! sends accel values to corresponding MPI interface neighbors
-       if(.NOT. GPU_MODE) then
-          call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,accel, &
+       call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,accel, &
                buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
                num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
                my_neighbours_ext_mesh, &
                request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
-       else ! GPU_MODE==1
-          ! transfers boundary region to host asynchronously. The
-          ! MPI-send is done from within compute_forces_viscoelastic_cuda,
-          ! once the inner element kernels are launched, and the
-          ! memcpy has finished. see compute_forces_viscoelastic_cuda:1655
-          call transfer_boundary_from_device_a(Mesh_pointer,nspec_outer_elastic)
-       endif ! GPU_MODE
 
        ! adjoint simulations
        if( SIMULATION_TYPE == 3 ) then
-          if(.NOT. GPU_MODE) then
-             call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_accel, &
+          call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_accel, &
                   b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
                   num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
                   nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
                   my_neighbours_ext_mesh, &
                   b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
-          else ! GPU_MODE == 1
-             call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
-                       b_buffer_send_vector_ext_mesh,&
-                       num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
-                       nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
-             call assemble_MPI_vector_send_cuda(NPROC, &
-                  b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
-                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                  nibool_interfaces_ext_mesh,&
-                  my_neighbours_ext_mesh, &
-                  b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
-          endif ! GPU
        endif !adjoint
 
     else
       ! waits for send/receive requests to be completed and assembles values
-      if(.NOT. GPU_MODE) then
-         call assemble_MPI_vector_ext_mesh_w_ordered(NPROC,NGLOB_AB,accel, &
+      call assemble_MPI_vector_ext_mesh_w_ordered(NPROC,NGLOB_AB,accel, &
                             buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
                             max_nibool_interfaces_ext_mesh, &
                             nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
                             request_send_vector_ext_mesh,request_recv_vector_ext_mesh, &
                             my_neighbours_ext_mesh,myrank)
-
-      else ! GPU_MODE == 1
-         call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
-                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                            max_nibool_interfaces_ext_mesh, &
-                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
-      endif
       ! adjoint simulations
       if( SIMULATION_TYPE == 3 ) then
-         if(.NOT. GPU_MODE) then
-            call assemble_MPI_vector_ext_mesh_w_ordered(NPROC,NGLOB_ADJOINT,b_accel, &
+         call assemble_MPI_vector_ext_mesh_w_ordered(NPROC,NGLOB_ADJOINT,b_accel, &
                              b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
                              max_nibool_interfaces_ext_mesh, &
                              nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
                              b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh, &
                              my_neighbours_ext_mesh,myrank)
-
-         else ! GPU_MODE == 1
-            call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
-                              b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                              max_nibool_interfaces_ext_mesh, &
-                              nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                              b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
-         endif
       endif !adjoint
 
     endif
@@ -341,33 +261,24 @@
   if (SIMULATION_TYPE_KIN) call bc_kinflt_set_all(accel,veloc,displ)
 
  ! multiplies with inverse of mass matrix (note: rmass has been inverted already)
- if(.NOT. GPU_MODE) then
-    accel(1,:) = accel(1,:)*rmassx(:)
-    accel(2,:) = accel(2,:)*rmassy(:)
-    accel(3,:) = accel(3,:)*rmassz(:)
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) then
-       b_accel(1,:) = b_accel(1,:)*rmassx(:)
-       b_accel(2,:) = b_accel(2,:)*rmassy(:)
-       b_accel(3,:) = b_accel(3,:)*rmassz(:)
-    endif !adjoint
- else ! GPU_MODE == 1
-    call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,b_deltatover2,APPROXIMATE_OCEAN_LOAD)
- endif
+  accel(1,:) = accel(1,:)*rmassx(:)
+  accel(2,:) = accel(2,:)*rmassy(:)
+  accel(3,:) = accel(3,:)*rmassz(:)
+  ! adjoint simulations
+  if (SIMULATION_TYPE == 3) then
+     b_accel(1,:) = b_accel(1,:)*rmassx(:)
+     b_accel(2,:) = b_accel(2,:)*rmassy(:)
+     b_accel(3,:) = b_accel(3,:)*rmassz(:)
+  endif !adjoint
 
 ! updates acceleration with ocean load term
   if(APPROXIMATE_OCEAN_LOAD) then
-    if( .NOT. GPU_MODE ) then
-      call compute_coupling_ocean(NSPEC_AB,NGLOB_AB, &
-                                  ibool,rmassx,rmassy,rmassz, &
-                                  rmass_ocean_load,accel, &
-                                  free_surface_normal,free_surface_ijk,free_surface_ispec, &
-                                  num_free_surface_faces,SIMULATION_TYPE, &
-                                  NGLOB_ADJOINT,b_accel)
-    else
-      ! on GPU
-      call compute_coupling_ocean_cuda(Mesh_pointer)
-    endif
+    call compute_coupling_ocean(NSPEC_AB,NGLOB_AB, &
+                                ibool,rmassx,rmassy,rmassz, &
+                                rmass_ocean_load,accel, &
+                                free_surface_normal,free_surface_ijk,free_surface_ispec, &
+                                num_free_surface_faces,SIMULATION_TYPE, &
+                                NGLOB_ADJOINT,b_accel)
   endif
 
   ! C-PML boundary
@@ -413,14 +324,9 @@
 !
 ! corrector:
 !   updates the velocity term which requires a(t+delta)
-! GPU_MODE: this is handled in 'kernel_3' at the same time as accel*rmass
-  if(.NOT. GPU_MODE) then
-     veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
-     ! adjoint simulations
-     if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
-  else ! GPU_MODE == 1
-    if( APPROXIMATE_OCEAN_LOAD ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,b_deltatover2)
-  endif
+  veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
+  ! adjoint simulations
+  if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
 
   if(PML_CONDITIONS)then
     if(SIMULATION_TYPE == 1 .and. SAVE_FORWARD)then
@@ -432,8 +338,6 @@
   endif
 
 end subroutine compute_forces_viscoelastic
-
-
 !
 !-------------------------------------------------------------------------------------------------
 !

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_acoustic.f90	2013-06-08 10:25:46 UTC (rev 22192)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_acoustic.f90	2013-06-08 12:12:16 UTC (rev 22193)
@@ -33,8 +33,7 @@
                             num_abs_boundary_faces,rhostore,kappastore,ispec_is_acoustic,&
                             SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it,NGLOB_ADJOINT, &
                             b_potential_dot_dot_acoustic,b_reclen_potential, &
-                            b_absorb_potential,b_num_abs_boundary_faces, &
-                            GPU_MODE,Mesh_pointer)
+                            b_absorb_potential,b_num_abs_boundary_faces)
 
   implicit none
 
@@ -68,10 +67,6 @@
   real(kind=CUSTOM_REAL),dimension(NGLLSQUARE,b_num_abs_boundary_faces):: b_absorb_potential
   logical:: SAVE_FORWARD
 
-  ! GPU_MODE variables
-  integer(kind=8) :: Mesh_pointer
-  logical :: GPU_MODE
-
 ! local parameters
   real(kind=CUSTOM_REAL) :: rhol,cpl,jacobianw,absorbl
   integer :: ispec,iglob,i,j,k,iface,igll
@@ -95,62 +90,50 @@
   endif !adjoint
 
   ! absorbs absorbing-boundary surface using Sommerfeld condition (vanishing field in the outer-space)
-  if( .NOT. GPU_MODE ) then
-    ! on CPU
-    do iface=1,num_abs_boundary_faces
+  do iface=1,num_abs_boundary_faces
 
-      ispec = abs_boundary_ispec(iface)
+    ispec = abs_boundary_ispec(iface)
 
-      if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+    if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
 
-        if( ispec_is_acoustic(ispec) ) then
+      if( ispec_is_acoustic(ispec) ) then
 
-          ! reference gll points on boundary face
-          do igll = 1,NGLLSQUARE
+        ! reference gll points on boundary face
+        do igll = 1,NGLLSQUARE
 
-            ! gets local indices for GLL point
-            i = abs_boundary_ijk(1,igll,iface)
-            j = abs_boundary_ijk(2,igll,iface)
-            k = abs_boundary_ijk(3,igll,iface)
+          ! gets local indices for GLL point
+          i = abs_boundary_ijk(1,igll,iface)
+          j = abs_boundary_ijk(2,igll,iface)
+          k = abs_boundary_ijk(3,igll,iface)
 
-            ! gets global index
-            iglob=ibool(i,j,k,ispec)
+          ! gets global index
+          iglob=ibool(i,j,k,ispec)
 
-            ! determines bulk sound speed
-            rhol = rhostore(i,j,k,ispec)
-            cpl = sqrt( kappastore(i,j,k,ispec) / rhol )
+          ! determines bulk sound speed
+          rhol = rhostore(i,j,k,ispec)
+          cpl = sqrt( kappastore(i,j,k,ispec) / rhol )
 
-            ! gets associated, weighted jacobian
-            jacobianw = abs_boundary_jacobian2Dw(igll,iface)
+          ! gets associated, weighted jacobian
+          jacobianw = abs_boundary_jacobian2Dw(igll,iface)
 
-            ! Sommerfeld condition
-            absorbl = potential_dot_acoustic(iglob) * jacobianw / cpl / rhol
-            potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
-                                                - absorbl
+          ! Sommerfeld condition
+          absorbl = potential_dot_acoustic(iglob) * jacobianw / cpl / rhol
+          potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) - absorbl
 
-            ! adjoint simulations
-            if (SIMULATION_TYPE == 3) then
-              b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
+          ! adjoint simulations
+          if(SIMULATION_TYPE == 3) then
+             b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
                                                     - b_absorb_potential(igll,iface)
+          else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+              b_absorb_potential(igll,iface) = absorbl
 
-            else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
-                b_absorb_potential(igll,iface) = absorbl
+          endif !adjoint
 
-            endif !adjoint
+         enddo
+      endif ! ispec_is_acoustic
+    endif ! ispec_is_inner
+  enddo ! num_abs_boundary_faces
 
-           enddo
-
-        endif ! ispec_is_acoustic
-      endif ! ispec_is_inner
-    enddo ! num_abs_boundary_faces
-  else
-    ! GPU_MODE == .true.
-    if( num_abs_boundary_faces > 0 ) &
-      call compute_stacey_acoustic_cuda(Mesh_pointer, phase_is_inner, &
-                                        SAVE_FORWARD,b_absorb_potential)
-
-  endif
-
   ! adjoint simulations: stores absorbed wavefield part
   if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) then
     ! writes out absorbing boundary value only when second phase is running

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_viscoelastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_viscoelastic.f90	2013-06-08 10:25:46 UTC (rev 22192)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_stacey_viscoelastic.f90	2013-06-08 12:12:16 UTC (rev 22193)
@@ -37,7 +37,7 @@
                         ispec_is_elastic,SIMULATION_TYPE,SAVE_FORWARD, &
                         NSTEP,it,NGLOB_ADJOINT,b_accel, &
                         b_num_abs_boundary_faces,b_reclen_field,b_absorb_field, &
-                        GPU_MODE,Mesh_pointer,it_dsm,Veloc_dsm_boundary,Tract_dsm_boundary)
+                        it_dsm,Veloc_dsm_boundary,Tract_dsm_boundary)
 
   implicit none
 
@@ -75,10 +75,6 @@
   real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_ADJOINT):: b_accel
   logical:: SAVE_FORWARD
 
-  ! GPU_MODE variables
-  integer(kind=8) :: Mesh_pointer
-  logical :: GPU_MODE
-
 ! local parameters
   real(kind=CUSTOM_REAL) vx,vy,vz,nx,ny,nz,tx,ty,tz,vn,jacobianw
   integer :: ispec,iglob,i,j,k,iface,igll
@@ -120,84 +116,74 @@
     endif
   endif !adjoint
 
+  ! absorbs absorbing-boundary surface using Stacey condition (Clayton & Enquist)
+  do iface=1,num_abs_boundary_faces
 
-  if(.NOT. GPU_MODE) then
+     ispec = abs_boundary_ispec(iface)
 
-     ! absorbs absorbing-boundary surface using Stacey condition (Clayton & Enquist)
-     do iface=1,num_abs_boundary_faces
+     if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
 
-        ispec = abs_boundary_ispec(iface)
+        if( ispec_is_elastic(ispec) ) then
 
-        if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+           ! reference gll points on boundary face
+           do igll = 1,NGLLSQUARE
 
-           if( ispec_is_elastic(ispec) ) then
+              ! gets local indices for GLL point
+              i = abs_boundary_ijk(1,igll,iface)
+              j = abs_boundary_ijk(2,igll,iface)
+              k = abs_boundary_ijk(3,igll,iface)
 
-              ! reference gll points on boundary face
-              do igll = 1,NGLLSQUARE
+              ! gets velocity
+              iglob=ibool(i,j,k,ispec)
+              vx=veloc(1,iglob)
+              vy=veloc(2,iglob)
+              vz=veloc(3,iglob)
+              if (OLD_TEST_TO_FIX_ONE_DAY) then
+                  vx = vx - Veloc_dsm_boundary(1,it_dsm,igll,iface)
+                  vy = vy - Veloc_dsm_boundary(2,it_dsm,igll,iface)
+                  vz = vz - Veloc_dsm_boundary(3,it_dsm,igll,iface)
+              endif
+              ! gets associated normal
+              nx = abs_boundary_normal(1,igll,iface)
+              ny = abs_boundary_normal(2,igll,iface)
+              nz = abs_boundary_normal(3,igll,iface)
 
-                 ! gets local indices for GLL point
-                 i = abs_boundary_ijk(1,igll,iface)
-                 j = abs_boundary_ijk(2,igll,iface)
-                 k = abs_boundary_ijk(3,igll,iface)
+              ! velocity component in normal direction (normal points out of element)
+              vn = vx*nx + vy*ny + vz*nz
 
-                 ! gets velocity
-                 iglob=ibool(i,j,k,ispec)
-                 vx=veloc(1,iglob)
-                 vy=veloc(2,iglob)
-                 vz=veloc(3,iglob)
-                 if (OLD_TEST_TO_FIX_ONE_DAY) then
-                     vx = vx - Veloc_dsm_boundary(1,it_dsm,igll,iface)
-                     vy = vy - Veloc_dsm_boundary(2,it_dsm,igll,iface)
-                     vz = vz - Veloc_dsm_boundary(3,it_dsm,igll,iface)
-                 endif
-                 ! gets associated normal
-                 nx = abs_boundary_normal(1,igll,iface)
-                 ny = abs_boundary_normal(2,igll,iface)
-                 nz = abs_boundary_normal(3,igll,iface)
+              ! stacey term: velocity vector component * vp * rho in normal direction + vs * rho component tangential to it
+              tx = rho_vp(i,j,k,ispec)*vn*nx + rho_vs(i,j,k,ispec)*(vx-vn*nx)
+              ty = rho_vp(i,j,k,ispec)*vn*ny + rho_vs(i,j,k,ispec)*(vy-vn*ny)
+              tz = rho_vp(i,j,k,ispec)*vn*nz + rho_vs(i,j,k,ispec)*(vz-vn*nz)
 
-                 ! velocity component in normal direction (normal points out of element)
-                 vn = vx*nx + vy*ny + vz*nz
+              if (OLD_TEST_TO_FIX_ONE_DAY) then
+                  tx = tx -Tract_dsm_boundary(1,it_dsm,igll,iface)
+                  ty = ty -Tract_dsm_boundary(2,it_dsm,igll,iface)
+                  tz = tz -Tract_dsm_boundary(3,it_dsm,igll,iface)
+              endif
 
-                 ! stacey term: velocity vector component * vp * rho in normal direction + vs * rho component tangential to it
-                 tx = rho_vp(i,j,k,ispec)*vn*nx + rho_vs(i,j,k,ispec)*(vx-vn*nx)
-                 ty = rho_vp(i,j,k,ispec)*vn*ny + rho_vs(i,j,k,ispec)*(vy-vn*ny)
-                 tz = rho_vp(i,j,k,ispec)*vn*nz + rho_vs(i,j,k,ispec)*(vz-vn*nz)
+              ! gets associated, weighted jacobian
+              jacobianw = abs_boundary_jacobian2Dw(igll,iface)
 
-                 if (OLD_TEST_TO_FIX_ONE_DAY) then
-                     tx = tx -Tract_dsm_boundary(1,it_dsm,igll,iface)
-                     ty = ty -Tract_dsm_boundary(2,it_dsm,igll,iface)
-                     tz = tz -Tract_dsm_boundary(3,it_dsm,igll,iface)
-                 endif
+              ! adds stacey term (weak form)
+              accel(1,iglob) = accel(1,iglob) - tx*jacobianw
+              accel(2,iglob) = accel(2,iglob) - ty*jacobianw
+              accel(3,iglob) = accel(3,iglob) - tz*jacobianw
 
-                 ! gets associated, weighted jacobian
-                 jacobianw = abs_boundary_jacobian2Dw(igll,iface)
+              ! adjoint simulations
+              if (SIMULATION_TYPE == 3) then
+                 b_accel(:,iglob) = b_accel(:,iglob) - b_absorb_field(:,igll,iface)
+              else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+                 b_absorb_field(1,igll,iface) = tx*jacobianw
+                 b_absorb_field(2,igll,iface) = ty*jacobianw
+                 b_absorb_field(3,igll,iface) = tz*jacobianw
+              endif !adjoint
 
-                 ! adds stacey term (weak form)
-                 accel(1,iglob) = accel(1,iglob) - tx*jacobianw
-                 accel(2,iglob) = accel(2,iglob) - ty*jacobianw
-                 accel(3,iglob) = accel(3,iglob) - tz*jacobianw
+           enddo
+        endif ! ispec_is_elastic
+     endif ! ispec_is_inner
+  enddo
 
-                 ! adjoint simulations
-                 if (SIMULATION_TYPE == 3) then
-                    b_accel(:,iglob) = b_accel(:,iglob) - b_absorb_field(:,igll,iface)
-                 else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
-                    b_absorb_field(1,igll,iface) = tx*jacobianw
-                    b_absorb_field(2,igll,iface) = ty*jacobianw
-                    b_absorb_field(3,igll,iface) = tz*jacobianw
-                 endif !adjoint
-
-              enddo
-           endif ! ispec_is_elastic
-        endif ! ispec_is_inner
-     enddo
-
-  else
-    ! GPU_MODE == .true.
-    if( num_abs_boundary_faces > 0 ) &
-      call compute_stacey_viscoelastic_cuda(Mesh_pointer,phase_is_inner, &
-                                       SAVE_FORWARD,b_absorb_field)
-  endif
-
   ! adjoint simulations: stores absorbed wavefield part
   if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) then
     ! writes out absorbing boundary value only when second phase is running

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/noise_tomography.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/noise_tomography.f90	2013-06-08 10:25:46 UTC (rev 22192)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/noise_tomography.f90	2013-06-08 12:12:16 UTC (rev 22193)
@@ -696,16 +696,11 @@
 ! read surface movie (displacement) at every time steps, injected as the source of "ensemble forward wavefield"
 ! in step 2, call noise_read_add_surface_movie(..., NSTEP-it+1 ,...)
 ! in step 3, call noise_read_add_surface_movie(..., it ,...)
-  subroutine noise_read_add_surface_movie(nmovie_points, &
-                  accel, &
+  subroutine noise_read_add_surface_movie(nmovie_points,accel, &
                   normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
-                  ibool, &
-                  noise_surface_movie, &
-                  it, &
-                  NSPEC_AB_VAL,NGLOB_AB_VAL, &
+                  ibool,noise_surface_movie,it,NSPEC_AB_VAL,NGLOB_AB_VAL, &
                   num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
-                  free_surface_jacobian2Dw, &
-                  Mesh_pointer,GPU_MODE,NOISE_TOMOGRAPHY)
+                  free_surface_jacobian2Dw)
   implicit none
   include "constants.h"
   ! input parameters
@@ -733,53 +728,41 @@
   real(kind=CUSTOM_REAL) :: eta
   real(kind=CUSTOM_REAL), dimension(NDIM,NGLLSQUARE,num_free_surface_faces) :: noise_surface_movie
 
-  ! GPU_MODE parameters
-  integer(kind=8) :: Mesh_pointer
-  logical :: GPU_MODE
-  integer :: NOISE_TOMOGRAPHY
-
   ! reads in ensemble noise sources at surface
   if( num_free_surface_faces > 0 ) then
 
     ! read surface movie
     call read_abs(2,noise_surface_movie,CUSTOM_REAL*NDIM*NGLLSQUARE*num_free_surface_faces,it)
 
-    if(GPU_MODE) then
-       call noise_read_add_surface_movie_cu(Mesh_pointer, noise_surface_movie,NOISE_TOMOGRAPHY)
-    else ! GPU_MODE==0
+    ! get coordinates of surface mesh and surface displacement
+    ipoin = 0
 
-       ! get coordinates of surface mesh and surface displacement
-       ipoin = 0
+    ! loops over surface points
+    ! puts noise distrubution and direction onto the surface points
+    do iface = 1, num_free_surface_faces
 
-       ! loops over surface points
-       ! puts noise distrubution and direction onto the surface points
-       do iface = 1, num_free_surface_faces
+       ispec = free_surface_ispec(iface)
 
-          ispec = free_surface_ispec(iface)
+       do igll = 1, NGLLSQUARE
+          i = free_surface_ijk(1,igll,iface)
+          j = free_surface_ijk(2,igll,iface)
+          k = free_surface_ijk(3,igll,iface)
 
-          do igll = 1, NGLLSQUARE
-             i = free_surface_ijk(1,igll,iface)
-             j = free_surface_ijk(2,igll,iface)
-             k = free_surface_ijk(3,igll,iface)
+          ipoin = ipoin + 1
+          iglob = ibool(i,j,k,ispec)
 
-             ipoin = ipoin + 1
-             iglob = ibool(i,j,k,ispec)
+          eta = noise_surface_movie(1,igll,iface) * normal_x_noise(ipoin) + &
+                noise_surface_movie(2,igll,iface) * normal_y_noise(ipoin) + &
+                noise_surface_movie(3,igll,iface) * normal_z_noise(ipoin)
 
-             eta = noise_surface_movie(1,igll,iface) * normal_x_noise(ipoin) + &
-                  noise_surface_movie(2,igll,iface) * normal_y_noise(ipoin) + &
-                  noise_surface_movie(3,igll,iface) * normal_z_noise(ipoin)
-
-             accel(1,iglob) = accel(1,iglob) + eta * mask_noise(ipoin) * normal_x_noise(ipoin) &
+          accel(1,iglob) = accel(1,iglob) + eta * mask_noise(ipoin) * normal_x_noise(ipoin) &
                   * free_surface_jacobian2Dw(igll,iface)
-             accel(2,iglob) = accel(2,iglob) + eta * mask_noise(ipoin) * normal_y_noise(ipoin) &
+          accel(2,iglob) = accel(2,iglob) + eta * mask_noise(ipoin) * normal_y_noise(ipoin) &
                   * free_surface_jacobian2Dw(igll,iface)
-             accel(3,iglob) = accel(3,iglob) + eta * mask_noise(ipoin) * normal_z_noise(ipoin) &
+          accel(3,iglob) = accel(3,iglob) + eta * mask_noise(ipoin) * normal_z_noise(ipoin) &
                   * free_surface_jacobian2Dw(igll,iface) ! wgllwgll_xy(i,j) * jacobian2D_top(i,j,iface)
-          enddo
-
        enddo
-    endif ! GPU_MODE
-
+    enddo
   endif
 
   end subroutine noise_read_add_surface_movie



More information about the CIG-COMMITS mailing list