[cig-commits] r19657 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER: DATA EXAMPLES/regional_Greece_small EXAMPLES/regional_Greece_small/DATA src/cuda src/specfem3D
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Mon Feb 20 16:33:57 PST 2012
Author: danielpeter
Date: 2012-02-20 16:33:56 -0800 (Mon, 20 Feb 2012)
New Revision: 19657
Removed:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu
Modified:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/CMTSOLUTION
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/Par_file
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/STATIONS
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/process.sh
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
Log:
updates stacey and sources routines
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/CMTSOLUTION
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/CMTSOLUTION 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/CMTSOLUTION 2012-02-21 00:33:56 UTC (rev 19657)
@@ -1,13 +1,13 @@
-PDE 1994 6 9 0 33 16.40 -13.8300 -67.5600 637.0 6.9 6.8 NORTHERN BOLIVIA
-event name: 060994A
-time shift: 29.0000
-half duration: 20.0000
-latitude: -13.8200
-longitude: -67.2500
-depth: 647.1000
-Mrr: -7.590000e+27
-Mtt: 7.750000e+27
-Mpp: -1.600000e+26
-Mrt: -2.503000e+28
-Mrp: 4.200000e+26
-Mtp: -2.480000e+27
+PDEW 2008 1 6 5 14 23.70 37.2200 22.6900 75.0 6.1 6.2 SOUTHERN GREECE
+event name: 200801060514A
+time shift: 0.000
+half duration: 3.0000
+latitude: 36.9800
+longitude: 22.8700
+depth: 92.3900
+Mrr: 7.740000e+24
+Mtt: -1.830000e+25
+Mpp: 1.060000e+25
+Mrt: 1.290000e+25
+Mrp: -8.450000e+24
+Mtp: -4.430000e+24
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/Par_file 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/Par_file 2012-02-21 00:33:56 UTC (rev 19657)
@@ -1,11 +1,11 @@
# forward or adjoint simulation
-SIMULATION_TYPE = 1
+SIMULATION_TYPE = 1
NOISE_TOMOGRAPHY = 0 # flag of noise tomography, three steps (1,2,3). If earthquake simulation, set it to 0.
SAVE_FORWARD = .false. # save last frame of forward simulation or not
# number of chunks (1,2,3 or 6)
-NCHUNKS = 1
+NCHUNKS = 1
# angular width of the first chunk (not used if full sphere with six chunks)
ANGULAR_WIDTH_XI_IN_DEGREES = 20.d0 # angular size of a chunk
@@ -16,37 +16,32 @@
# number of elements at the surface along the two sides of the first chunk
# (must be multiple of 16 and 8 * multiple of NPROC below)
-NEX_XI = 64
+NEX_XI = 64
NEX_ETA = 64
# number of MPI processors along the two sides of the first chunk
-NPROC_XI = 2
+NPROC_XI = 2
NPROC_ETA = 2
# 1D models with real structure:
-# 1D_isotropic_prem, 1D_transversely_isotropic_prem, 1D_iasp91, 1D_1066a, 1D_ak135, 1D_ref, 1D_ref_iso, 1D_jp3d,1D_sea99
+# 1D_isotropic_prem, 1D_transversely_isotropic_prem, 1D_iasp91, 1D_1066a, 1D_ak135, 1D_ref, 1D_ref_iso
#
# 1D models with only one fictitious averaged crustal layer:
# 1D_isotropic_prem_onecrust, 1D_transversely_isotropic_prem_onecrust, 1D_iasp91_onecrust, 1D_1066a_onecrust, 1D_ak135_onecrust
#
# fully 3D models:
# transversely_isotropic_prem_plus_3D_crust_2.0, 3D_anisotropic, 3D_attenuation,
-# s20rts, s40rts, s362ani, s362iso, s362wmani, s362ani_prem, s362ani_3DQ, s362iso_3DQ,
+# s20rts, s362ani, s362iso, s362wmani, s362ani_prem, s362ani_3DQ, s362iso_3DQ,
# s29ea, s29ea,sea99_jp3d1994,sea99,jp3d1994,heterogen
-#
-# 3D models with 1D crust: append "_1Dcrust" the the 3D model name
-# to take the 1D crustal model from the
-# associated reference model rather than the default 3D crustal model
-# e.g. s20rts_1Dcrust, s362ani_1Dcrust, etc.
MODEL = s362ani
# parameters describing the Earth model
-OCEANS = .true.
-ELLIPTICITY = .true.
-TOPOGRAPHY = .true.
-GRAVITY = .true.
-ROTATION = .true.
-ATTENUATION = .true.
+OCEANS = .false.
+ELLIPTICITY = .false.
+TOPOGRAPHY = .false.
+GRAVITY = .false.
+ROTATION = .false.
+ATTENUATION = .false.
# absorbing boundary conditions for a regional simulation
ABSORBING_CONDITIONS = .true.
@@ -55,12 +50,10 @@
RECORD_LENGTH_IN_MINUTES = 2.5d0
# save AVS or OpenDX movies
-#MOVIE_COARSE saves movie only at corners of elements (SURFACE OR VOLUME)
-#MOVIE_COARSE does not work with create_movie_AVS_DX
MOVIE_SURFACE = .false.
MOVIE_VOLUME = .false.
-MOVIE_COARSE = .false.
-NTSTEP_BETWEEN_FRAMES = 100
+MOVIE_COARSE = .true.
+NTSTEP_BETWEEN_FRAMES = 50
HDUR_MOVIE = 0.d0
# save movie in volume. Will save element if center of element is in prescribed volume
@@ -69,6 +62,7 @@
# start/stop: frames will be stored at MOVIE_START + i*NSTEP_BETWEEN_FRAMES, where i=(0,1,2..) and iNSTEP_BETWEEN_FRAMES <= MOVIE_STOP
# movie_volume_type: 1=strain, 2=time integral of strain, 3=\mu*time integral of strain
# type 4 saves the trace and deviatoric stress in the whole volume, 5=displacement, 6=velocity
+#MOVIE_VOLUME_COARSE saves movie only at corners of elements
MOVIE_VOLUME_TYPE = 2
MOVIE_TOP_KM = -100.0
MOVIE_BOTTOM_KM = 1000.0
@@ -80,7 +74,7 @@
MOVIE_STOP = 40000
# save mesh files to check the mesh
-SAVE_MESH_FILES = .false.
+SAVE_MESH_FILES = .true.
# restart files (number of runs can be 1, 2 or 3, choose 1 for no restart files)
NUMBER_OF_RUNS = 1
@@ -90,7 +84,7 @@
LOCAL_PATH = ./DATABASES_MPI
# interval at which we output time step info and max of norm of displacement
-NTSTEP_BETWEEN_OUTPUT_INFO = 100
+NTSTEP_BETWEEN_OUTPUT_INFO = 50
# interval in time steps for temporary writing of seismograms
NTSTEP_BETWEEN_OUTPUT_SEISMOS = 5000000
@@ -105,7 +99,7 @@
ROTATE_SEISMOGRAMS_RT = .false.
# decide if master process writes all the seismograms or if all processes do it in parallel
-WRITE_SEISMOGRAMS_BY_MASTER = .true.
+WRITE_SEISMOGRAMS_BY_MASTER = .false.
# save all seismograms in one large combined file instead of one file per seismogram
# to avoid overloading shared non-local file systems such as GPFS for instance
@@ -120,3 +114,4 @@
# set to true to use GPUs
GPU_MODE = .false.
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/STATIONS
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/STATIONS 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/STATIONS 2012-02-21 00:33:56 UTC (rev 19657)
@@ -1,129 +1,6 @@
-AAK II 42.6390 74.4940 1645.0 30.0
-ABKT II 37.9304 58.1189 678.0 7.0
-ABPO II -19.0180 47.2290 1528.0 5.3
-ALE II 82.5033 -62.3500 60.0 0.0
-ARU II 56.4302 58.5625 250.0 0.0
-ASCN II -7.9327 -14.3601 173.0 100.0
-BFO II 48.3319 8.3311 589.0 0.0
-BORG II 64.7474 -21.3268 110.0 95.0
-BRVK II 53.0581 70.2828 330.0 15.0
-CMLA II 37.7637 -25.5243 429.0 7.0
-COCO II -12.1901 96.8349 1.0 70.0
-DGAR II -7.4121 72.4525 1.0 2.0
-EFI II -51.6753 -58.0637 110.0 80.0
-ERM II 42.0150 143.1572 40.0 0.0
-ESK II 55.3167 -3.2050 242.0 0.0
-FFC II 54.7250 -101.9783 338.0 0.0
-GAR II 39.0000 70.3167 1300.0 0.0
-HOPE II -54.2836 -36.4879 20.0 0.0
-JTS II 10.2908 -84.9525 340.0 0.0
-KAPI II -5.0142 119.7517 300.0 100.0
-KDAK II 57.7828 -152.5835 152.0 5.5
-KIV II 43.9562 42.6888 1210.0 0.0
-KURK II 50.7154 78.6202 184.0 25.0
-KWAJ II 8.8019 167.6130 0.0 0.0
-LVZ II 67.8979 34.6514 630.0 200.0
-MBAR II -0.6019 30.7382 1390.0 100.0
-MSEY II -4.6737 55.4792 475.0 91.0
-MSVF II -17.7448 178.0528 801.1 100.0
-NIL II 33.6506 73.2686 629.0 68.0
-NNA II -11.9875 -76.8422 575.0 40.0
-NRIL II 69.5049 88.4414 92.0 506.0
-NVS II 54.8404 83.2346 150.0 0.0
-OBN II 55.1146 36.5674 160.0 30.0
-PALK II 7.2728 80.7022 460.0 90.0
-RAYN II 23.5225 45.5032 631.0 2.0
-RPN II -27.1267 -109.3344 110.0 0.0
-SACV II 14.9702 -23.6085 387.0 97.0
-SHEL II -15.9588 -5.7457 537.0 60.0
-SUR II -32.3797 20.8117 1770.0 0.0
-TAU II -42.9099 147.3204 132.0 0.0
-TLY II 51.6807 103.6438 579.0 20.0
-UOSS II 24.9453 56.2042 284.4 0.0
-WRAB II -19.9336 134.3600 366.0 100.0
-XPFO II 33.6107 -116.4555 1280.0 0.0
-AAE IU 9.0292 38.7656 2442.0 0.0
-ADK IU 51.8823 -176.6842 130.0 0.0
-AFI IU -13.9093 -171.7773 705.0 1.0
-ANMO IU 34.9459 -106.4572 1720.0 100.0
-ANTO IU 39.8680 32.7934 895.0 195.0
-BBSR IU 32.3713 -64.6963 -1.3 31.4
-BILL IU 68.0653 166.4531 320.0 0.0
-BOCO IU 4.5869 -74.0432 3137.0 23.0
-CASY IU -66.2792 110.5354 5.0 5.0
-CCM IU 38.0557 -91.2446 171.0 51.0
-CHTO IU 18.8141 98.9443 420.0 0.0
-COLA IU 64.8736 -147.8616 200.0 0.0
-COR IU 44.5855 -123.3046 110.0 0.0
-CTAO IU -20.0882 146.2545 320.0 37.0
-DAV IU 7.0697 125.5791 149.0 1.0
-DWPF IU 28.1103 -81.4327 -132.0 162.0
-FUNA IU -8.5259 179.1966 19.0 1.0
-FURI IU 8.8952 38.6798 2565.0 5.0
-GNI IU 40.1480 44.7410 1509.0 100.0
GRFO IU 49.6909 11.2203 384.0 116.0
-GUMO IU 13.5893 144.8684 61.0 109.0
-HKT IU 29.9618 -95.8384 -413.0 450.0
-HNR IU -9.4387 159.9475 0.0 100.0
-HRV IU 42.5064 -71.5583 200.0 0.0
-INCN IU 37.4776 126.6239 79.0 1.0
-JOHN IU 16.7329 -169.5292 -37.0 39.0
-KBS IU 78.9154 11.9385 90.0 3.0
-KEV IU 69.7565 27.0035 85.0 15.0
-KIEV IU 50.7012 29.2242 140.0 40.0
-KIP IU 21.4233 -158.0150 37.0 33.0
-KMBO IU -1.1271 37.2525 1930.0 20.0
-KNTN IU -2.7744 -171.7186 18.0 2.0
-KONO IU 59.6491 9.5982 216.0 340.0
-KOWA IU 14.4967 -4.0140 316.0 5.0
-LCO IU -29.0110 -70.7004 2300.0 0.0
-LSZ IU -15.2779 28.1882 1200.0 0.0
-LVC IU -22.6127 -68.9111 2930.0 30.0
-MA2 IU 59.5756 150.7700 337.0 2.0
-MAJO IU 36.5457 138.2041 405.0 0.0
-MAKZ IU 46.8080 81.9770 590.0 10.0
-MBWA IU -21.1590 119.7313 181.0 9.0
-MIDW IU 28.2155 -177.3697 17.8 1.0
-MSKU IU -1.6557 13.6116 287.0 25.0
-NAI IU -1.2739 36.8037 1692.0 0.0
-NWAO IU -32.9277 117.2390 370.9 9.1
-OTAV IU 0.2376 -78.4508 3495.0 15.0
-PAB IU 39.5446 -4.3499 950.0 0.0
-PAYG IU -0.6742 -90.2861 170.0 100.0
-PET IU 53.0233 158.6499 105.0 5.0
-PMG IU -9.4047 147.1597 90.0 0.0
-PMSA IU -64.7744 -64.0489 40.0 0.0
-POHA IU 19.7573 -155.5326 1910.0 80.0
-PTCN IU -25.0713 -130.0953 218.0 2.0
-PTGA IU -0.7308 -59.9666 141.0 96.0
-QSPA IU -89.9289 144.4382 2847.0 3.0
-RAIO IU 46.0403 -122.8851 11.0 0.0
-RAO IU -29.2450 -177.9290 59.5 0.5
-RAR IU -21.2125 -159.7733 -72.0 100.0
-RCBR IU -5.8274 -35.9014 291.0 109.0
-RSSD IU 44.1212 -104.0359 2022.7 67.3
-SAML IU -8.9489 -63.1831 120.0 0.0
-SBA IU -77.8492 166.7572 48.0 2.0
-SDV IU 8.8839 -70.6340 1588.0 32.0
-SFJD IU 66.9961 -50.6208 329.0 1.0
-SJG IU 18.1091 -66.1500 420.0 0.0
-SLBS IU 23.6858 -109.9443 825.0 0.0
-SNZO IU -41.3087 174.7043 20.0 100.0
-SSPA IU 40.6358 -77.8876 170.0 100.0
-TARA IU 1.3549 172.9229 19.0 1.0
-TATO IU 24.9735 121.4971 77.1 82.9
-TBT IU 28.6794 -17.9145 180.0 40.0
-TEIG IU 20.2263 -88.2763 15.0 25.0
-TIXI IU 71.6341 128.8667 40.0 0.0
-TOL IU 39.8814 -4.0485 480.0 0.0
-TRIS IU -37.0681 -12.3152 58.0 2.0
-TRQA IU -38.0568 -61.9787 439.0 101.0
-TSUM IU -19.2022 17.5838 1260.0 0.0
-TUC IU 32.3098 -110.7847 909.0 1.0
-ULN IU 47.8651 107.0532 1610.0 0.0
-WAKE IU 19.2834 166.6520 19.0 1.0
-WCI IU 38.2289 -86.2939 78.0 132.0
-WVT IU 36.1297 -87.8300 170.0 0.0
-XMAS IU 2.0448 -157.4457 19.0 1.0
-YAK IU 62.0310 129.6805 110.0 14.0
-YSS IU 46.9587 142.7604 148.0 2.0
+BEKI YL 41.31500 34.26300 1415.0 0.0
+BGIO SR 31.72200 35.08780 752.0 100.0
+LIT HT 40.10080 22.49000 480.0 0.0
+ZKR GE 35.11470 26.21700 270.0 0.0
+S001 XS 37.28300 21.71800 156.0 0.0
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file 2012-02-21 00:33:56 UTC (rev 19657)
@@ -33,15 +33,15 @@
# transversely_isotropic_prem_plus_3D_crust_2.0, 3D_anisotropic, 3D_attenuation,
# s20rts, s362ani, s362iso, s362wmani, s362ani_prem, s362ani_3DQ, s362iso_3DQ,
# s29ea, s29ea,sea99_jp3d1994,sea99,jp3d1994,heterogen
-MODEL = 1D_transversely_isotropic_prem
+MODEL = s362ani
# parameters describing the Earth model
-OCEANS = .true.
-ELLIPTICITY = .true.
-TOPOGRAPHY = .true.
-GRAVITY = .true.
-ROTATION = .true.
-ATTENUATION = .true.
+OCEANS = .false.
+ELLIPTICITY = .false.
+TOPOGRAPHY = .false.
+GRAVITY = .false.
+ROTATION = .false.
+ATTENUATION = .false.
# absorbing boundary conditions for a regional simulation
ABSORBING_CONDITIONS = .true.
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/process.sh
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/process.sh 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/process.sh 2012-02-21 00:33:56 UTC (rev 19657)
@@ -32,15 +32,15 @@
# using default configuration
cd ../../
# configures package with ifort compiler
-./configure F90=ifort MPIF90=/usr/local/openmpi-ifort/bin/mpif90 FLAGS_CHECK="-O3 -assume byterecl" FLAGS_NO_CHECK="-O3 -assume byterecl" > tmp.log
+#./configure F90=ifort MPIF90=/usr/local/openmpi-ifort/bin/mpif90 FLAGS_CHECK="-O3 -assume byterecl" FLAGS_NO_CHECK="-O3 -assume byterecl" > tmp.log
# configures package with gfortran compiler
#./configure F90=gfortran MPIF90=mpif90 FLAGS_CHECK="-O3" FLAGS_NO_CHECK="-O3"
# compiles for a forward simulation
cp $currentdir/DATA/Par_file DATA/Par_file
make clean
-make >& $currentdir/tmp_make_output.log
-make xcombine_vol_data >> $currentdir/tmp_make_output.log
+make #>& $currentdir/tmp_make_output.log
+make xcombine_vol_data #>> $currentdir/tmp_make_output.log
# backup of constants setup
cp setup/* $currentdir/OUTPUT_FILES/
Deleted: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu 2012-02-21 00:33:56 UTC (rev 19657)
@@ -1,369 +0,0 @@
-/*
- !=====================================================================
- !
- ! S p e c f e m 3 D V e r s i o n 2 . 0
- ! ---------------------------------------
- !
- ! Main authors: Dimitri Komatitsch and Jeroen Tromp
- ! Princeton University, USA and University of Pau / CNRS / INRIA
- ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
- ! April 2011
- !
- ! This program is free software; you can redistribute it and/or modify
- ! it under the terms of the GNU General Public License as published by
- ! the Free Software Foundation; either version 2 of the License, or
- ! (at your option) any later version.
- !
- ! This program is distributed in the hope that it will be useful,
- ! but WITHOUT ANY WARRANTY; without even the implied warranty of
- ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ! GNU General Public License for more details.
- !
- ! You should have received a copy of the GNU General Public License along
- ! with this program; if not, write to the Free Software Foundation, Inc.,
- ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- !
- !=====================================================================
- */
-
-#include <stdio.h>
-#include <cuda.h>
-#include <cublas.h>
-
-#include <sys/types.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include "config.h"
-#include "mesh_constants_cuda.h"
-
-
-/* ----------------------------------------------------------------------------------------------- */
-
-// acoustic sources
-
-/* ----------------------------------------------------------------------------------------------- */
-
-__global__ void compute_add_sources_acoustic_kernel(realw* potential_dot_dot_acoustic,
- int* ibool,
- int* ispec_is_inner,
- int phase_is_inner,
- realw* sourcearrays,
- double* stf_pre_compute,
- int myrank,
- int* islice_selected_source,
- int* ispec_selected_source,
- int* ispec_is_acoustic,
- realw* kappastore,
- int NSOURCES) {
- int i = threadIdx.x;
- int j = threadIdx.y;
- int k = threadIdx.z;
-
- int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx
-
- int ispec;
- int iglob;
- realw stf;
- realw kappal;
-
- if( isource < NSOURCES ){
-
- if(myrank == islice_selected_source[isource]) {
-
- ispec = ispec_selected_source[isource]-1;
-
- if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] ) {
-
- stf = (realw) stf_pre_compute[isource];
- iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
- kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
-
- atomicAdd(&potential_dot_dot_acoustic[iglob],
- -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal);
-
- // potential_dot_dot_acoustic[iglob] +=
- // -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal;
- }
- }
- }
-}
-
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
-void FC_FUNC_(compute_add_sources_ac_cuda,
- COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* NSOURCESf,
- int* SIMULATION_TYPEf,
- double* h_stf_pre_compute,
- int* myrankf) {
-
-TRACE("compute_add_sources_ac_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- // check if anything to do
- if( mp->nsources_local == 0 ) return;
-
- int phase_is_inner = *phase_is_innerf;
- int NSOURCES = *NSOURCESf;
- int myrank = *myrankf;
-
- int num_blocks_x = NSOURCES;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = (int) ceil(num_blocks_x*0.5f);
- num_blocks_y = num_blocks_y*2;
- }
-
- // copies pre-computed source time factors onto GPU
- print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
- NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(5,5,5);
-
- compute_add_sources_acoustic_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
- mp->d_ibool,
- mp->d_ispec_is_inner,
- phase_is_inner,
- mp->d_sourcearrays,
- mp->d_stf_pre_compute,
- myrank,
- mp->d_islice_selected_source,
- mp->d_ispec_selected_source,
- mp->d_ispec_is_acoustic,
- mp->d_kappastore,
- NSOURCES);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_add_sources_ac_cuda");
-#endif
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
-void FC_FUNC_(compute_add_sources_ac_s3_cuda,
- COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* NSOURCESf,
- int* SIMULATION_TYPEf,
- double* h_stf_pre_compute,
- int* myrankf) {
-
-TRACE("compute_add_sources_ac_s3_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- // check if anything to do
- if( mp->nsources_local == 0 ) return;
-
- int phase_is_inner = *phase_is_innerf;
- int NSOURCES = *NSOURCESf;
- int myrank = *myrankf;
-
- int num_blocks_x = NSOURCES;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = (int) ceil(num_blocks_x*0.5f);
- num_blocks_y = num_blocks_y*2;
- }
-
- // copies source time factors onto GPU
- print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
- NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(5,5,5);
-
- compute_add_sources_acoustic_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
- mp->d_ibool,
- mp->d_ispec_is_inner,
- phase_is_inner,
- mp->d_sourcearrays,
- mp->d_stf_pre_compute,
- myrank,
- mp->d_islice_selected_source,
- mp->d_ispec_selected_source,
- mp->d_ispec_is_acoustic,
- mp->d_kappastore,
- NSOURCES);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_add_sources_ac_s3_cuda");
-#endif
-}
-
-
-/* ----------------------------------------------------------------------------------------------- */
-
-// acoustic adjoint sources
-
-/* ----------------------------------------------------------------------------------------------- */
-
-__global__ void add_sources_ac_SIM_TYPE_2_OR_3_kernel(realw* potential_dot_dot_acoustic,
- int nrec,
- realw* adj_sourcearrays,
- int* ibool,
- int* ispec_is_inner,
- int* ispec_is_acoustic,
- int* ispec_selected_rec,
- int phase_is_inner,
- int* pre_computed_irec,
- int nadj_rec_local,
- realw* kappastore) {
-
- int irec_local = blockIdx.x + gridDim.x*blockIdx.y;
-
- // because of grid shape, irec_local can be too big
- if(irec_local < nadj_rec_local) {
-
- int irec = pre_computed_irec[irec_local];
-
- int ispec = ispec_selected_rec[irec]-1;
- if( ispec_is_acoustic[ispec] ){
-
- // checks if element is in phase_is_inner run
- if(ispec_is_inner[ispec] == phase_is_inner) {
- int i = threadIdx.x;
- int j = threadIdx.y;
- int k = threadIdx.z;
-
- int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-
- //kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
-
- //potential_dot_dot_acoustic[iglob] += adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
- // pre_computed_irec_local_index[irec],
- // pre_computed_index,
- // 0,
- // i,j,k)]/kappal;
-
- // beware, for acoustic medium, a pressure source would be taking the negative
- // and divide by Kappa of the fluid;
- // this would have to be done when constructing the adjoint source.
- //
- // note: we take the first component of the adj_sourcearrays
- // the idea is to have e.g. a pressure source, where all 3 components would be the same
- realw stf = adj_sourcearrays[INDEX5(5,5,5,3,i,j,k,0,irec_local)]; // / kappal
-
- atomicAdd(&potential_dot_dot_acoustic[iglob],stf);
-
- //+adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
- // pre_computed_irec_local_index[irec],pre_computed_index-1,
- // 0,i,j,k)] // / kappal
- // );
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-
-extern "C"
-void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda,
- ADD_SOURCES_AC_SIM_2_OR_3_CUDA)(long* Mesh_pointer,
- realw* h_adj_sourcearrays,
- int* phase_is_inner,
- int* h_ispec_is_inner,
- int* h_ispec_is_acoustic,
- int* h_ispec_selected_rec,
- int* myrank,
- int* nrec,
- int* time_index,
- int* h_islice_selected_rec,
- int* nadj_rec_local,
- int* NTSTEP_BETWEEN_READ_ADJSRC) {
-
-TRACE("add_sources_ac_sim_2_or_3_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-
- // checks
- if( *nadj_rec_local != mp->nadj_rec_local) exit_on_cuda_error("add_sources_ac_sim_type_2_or_3: nadj_rec_local not equal\n");
-
- // make sure grid dimension is less than 65535 in x dimension
- int num_blocks_x = mp->nadj_rec_local;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = (int) ceil(num_blocks_x*0.5f);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y,1);
- dim3 threads(5,5,5);
-
- // build slice of adj_sourcearrays because full array is *very* large.
- // note: this extracts array values for local adjoint sources at given time step "time_index"
- // from large adj_sourcearrays array into h_adj_sourcearrays_slice
- int ispec,i,j,k;
- int irec_local = 0;
- for(int irec = 0; irec < *nrec; irec++) {
- if(*myrank == h_islice_selected_rec[irec]) {
- irec_local++;
-
- // takes only acoustic sources
- ispec = h_ispec_selected_rec[irec]-1;
- if( h_ispec_is_acoustic[ispec] ){
-
- if( h_ispec_is_inner[ispec] == *phase_is_inner) {
- for(k=0;k<5;k++) {
- for(j=0;j<5;j++) {
- for(i=0;i<5;i++) {
- mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,0,irec_local-1)]
- = h_adj_sourcearrays[INDEX6(mp->nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,(*time_index)-1,
- 0,i,j,k)];
-
- mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,1,irec_local-1)]
- = h_adj_sourcearrays[INDEX6(mp->nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,(*time_index)-1,
- 1,i,j,k)];
-
- mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,2,irec_local-1)]
- = h_adj_sourcearrays[INDEX6(mp->nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,(*time_index)-1,
- 2,i,j,k)];
- }
- }
- }
- } // phase_is_inner
- } // h_ispec_is_acoustic
- }
- }
- // check all local sources were added
- if( irec_local != mp->nadj_rec_local) exit_on_error("irec_local not equal to nadj_rec_local\n");
-
- // copies extracted array values onto GPU
- print_CUDA_error_if_any(cudaMemcpy(mp->d_adj_sourcearrays, mp->h_adj_sourcearrays_slice,
- (mp->nadj_rec_local)*3*NGLL3*sizeof(realw),cudaMemcpyHostToDevice),99099);
-
- // launches cuda kernel for acoustic adjoint sources
- add_sources_ac_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
- *nrec,
- mp->d_adj_sourcearrays,
- mp->d_ibool,
- mp->d_ispec_is_inner,
- mp->d_ispec_is_acoustic,
- mp->d_ispec_selected_rec,
- *phase_is_inner,
- mp->d_pre_computed_irec,
- mp->nadj_rec_local,
- mp->d_kappastore);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("add_sources_acoustic_SIM_TYPE_2_OR_3_kernel");
-#endif
-}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu 2012-02-21 00:33:56 UTC (rev 19657)
@@ -47,23 +47,20 @@
__global__ void compute_add_sources_kernel(realw* accel,
int* ibool,
- int* ispec_is_inner,
- int phase_is_inner,
realw* sourcearrays,
double* stf_pre_compute,
int myrank,
int* islice_selected_source,
int* ispec_selected_source,
- int* ispec_is_elastic,
int NSOURCES) {
+ int ispec,iglob;
+ realw stf;
+
int i = threadIdx.x;
int j = threadIdx.y;
int k = threadIdx.z;
int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx
- int ispec;
- int iglob;
- realw stf;
if(isource < NSOURCES) { // when NSOURCES > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
@@ -71,21 +68,19 @@
ispec = ispec_selected_source[isource]-1;
- if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] ) {
+ stf = (realw) stf_pre_compute[isource];
+ iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
- stf = (realw) stf_pre_compute[isource];
- iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-
- atomicAdd(&accel[iglob*3],
- sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf);
- atomicAdd(&accel[iglob*3+1],
- sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 1, i,j,k)]*stf);
- atomicAdd(&accel[iglob*3+2],
- sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)]*stf);
- }
+ // note: for global version, sourcearrays has dimensions
+ // sourcearrays(NDIM,NGLLX,NGLLY,NGLLZ,NSOURCES)
+ atomicAdd(&accel[iglob*3],
+ sourcearrays[INDEX5(3,5,5,5, 0,i,j,k,isource)]*stf);
+ atomicAdd(&accel[iglob*3+1],
+ sourcearrays[INDEX5(3,5,5,5, 1,i,j,k,isource)]*stf);
+ atomicAdd(&accel[iglob*3+2],
+ sourcearrays[INDEX5(3,5,5,5, 2,i,j,k,isource)]*stf);
}
}
-
}
@@ -94,21 +89,17 @@
extern "C"
void FC_FUNC_(compute_add_sources_el_cuda,
COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* NSOURCESf,
- double* h_stf_pre_compute,
- int* myrankf) {
+ int* NSOURCESf,
+ double* h_stf_pre_compute) {
-TRACE("compute_add_sources_el_cuda");
+ TRACE("compute_add_sources_el_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- // check if anything to do
+ // checks if anything to do
if( mp->nsources_local == 0 ) return;
- int phase_is_inner = *phase_is_innerf;
int NSOURCES = *NSOURCESf;
- int myrank = *myrankf;
int num_blocks_x = NSOURCES;
int num_blocks_y = 1;
@@ -116,24 +107,21 @@
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(5,5,5);
- //double* d_stf_pre_compute;
+ // copies source time function buffer values to GPU
print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(5,5,5);
-
- compute_add_sources_kernel<<<grid,threads>>>(mp->d_accel,
- mp->d_ibool,
- mp->d_ispec_is_inner,
- phase_is_inner,
+ // adds source contributions
+ compute_add_sources_kernel<<<grid,threads>>>(mp->d_accel_crust_mantle,
+ mp->d_ibool_crust_mantle,
mp->d_sourcearrays,
mp->d_stf_pre_compute,
- myrank,
+ mp->myrank,
mp->d_islice_selected_source,
mp->d_ispec_selected_source,
- mp->d_ispec_is_elastic,
NSOURCES);
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -145,42 +133,38 @@
extern "C"
void FC_FUNC_(compute_add_sources_el_s3_cuda,
- COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
- double* h_stf_pre_compute,
+ COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer_f,
int* NSOURCESf,
- int* phase_is_inner,
- int* myrank) {
+ double* h_stf_pre_compute) {
TRACE("compute_add_sources_el_s3_cuda");
- // EPIK_TRACER("compute_add_sources_el_s3_cuda");
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ // checks if anything to do
+ if( mp->nsources_local == 0 ) return;
+
int NSOURCES = *NSOURCESf;
- print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
- NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_add_sources_el_s3_cuda");
-#endif
-
int num_blocks_x = NSOURCES;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
-
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(5,5,5);
- compute_add_sources_kernel<<<grid,threads>>>(mp->d_b_accel,mp->d_ibool,
- mp->d_ispec_is_inner, *phase_is_inner,
+ // copies source time function buffer values to GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+ NSOURCES*sizeof(double),cudaMemcpyHostToDevice),19);
+
+ compute_add_sources_kernel<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
+ mp->d_ibool_crust_mantle,
mp->d_sourcearrays,
mp->d_stf_pre_compute,
- *myrank,
- mp->d_islice_selected_source,mp->d_ispec_selected_source,
- mp->d_ispec_is_elastic,
+ mp->myrank,
+ mp->d_islice_selected_source,
+ mp->d_ispec_selected_source,
NSOURCES);
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -219,7 +203,6 @@
extern "C"
void FC_FUNC_(add_source_master_rec_noise_cu,
ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
- int* myrank_f,
int* it_f,
int* irec_master_noise_f,
int* islice_selected_rec) {
@@ -230,12 +213,11 @@
int it = *it_f-1; // -1 for Fortran -> C indexing differences
int irec_master_noise = *irec_master_noise_f;
- int myrank = *myrank_f;
dim3 grid(1,1,1);
dim3 threads(NGLL3,1,1);
- if(myrank == islice_selected_rec[irec_master_noise-1]) {
+ if(mp->myrank == islice_selected_rec[irec_master_noise-1]) {
add_source_master_rec_noise_cuda_kernel<<<grid,threads>>>(mp->d_ibool,
mp->d_ispec_selected_rec,
irec_master_noise,
@@ -259,47 +241,35 @@
int nrec,
realw* adj_sourcearrays,
int* ibool,
- int* ispec_is_inner,
- int* ispec_is_elastic,
int* ispec_selected_rec,
- int phase_is_inner,
int* pre_computed_irec,
int nadj_rec_local) {
+ int ispec,iglob;
+ int irec,i,j,k;
+
int irec_local = blockIdx.x + gridDim.x*blockIdx.y;
if(irec_local < nadj_rec_local) { // when nrec > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
- int irec = pre_computed_irec[irec_local];
+ irec = pre_computed_irec[irec_local];
+ ispec = ispec_selected_rec[irec]-1;
- int ispec = ispec_selected_rec[irec]-1;
- if( ispec_is_elastic[ispec] ){
+ i = threadIdx.x;
+ j = threadIdx.y;
+ k = threadIdx.z;
+ iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
- if(ispec_is_inner[ispec] == phase_is_inner) {
- int i = threadIdx.x;
- int j = threadIdx.y;
- int k = threadIdx.z;
- int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+ // atomic operations are absolutely necessary for correctness!
+ atomicAdd(&accel[3*iglob],adj_sourcearrays[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
+ 0,i,j,k,irec_local)]);
- // atomic operations are absolutely necessary for correctness!
- atomicAdd(&accel[3*iglob],adj_sourcearrays[INDEX5(5,5,5,3,
- i,j,k,
- 0,
- irec_local)]);
+ atomicAdd(&accel[1+3*iglob], adj_sourcearrays[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
+ 1,i,j,k,irec_local)]);
- atomicAdd(&accel[1+3*iglob], adj_sourcearrays[INDEX5(5,5,5,3,
- i,j,k,
- 1,
- irec_local)]);
-
- atomicAdd(&accel[2+3*iglob],adj_sourcearrays[INDEX5(5,5,5,3,
- i,j,k,
- 2,
- irec_local)]);
- }
- } // ispec_is_elastic
+ atomicAdd(&accel[2+3*iglob],adj_sourcearrays[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
+ 2,i,j,k,irec_local)]);
}
-
}
/* ----------------------------------------------------------------------------------------------- */
@@ -307,24 +277,18 @@
extern "C"
void FC_FUNC_(add_sources_el_sim_type_2_or_3,
ADD_SOURCES_EL_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
- realw* h_adj_sourcearrays,
- int* phase_is_inner,
- int* h_ispec_is_inner,
- int* h_ispec_is_elastic,
- int* h_ispec_selected_rec,
- int* myrank,
- int* nrec,
- int* time_index,
- int* h_islice_selected_rec,
- int* nadj_rec_local,
- int* NTSTEP_BETWEEN_READ_ADJSRC) {
+ int* nrec,
+ realw* h_adj_sourcearrays,
+ int* h_islice_selected_rec,
+ int* h_ispec_selected_rec,
+ int* time_index) {
-TRACE("add_sources_el_sim_type_2_or_3");
+ TRACE("add_sources_el_sim_type_2_or_3");
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- // checks
- if( *nadj_rec_local != mp->nadj_rec_local) exit_on_error("add_sources_el_sim_type_2_or_3: nadj_rec_local not equal\n");
+ // check if anything to do
+ if( mp->nadj_rec_local == 0 ) return;
// make sure grid dimension is less than 65535 in x dimension
int num_blocks_x = mp->nadj_rec_local;
@@ -333,62 +297,42 @@
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
-
dim3 grid(num_blocks_x,num_blocks_y,1);
dim3 threads(5,5,5);
// build slice of adj_sourcearrays because full array is *very* large.
// note: this extracts array values for local adjoint sources at given time step "time_index"
// from large adj_sourcearrays array into h_adj_sourcearrays_slice
- int ispec,i,j,k;
+ int i,j,k;
int irec_local = 0;
for(int irec = 0; irec < *nrec; irec++) {
- if(*myrank == h_islice_selected_rec[irec]) {
+ if(mp->myrank == h_islice_selected_rec[irec]) {
irec_local++;
- // takes only elastic sources
- ispec = h_ispec_selected_rec[irec]-1;
- if( h_ispec_is_elastic[ispec] ){
+ // takes only local sources
+ for(k=0;k<NGLLX;k++) {
+ for(j=0;j<NGLLX;j++) {
+ for(i=0;i<NGLLX;i++) {
- if( h_ispec_is_inner[ispec] == *phase_is_inner) {
- for(k=0;k<5;k++) {
- for(j=0;j<5;j++) {
- for(i=0;i<5;i++) {
+ // note: global version uses dimensions
+ // h_adj_sourcearrays(NDIM,NGLLX,NGLLY,NGLLZ,nadj_rec_local,NTSTEP_BETWEEN_READ_ADJSRC)
+ mp->h_adj_sourcearrays_slice[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
+ 0,i,j,k,irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(NDIM,NGLLX,NGLLX,NGLLX,mp->nadj_rec_local,
+ 0,i,j,k,irec_local-1,*time_index-1)];
- mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
- i,j,k,0,
- irec_local-1)]
- = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,
- *time_index-1,
- 0,i,j,k)];
+ mp->h_adj_sourcearrays_slice[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
+ 1,i,j,k,irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(NDIM,NGLLX,NGLLX,NGLLX,mp->nadj_rec_local,
+ 1,i,j,k,irec_local-1,*time_index-1)];
- mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
- i,j,k,1,
- irec_local-1)]
- = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,
- *time_index-1,
- 1,i,j,k)];
-
- mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
- i,j,k,2,
- irec_local-1)]
- = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,
- *time_index-1,
- 2,i,j,k)];
- }
- }
+ mp->h_adj_sourcearrays_slice[INDEX5(NDIM,NGLLX,NGLLX,NGLLX,
+ 2,i,j,k,irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(NDIM,NGLLX,NGLLX,NGLLX,mp->nadj_rec_local,
+ 2,i,j,k,irec_local-1,*time_index-1)];
}
- } // phase_is_inner
- } // h_ispec_is_elastic
+ }
+ }
}
}
// check all local sources were added
@@ -402,15 +346,11 @@
// the irec_local variable needs to be precomputed (as
// h_pre_comp..), because normally it is in the loop updating accel,
// and due to how it's incremented, it cannot be parallelized
-
- add_sources_el_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_accel,
+ add_sources_el_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_accel_crust_mantle,
*nrec,
mp->d_adj_sourcearrays,
- mp->d_ibool,
- mp->d_ispec_is_inner,
- mp->d_ispec_is_elastic,
+ mp->d_ibool_crust_mantle,
mp->d_ispec_selected_rec,
- *phase_is_inner,
mp->d_pre_computed_irec,
mp->nadj_rec_local);
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2012-02-21 00:33:56 UTC (rev 19657)
@@ -41,29 +41,27 @@
__global__ void compute_stacey_acoustic_kernel(realw* potential_dot_acoustic,
realw* potential_dot_dot_acoustic,
+ int interface_type,
+ int num_abs_boundary_faces,
int* abs_boundary_ispec,
- int* abs_boundary_ijk,
- realw* abs_boundary_jacobian2Dw,
+ int* nkmin_xi, int* nkmin_eta,
+ int* njmin, int* njmax,
+ int* nimin, int* nimax,
+ realw* abs_boundary_jacobian2D,
+ realw* wgllwgll,
int* ibool,
- realw* rhostore,
- realw* kappastore,
- int* ispec_is_inner,
- int* ispec_is_acoustic,
- int phase_is_inner,
- int SIMULATION_TYPE, int SAVE_FORWARD,
- int num_abs_boundary_faces,
- realw* b_potential_dot_acoustic,
+ realw* vpstore,
+ int SIMULATION_TYPE,
+ int SAVE_FORWARD,
realw* b_potential_dot_dot_acoustic,
- realw* b_absorb_potential,
- int gravity) {
+ realw* b_absorb_potential) {
int igll = threadIdx.x;
int iface = blockIdx.x + gridDim.x*blockIdx.y;
int i,j,k,iglob,ispec;
- realw rhol,kappal,cpl;
- realw jacobianw;
- realw vel;
+ realw sn;
+ realw jacobianw,fac1;
// don't compute points outside NGLLSQUARE==NGLL2==25
// way 2: no further check needed since blocksize = 25
@@ -74,46 +72,99 @@
// "-1" from index values to convert from Fortran-> C indexing
ispec = abs_boundary_ispec[iface]-1;
- if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] ) {
+ // determines indices i,j,k depending on absorbing boundary type
+ switch( interface_type ){
+ case 4:
+ // xmin
+ if( nkmin_xi[INDEX2(2,0,iface)] == 0 || njmin[INDEX2(2,0,iface)] == 0 ) return;
+
+ i = 0; // index -1
+ k = (igll/NGLLX);
+ j = (igll-k*NGLLX);
+
+ if( k < nkmin_xi[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
+ if( j < njmin[INDEX2(2,0,iface)]-1 || j > njmax[INDEX2(2,0,iface)]-1 ) return;
+
+ fac1 = wgllwgll[k*NGLLX+j];
+ break;
+
+ case 5:
+ // xmax
+ if( nkmin_xi[INDEX2(2,1,iface)] == 0 || njmin[INDEX2(2,1,iface)] == 0 ) return;
+
+ i = NGLLX-1;
+ k = (igll/NGLLX);
+ j = (igll-k*NGLLX);
+
+ if( k < nkmin_xi[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
+ if( j < njmin[INDEX2(2,1,iface)]-1 || j > njmax[INDEX2(2,1,iface)]-1 ) return;
+
+ fac1 = wgllwgll[k*NGLLX+j];
+ break;
+
+ case 6:
+ // ymin
+ if( nkmin_eta[INDEX2(2,0,iface)] == 0 || nimin[INDEX2(2,0,iface)] == 0 ) return;
+
+ j = 0;
+ k = (igll/NGLLX);
+ i = (igll-k*NGLLX);
+
+ if( k < nkmin_eta[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
+ if( i < nimin[INDEX2(2,0,iface)]-1 || i > nimax[INDEX2(2,0,iface)]-1 ) return;
+
+ fac1 = wgllwgll[k*NGLLX+i];
+ break;
+
+ case 7:
+ // ymax
+ if( nkmin_eta[INDEX2(2,1,iface)] == 0 || nimin[INDEX2(2,1,iface)] == 0 ) return;
+
+ j = NGLLX-1;
+ k = (igll/NGLLX);
+ i = (igll-k*NGLLX);
+
+ if( k < nkmin_eta[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
+ if( i < nimin[INDEX2(2,1,iface)]-1 || i > nimax[INDEX2(2,1,iface)]-1 ) return;
+
+ fac1 = wgllwgll[k*NGLLX+i];
+ break;
- i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
- j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
- k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
- iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+ case 8:
+ // zmin
+ k = 0;
+ j = (igll/NGLLX);
+ i = (igll-j*NGLLX);
+
+ if( j < 0 || j > NGLLX-1 ) return;
+ if( i < 0 || i > NGLLX-1 ) return;
+
+ fac1 = wgllwgll[j*NGLLX+i];
+ break;
+
+ }
+
+ iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
- // determines bulk sound speed
- rhol = rhostore[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+ // determines bulk sound speed
+ // velocity
+ sn = potential_dot_acoustic[iglob] / vpstore[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] ;
- kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
+ // gets associated, weighted jacobian
+ jacobianw = abs_boundary_jacobian2D[INDEX2(NGLL2,igll,iface)]*fac1;
- cpl = sqrt( kappal / rhol );
+ // Sommerfeld condition
+ atomicAdd(&potential_dot_dot_acoustic[iglob],-sn*jacobianw);
- // velocity
- if( gravity ){
- // daniel: TODO - check gravity and stacey condition here...
- // uses a potential definition of: s = grad(chi)
- vel = potential_dot_acoustic[iglob] / rhol ;
- }else{
- // uses a potential definition of: s = 1/rho grad(chi)
- vel = potential_dot_acoustic[iglob] / rhol;
- }
-
- // gets associated, weighted jacobian
- jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
-
+ // adjoint simulations
+ if( SIMULATION_TYPE == 3 ){
// Sommerfeld condition
- atomicAdd(&potential_dot_dot_acoustic[iglob],-vel*jacobianw/cpl);
+ atomicAdd(&b_potential_dot_dot_acoustic[iglob],-b_absorb_potential[INDEX2(NGLL2,igll,iface)]);
+ }else if( SIMULATION_TYPE == 1 && SAVE_FORWARD ){
+ // saves boundary values
+ b_absorb_potential[INDEX2(NGLL2,igll,iface)] = sn*jacobianw;
+ }
- // adjoint simulations
- if( SIMULATION_TYPE == 3 ){
- // Sommerfeld condition
- atomicAdd(&b_potential_dot_dot_acoustic[iglob],-b_absorb_potential[INDEX2(NGLL2,igll,iface)]);
- }else if( SIMULATION_TYPE == 1 && SAVE_FORWARD ){
- // saves boundary values
- b_absorb_potential[INDEX2(NGLL2,igll,iface)] = vel*jacobianw/cpl;
- }
- }
-// }
}
}
@@ -121,20 +172,76 @@
extern "C"
void FC_FUNC_(compute_stacey_acoustic_cuda,
- COMPUTE_STACEY_ACOUSTIC_CUDA)(
- long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* SIMULATION_TYPEf,
- int* SAVE_FORWARDf,
- realw* h_b_absorb_potential) {
+ COMPUTE_STACEY_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+ realw* absorb_potential,
+ int* itype) {
TRACE("compute_stacey_acoustic_cuda");
//double start_time = get_time();
+ int num_abs_boundary_faces;
+ int* d_abs_boundary_ispec;
+ realw* d_abs_boundary_jacobian2D;
+ realw* d_wgllwgll;
+ realw* d_b_absorb_potential;
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- int phase_is_inner = *phase_is_innerf;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
- int SAVE_FORWARD = *SAVE_FORWARDf;
+
+ // absorbing boundary type
+ int interface_type = *itype;
+ switch( interface_type ){
+ case 4:
+ // xmin
+ num_abs_boundary_faces = mp->nspec2D_xmin_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_xmin_outer_core;
+ d_abs_boundary_jacobian2D = mp->d_jacobian2D_xmin_outer_core;
+ d_b_absorb_potential = mp->d_absorb_xmin_outer_core;
+ d_wgllwgll = mp->d_wgllwgll_yz;
+ break;
+
+ case 5:
+ // xmax
+ num_abs_boundary_faces = mp->nspec2D_xmax_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_xmax_outer_core;
+ d_abs_boundary_jacobian2D = mp->d_jacobian2D_xmax_outer_core;
+ d_b_absorb_potential = mp->d_absorb_xmax_outer_core;
+ d_wgllwgll = mp->d_wgllwgll_yz;
+ break;
+
+ case 6:
+ // ymin
+ num_abs_boundary_faces = mp->nspec2D_ymin_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_ymin_outer_core;
+ d_abs_boundary_jacobian2D = mp->d_jacobian2D_ymin_outer_core;
+ d_b_absorb_potential = mp->d_absorb_ymin_outer_core;
+ d_wgllwgll = mp->d_wgllwgll_xz;
+ break;
+
+ case 7:
+ // ymax
+ num_abs_boundary_faces = mp->nspec2D_ymax_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_ymax_outer_core;
+ d_abs_boundary_jacobian2D = mp->d_jacobian2D_ymax_outer_core;
+ d_b_absorb_potential = mp->d_absorb_ymax_outer_core;
+ d_wgllwgll = mp->d_wgllwgll_xz;
+ break;
+ case 8:
+ // zmin
+ num_abs_boundary_faces = mp->nspec2D_zmin_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_zmin_outer_core;
+ d_abs_boundary_jacobian2D = mp->d_jacobian2D_zmin_outer_core;
+ d_b_absorb_potential = mp->d_absorb_zmin_outer_core;
+ d_wgllwgll = mp->d_wgllwgll_xy;
+ break;
+
+ default:
+ exit_on_cuda_error("compute_stacey_acoustic_cuda: unknown interface type");
+ break;
+ }
+
+ // checks if anything to do
+ if( num_abs_boundary_faces == 0 ) return;
+
// way 1: Elapsed time: 4.385948e-03
// > NGLLSQUARE==NGLL2==25, but we handle this inside kernel
// int blocksize = 32;
@@ -143,46 +250,47 @@
// > NGLLSQUARE==NGLL2==25, no further check inside kernel
int blocksize = NGLL2;
- int num_blocks_x = mp->d_num_abs_boundary_faces;
+ int num_blocks_x = num_abs_boundary_faces;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
-
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- // adjoint simulations: reads in absorbing boundary
- if (SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0 ){
+ // adjoint simulations: needs absorbing boundary buffer
+ if (mp->simulation_type == 3 && num_abs_boundary_faces > 0 ){
// copies array to GPU
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,h_b_absorb_potential,
- mp->d_b_reclen_potential,cudaMemcpyHostToDevice),7700);
+ print_CUDA_error_if_any(cudaMemcpy(d_b_absorb_potential,absorb_potential,
+ NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyHostToDevice),7700);
}
- compute_stacey_acoustic_kernel<<<grid,threads>>>(mp->d_potential_dot_acoustic,
- mp->d_potential_dot_dot_acoustic,
- mp->d_abs_boundary_ispec,
- mp->d_abs_boundary_ijk,
- mp->d_abs_boundary_jacobian2Dw,
- mp->d_ibool,
- mp->d_rhostore,
- mp->d_kappastore,
- mp->d_ispec_is_inner,
- mp->d_ispec_is_acoustic,
- phase_is_inner,
- SIMULATION_TYPE,SAVE_FORWARD,
- mp->d_num_abs_boundary_faces,
- mp->d_b_potential_dot_acoustic,
- mp->d_b_potential_dot_dot_acoustic,
- mp->d_b_absorb_potential,
- mp->gravity);
+ compute_stacey_acoustic_kernel<<<grid,threads>>>(mp->d_veloc_outer_core,
+ mp->d_accel_outer_core,
+ interface_type,
+ num_abs_boundary_faces,
+ d_abs_boundary_ispec,
+ mp->d_nkmin_xi_outer_core,
+ mp->d_nkmin_eta_outer_core,
+ mp->d_njmin_outer_core,
+ mp->d_njmax_outer_core,
+ mp->d_nimin_outer_core,
+ mp->d_nimax_outer_core,
+ d_abs_boundary_jacobian2D,
+ d_wgllwgll,
+ mp->d_ibool_outer_core,
+ mp->d_vp_outer_core,
+ mp->simulation_type,
+ mp->save_forward,
+ mp->d_b_accel_outer_core,
+ d_b_absorb_potential);
// adjoint simulations: stores absorbed wavefield part
- if (SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ){
+ if (mp->simulation_type == 1 && mp->save_forward && num_abs_boundary_faces > 0 ){
// copies array to CPU
- print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_potential,mp->d_b_absorb_potential,
- mp->d_b_reclen_potential,cudaMemcpyDeviceToHost),7701);
+ print_CUDA_error_if_any(cudaMemcpy(absorb_potential,d_b_absorb_potential,
+ NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyDeviceToHost),7701);
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2012-02-21 00:33:56 UTC (rev 19657)
@@ -41,19 +41,20 @@
__global__ void compute_stacey_elastic_kernel(realw* veloc,
realw* accel,
+ int interface_type,
+ int num_abs_boundary_faces,
int* abs_boundary_ispec,
- int* abs_boundary_ijk,
+ int* nkmin_xi, int* nkmin_eta,
+ int* njmin, int* njmax,
+ int* nimin, int* nimax,
realw* abs_boundary_normal,
- realw* abs_boundary_jacobian2Dw,
+ realw* abs_boundary_jacobian2D,
+ realw* wgllwgll,
int* ibool,
realw* rho_vp,
realw* rho_vs,
- int* ispec_is_inner,
- int* ispec_is_elastic,
- int phase_is_inner,
int SIMULATION_TYPE,
int SAVE_FORWARD,
- int num_abs_boundary_faces,
realw* b_accel,
realw* b_absorb_field) {
@@ -66,63 +67,117 @@
realw rho_vp_temp,rho_vs_temp;
realw tx,ty,tz;
realw jacobianw;
+ realw fac1;
- // don't compute points outside NGLLSQUARE==NGLL2==25
- // way 2: no further check needed since blocksize = 25
+ // don't compute surface faces outside of range
+ // and don't compute points outside NGLLSQUARE==NGLL2==25
+ //if(igll < NGLL2 && iface < num_abs_boundary_faces) {
+
+ // way 2: only check face, no further check needed since blocksize = 25
if( iface < num_abs_boundary_faces){
- //if(igll < NGLL2 && iface < num_abs_boundary_faces) {
-
// "-1" from index values to convert from Fortran-> C indexing
ispec = abs_boundary_ispec[iface]-1;
+
+ // determines indices i,j,k depending on absorbing boundary type
+ switch( interface_type ){
+ case 0:
+ // xmin
+ if( nkmin_xi[INDEX2(2,0,iface)] == 0 || njmin[INDEX2(2,0,iface)] == 0 ) return;
+
+ i = 0; // index -1
+ k = (igll/NGLLX);
+ j = (igll-k*NGLLX);
+
+ if( k < nkmin_xi[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
+ if( j < njmin[INDEX2(2,0,iface)]-1 || j > NGLLX-1 ) return;
+
+ fac1 = wgllwgll[k*NGLLX+j];
+ break;
+
+ case 1:
+ // xmax
+ if( nkmin_xi[INDEX2(2,1,iface)] == 0 || njmin[INDEX2(2,1,iface)] == 0 ) return;
+
+ i = NGLLX-1;
+ k = (igll/NGLLX);
+ j = (igll-k*NGLLX);
+
+ if( k < nkmin_xi[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
+ if( j < njmin[INDEX2(2,1,iface)]-1 || j > njmax[INDEX2(2,1,iface)]-1 ) return;
- if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] ) {
+ fac1 = wgllwgll[k*NGLLX+j];
+ break;
+
+ case 2:
+ // ymin
+ if( nkmin_eta[INDEX2(2,0,iface)] == 0 || nimin[INDEX2(2,0,iface)] == 0 ) return;
+
+ j = 0;
+ k = (igll/NGLLX);
+ i = (igll-k*NGLLX);
+
+ if( k < nkmin_eta[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
+ if( i < nimin[INDEX2(2,0,iface)]-1 || i > nimax[INDEX2(2,0,iface)]-1 ) return;
+
+ fac1 = wgllwgll[k*NGLLX+i];
+ break;
- i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
- j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
- k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
- iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
+ case 3:
+ // ymax
+ if( nkmin_eta[INDEX2(2,1,iface)] == 0 || nimin[INDEX2(2,1,iface)] == 0 ) return;
+
+ j = NGLLX-1;
+ k = (igll/NGLLX);
+ i = (igll-k*NGLLX);
+
+ if( k < nkmin_eta[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
+ if( i < nimin[INDEX2(2,1,iface)]-1 || i > nimax[INDEX2(2,1,iface)]-1 ) return;
+
+ fac1 = wgllwgll[k*NGLLX+i];
+ break;
+ }
+
+ iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
- // gets associated velocity
+ // gets associated velocity
+ vx = veloc[iglob*3+0];
+ vy = veloc[iglob*3+1];
+ vz = veloc[iglob*3+2];
- vx = veloc[iglob*3+0];
- vy = veloc[iglob*3+1];
- vz = veloc[iglob*3+2];
+ // gets associated normal
+ nx = abs_boundary_normal[INDEX3(NDIM,NGLL2,0,igll,iface)];
+ ny = abs_boundary_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
+ nz = abs_boundary_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
- // gets associated normal
- nx = abs_boundary_normal[INDEX3(NDIM,NGLL2,0,igll,iface)];
- ny = abs_boundary_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
- nz = abs_boundary_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
+ // // velocity component in normal direction (normal points out of element)
+ vn = vx*nx + vy*ny + vz*nz;
- // // velocity component in normal direction (normal points out of element)
- vn = vx*nx + vy*ny + vz*nz;
+ rho_vp_temp = rho_vp[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+ rho_vs_temp = rho_vs[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
- rho_vp_temp = rho_vp[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
- rho_vs_temp = rho_vs[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+ tx = rho_vp_temp*vn*nx + rho_vs_temp*(vx-vn*nx);
+ ty = rho_vp_temp*vn*ny + rho_vs_temp*(vy-vn*ny);
+ tz = rho_vp_temp*vn*nz + rho_vs_temp*(vz-vn*nz);
- tx = rho_vp_temp*vn*nx + rho_vs_temp*(vx-vn*nx);
- ty = rho_vp_temp*vn*ny + rho_vs_temp*(vy-vn*ny);
- tz = rho_vp_temp*vn*nz + rho_vs_temp*(vz-vn*nz);
+ jacobianw = abs_boundary_jacobian2D[INDEX2(NGLL2,igll,iface)]*fac1;
- jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+ atomicAdd(&accel[iglob*3],-tx*jacobianw);
+ atomicAdd(&accel[iglob*3+1],-ty*jacobianw);
+ atomicAdd(&accel[iglob*3+2],-tz*jacobianw);
- atomicAdd(&accel[iglob*3],-tx*jacobianw);
- atomicAdd(&accel[iglob*3+1],-ty*jacobianw);
- atomicAdd(&accel[iglob*3+2],-tz*jacobianw);
-
- if(SIMULATION_TYPE == 3) {
- atomicAdd(&b_accel[iglob*3 ],-b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)]);
- atomicAdd(&b_accel[iglob*3+1],-b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)]);
- atomicAdd(&b_accel[iglob*3+2],-b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)]);
- }
- else if(SAVE_FORWARD && SIMULATION_TYPE == 1) {
- b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)] = tx*jacobianw;
- b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)] = ty*jacobianw;
- b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)] = tz*jacobianw;
- } // SIMULATION_TYPE
+ if(SIMULATION_TYPE == 3) {
+ atomicAdd(&b_accel[iglob*3 ],-b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)]);
+ atomicAdd(&b_accel[iglob*3+1],-b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)]);
+ atomicAdd(&b_accel[iglob*3+2],-b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)]);
}
+ else if(SAVE_FORWARD && SIMULATION_TYPE == 1) {
+ b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)] = tx*jacobianw;
+ b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)] = ty*jacobianw;
+ b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)] = tz*jacobianw;
+ } // SIMULATION_TYPE
+
} // num_abs_boundary_faces
-
}
/* ----------------------------------------------------------------------------------------------- */
@@ -131,22 +186,71 @@
extern "C"
void FC_FUNC_(compute_stacey_elastic_cuda,
COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* SIMULATION_TYPEf,
- int* SAVE_FORWARDf,
- realw* h_b_absorb_field) {
+ realw* absorb_field,
+ int* itype) {
TRACE("compute_stacey_elastic_cuda");
+ int num_abs_boundary_faces;
+ int* d_abs_boundary_ispec;
+ realw* d_abs_boundary_normal;
+ realw* d_abs_boundary_jacobian2D;
+ realw* d_wgllwgll;
+ realw* d_b_absorb_field;
+
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- // check
- if( mp->d_num_abs_boundary_faces == 0 ) return;
+ // absorbing boundary type
+ int interface_type = *itype;
+ switch( interface_type ){
+ case 0:
+ // xmin
+ num_abs_boundary_faces = mp->nspec2D_xmin_crust_mantle;
+ d_abs_boundary_ispec = mp->d_ibelm_xmin_crust_mantle;
+ d_abs_boundary_normal = mp->d_normal_xmin_crust_mantle;
+ d_abs_boundary_jacobian2D = mp->d_jacobian2D_xmin_crust_mantle;
+ d_b_absorb_field = mp->d_absorb_xmin_crust_mantle;
+ d_wgllwgll = mp->d_wgllwgll_yz;
+ break;
+
+ case 1:
+ // xmax
+ num_abs_boundary_faces = mp->nspec2D_xmax_crust_mantle;
+ d_abs_boundary_ispec = mp->d_ibelm_xmax_crust_mantle;
+ d_abs_boundary_normal = mp->d_normal_xmax_crust_mantle;
+ d_abs_boundary_jacobian2D = mp->d_jacobian2D_xmax_crust_mantle;
+ d_b_absorb_field = mp->d_absorb_xmax_crust_mantle;
+ d_wgllwgll = mp->d_wgllwgll_yz;
+ break;
+
+ case 2:
+ // ymin
+ num_abs_boundary_faces = mp->nspec2D_ymin_crust_mantle;
+ d_abs_boundary_ispec = mp->d_ibelm_ymin_crust_mantle;
+ d_abs_boundary_normal = mp->d_normal_ymin_crust_mantle;
+ d_abs_boundary_jacobian2D = mp->d_jacobian2D_ymin_crust_mantle;
+ d_b_absorb_field = mp->d_absorb_ymin_crust_mantle;
+ d_wgllwgll = mp->d_wgllwgll_xz;
+ break;
+
+ case 3:
+ // ymax
+ num_abs_boundary_faces = mp->nspec2D_ymax_crust_mantle;
+ d_abs_boundary_ispec = mp->d_ibelm_ymax_crust_mantle;
+ d_abs_boundary_normal = mp->d_normal_ymax_crust_mantle;
+ d_abs_boundary_jacobian2D = mp->d_jacobian2D_ymax_crust_mantle;
+ d_b_absorb_field = mp->d_absorb_ymax_crust_mantle;
+ d_wgllwgll = mp->d_wgllwgll_xz;
+ break;
+
+ default:
+ exit_on_cuda_error("compute_stacey_elastic_cuda: unknown interface type");
+ break;
+ }
+
+ // checks if anything to do
+ if( num_abs_boundary_faces == 0 ) return;
- int phase_is_inner = *phase_is_innerf;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
- int SAVE_FORWARD = *SAVE_FORWARDf;
-
// way 1
// > NGLLSQUARE==NGLL2==25, but we handle this inside kernel
//int blocksize = 32;
@@ -154,61 +258,56 @@
// way 2: seems sligthly faster
// > NGLLSQUARE==NGLL2==25, no further check inside kernel
int blocksize = NGLL2;
-
- int num_blocks_x = mp->d_num_abs_boundary_faces;
+
+ int num_blocks_x = num_abs_boundary_faces;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
-
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- if(SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0) {
- // The read is done in fortran
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field,h_b_absorb_field,
- mp->d_b_reclen_field,cudaMemcpyHostToDevice),7700);
+ // adjoint simulations: needs absorbing boundary buffer
+ if(mp->simulation_type == 3 && num_abs_boundary_faces > 0) {
+ // copies array to GPU
+ print_CUDA_error_if_any(cudaMemcpy(d_b_absorb_field,absorb_field,
+ NDIM*NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyHostToDevice),7700);
}
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("between cudamemcpy and compute_stacey_elastic_kernel");
-#endif
+ // absorbing boundary contributions
+ compute_stacey_elastic_kernel<<<grid,threads>>>(mp->d_veloc_crust_mantle,
+ mp->d_accel_crust_mantle,
+ interface_type,
+ num_abs_boundary_faces,
+ d_abs_boundary_ispec,
+ mp->d_nkmin_xi_crust_mantle,
+ mp->d_nkmin_eta_crust_mantle,
+ mp->d_njmin_crust_mantle,
+ mp->d_njmax_crust_mantle,
+ mp->d_nimin_crust_mantle,
+ mp->d_nimax_crust_mantle,
+ d_abs_boundary_normal,
+ d_abs_boundary_jacobian2D,
+ d_wgllwgll,
+ mp->d_ibool_crust_mantle,
+ mp->d_rho_vp_crust_mantle,
+ mp->d_rho_vs_crust_mantle,
+ mp->simulation_type,
+ mp->save_forward,
+ mp->d_b_accel_crust_mantle,
+ d_b_absorb_field);
- compute_stacey_elastic_kernel<<<grid,threads>>>(mp->d_veloc,
- mp->d_accel,
- mp->d_abs_boundary_ispec,
- mp->d_abs_boundary_ijk,
- mp->d_abs_boundary_normal,
- mp->d_abs_boundary_jacobian2Dw,
- mp->d_ibool,
- mp->d_rho_vp,
- mp->d_rho_vs,
- mp->d_ispec_is_inner,
- mp->d_ispec_is_elastic,
- phase_is_inner,
- SIMULATION_TYPE,SAVE_FORWARD,
- mp->d_num_abs_boundary_faces,
- mp->d_b_accel,
- mp->d_b_absorb_field);
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_stacey_elastic_kernel");
-#endif
-
- // ! adjoint simulations: stores absorbed wavefield part
- // if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) &
- // write(IOABS,rec=it) b_reclen_field,b_absorb_field,b_reclen_field
-
- if(SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ) {
- print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_field,mp->d_b_absorb_field,
- mp->d_b_reclen_field,cudaMemcpyDeviceToHost),7701);
- // The write is done in fortran
- // write_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&it);
+ // adjoint simulations: stores absorbed wavefield part
+ if(mp->simulation_type == 1 && mp->save_forward && num_abs_boundary_faces > 0 ) {
+ // copies array to CPU
+ print_CUDA_error_if_any(cudaMemcpy(absorb_field,d_b_absorb_field,
+ NDIM*NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyDeviceToHost),7701);
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after compute_stacey_elastic after cudamemcpy");
+ exit_on_cuda_error("compute_stacey_elastic_cuda");
#endif
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-02-21 00:33:56 UTC (rev 19657)
@@ -418,6 +418,8 @@
int anisotropic_inner_core;
int save_boundary_mesh;
+ int myrank;
+
// ------------------------------------------------------------------ //
// gravity
// ------------------------------------------------------------------ //
@@ -490,7 +492,55 @@
int* d_ibool_interfaces_outer_core;
realw* d_send_accel_buffer_outer_core;
+ // ------------------------------------------------------------------ //
+ // absorbing boundaries
+ // ------------------------------------------------------------------ //
+
+ int nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle;
+ int nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle;
+ int* d_nimin_crust_mantle, *d_nimax_crust_mantle;
+ int* d_njmin_crust_mantle, *d_njmax_crust_mantle;
+ int* d_nkmin_xi_crust_mantle, *d_nkmin_eta_crust_mantle;
+
+ int* d_ibelm_xmin_crust_mantle, *d_ibelm_xmax_crust_mantle;
+ int* d_ibelm_ymin_crust_mantle, *d_ibelm_ymax_crust_mantle;
+
+ realw* d_normal_xmin_crust_mantle, *d_normal_xmax_crust_mantle;
+ realw* d_normal_ymin_crust_mantle, *d_normal_ymax_crust_mantle;
+
+ realw* d_jacobian2D_xmin_crust_mantle, *d_jacobian2D_xmax_crust_mantle;
+ realw* d_jacobian2D_ymin_crust_mantle, *d_jacobian2D_ymax_crust_mantle;
+
+ realw* d_absorb_xmin_crust_mantle, *d_absorb_xmax_crust_mantle;
+ realw* d_absorb_ymin_crust_mantle, *d_absorb_ymax_crust_mantle;
+
+ realw* d_rho_vp_crust_mantle;
+ realw* d_rho_vs_crust_mantle;
+
+ int nspec2D_xmin_outer_core,nspec2D_xmax_outer_core;
+ int nspec2D_ymin_outer_core,nspec2D_ymax_outer_core;
+ int nspec2D_zmin_outer_core;
+
+ int* d_nimin_outer_core, *d_nimax_outer_core;
+ int* d_njmin_outer_core, *d_njmax_outer_core;
+ int* d_nkmin_xi_outer_core, *d_nkmin_eta_outer_core;
+
+ int* d_ibelm_xmin_outer_core, *d_ibelm_xmax_outer_core;
+ int* d_ibelm_ymin_outer_core, *d_ibelm_ymax_outer_core;
+ int* d_ibelm_zmin_outer_core;
+
+ realw* d_jacobian2D_xmin_outer_core, *d_jacobian2D_xmax_outer_core;
+ realw* d_jacobian2D_ymin_outer_core, *d_jacobian2D_ymax_outer_core;
+ realw* d_jacobian2D_zmin_outer_core;
+
+ realw* d_absorb_xmin_outer_core, *d_absorb_xmax_outer_core;
+ realw* d_absorb_ymin_outer_core, *d_absorb_ymax_outer_core;
+ realw* d_absorb_zmin_outer_core;
+
+ realw* d_vp_outer_core;
+
+
// ------------------------------------------------------------------ //
//daniel: TODO - former code...
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-02-21 00:33:56 UTC (rev 19657)
@@ -168,7 +168,7 @@
// outputs initial memory infos via cudaMemGetInfo()
double free_db,used_db,total_db;
get_free_memory(&free_db,&used_db,&total_db);
- fprintf(fp,"%d: GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank,
+ fprintf(fp,"%d: GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n",myrank,
used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
fclose(fp);
@@ -183,18 +183,18 @@
extern "C"
void FC_FUNC_(prepare_constants_device,
- PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
+ PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
+ int* myrank_f,
int* h_NGLLX,
realw* h_hprime_xx,realw* h_hprime_yy,realw* h_hprime_zz,
realw* h_hprimewgll_xx,realw* h_hprimewgll_yy,realw* h_hprimewgll_zz,
realw* h_wgllwgll_xy,realw* h_wgllwgll_xz,realw* h_wgllwgll_yz,
int* NSOURCES,int* nsources_local,
realw* h_sourcearrays,
- int* h_islice_selected_source,
- int* h_ispec_selected_source,
+ int* h_islice_selected_source,int* h_ispec_selected_source,
int* h_number_receiver_global,
- int* h_ispec_selected_rec,
- int* nrec,int* nrec_local,
+ int* h_islice_selected_rec,int* h_ispec_selected_rec,
+ int* nrec,int* nrec_local, int* nadj_rec_local,
int* NSPEC_CRUST_MANTLE, int* NGLOB_CRUST_MANTLE,
int* NSPEC_OUTER_CORE, int* NGLOB_OUTER_CORE,
int* NSPEC_INNER_CORE, int* NGLOB_INNER_CORE,
@@ -257,6 +257,7 @@
mp->anisotropic_inner_core = *ANISOTROPIC_INNER_CORE_f;
mp->save_boundary_mesh = *SAVE_BOUNDARY_MESH_f;
+ mp->myrank = *myrank_f;
// mesh coloring flag
#ifdef USE_MESH_COLORING_GPU
@@ -277,7 +278,7 @@
sizeof(realw)* *NSOURCES*3*NGLL3),1301);
print_CUDA_error_if_any(cudaMemcpy(mp->d_sourcearrays, h_sourcearrays,
sizeof(realw)* *NSOURCES*3*NGLL3,cudaMemcpyHostToDevice),1302);
-
+ // buffer for source time function values
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_stf_pre_compute,
*NSOURCES*sizeof(double)),1303);
}
@@ -306,8 +307,42 @@
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_selected_rec),(*nrec)*sizeof(int)),1513);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_rec,h_ispec_selected_rec,
(*nrec)*sizeof(int),cudaMemcpyHostToDevice),1514);
-
-
+
+ // receiver adjoint source arrays only used for noise and adjoint simulations
+ // adjoint source arrays
+ mp->nadj_rec_local = *nadj_rec_local;
+ if( mp->nadj_rec_local > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_adj_sourcearrays,
+ (mp->nadj_rec_local)*3*NGLL3*sizeof(realw)),6003);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_pre_computed_irec,
+ (mp->nadj_rec_local)*sizeof(int)),6004);
+
+ // prepares local irec array:
+ // the irec_local variable needs to be precomputed (as
+ // h_pre_comp..), because normally it is in the loop updating accel,
+ // and due to how it's incremented, it cannot be parallelized
+ int* h_pre_computed_irec = (int*) malloc( (mp->nadj_rec_local)*sizeof(int) );
+ if( h_pre_computed_irec == NULL ) exit_on_error("h_pre_computed_irec not allocated\n");
+
+ int irec_local = 0;
+ for(int irec = 0; irec < *nrec; irec++) {
+ if(mp->myrank == h_islice_selected_rec[irec]) {
+ irec_local++;
+ h_pre_computed_irec[irec_local-1] = irec;
+ }
+ }
+ if( irec_local != mp->nadj_rec_local ) exit_on_error("prepare_sim2_or_3_const_device: irec_local not equal\n");
+ // copies values onto GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_pre_computed_irec,h_pre_computed_irec,
+ (mp->nadj_rec_local)*sizeof(int),cudaMemcpyHostToDevice),6010);
+ free(h_pre_computed_irec);
+
+ // temporary array to prepare extracted source array values
+ mp->h_adj_sourcearrays_slice = (realw*) malloc( (mp->nadj_rec_local)*3*NGLL3*sizeof(realw) );
+ if( mp->h_adj_sourcearrays_slice == NULL ) exit_on_error("h_adj_sourcearrays_slice not allocated\n");
+ }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("prepare_constants_device");
#endif
@@ -761,9 +796,344 @@
}
}
+/* ----------------------------------------------------------------------------------------------- */
+// STRAIN simulations
+
/* ----------------------------------------------------------------------------------------------- */
+extern "C"
+void FC_FUNC_(prepare_fields_absorb_device,
+ PREPARE_FIELDS_ABSORB_DEVICE)(long* Mesh_pointer_f,
+ int* nspec2D_xmin_crust_mantle,int* nspec2D_xmax_crust_mantle,
+ int* nspec2D_ymin_crust_mantle,int* nspec2D_ymax_crust_mantle,
+ int* NSPEC2DMAX_XMIN_XMAX_CM,int* NSPEC2DMAX_YMIN_YMAX_CM,
+ int* nimin_crust_mantle,int* nimax_crust_mantle,
+ int* njmin_crust_mantle,int* njmax_crust_mantle,
+ int* nkmin_xi_crust_mantle,int* nkmin_eta_crust_mantle,
+ int* ibelm_xmin_crust_mantle,int* ibelm_xmax_crust_mantle,
+ int* ibelm_ymin_crust_mantle,int* ibelm_ymax_crust_mantle,
+ realw* normal_xmin_crust_mantle,realw* normal_xmax_crust_mantle,
+ realw* normal_ymin_crust_mantle,realw* normal_ymax_crust_mantle,
+ realw* jacobian2D_xmin_crust_mantle, realw* jacobian2D_xmax_crust_mantle,
+ realw* jacobian2D_ymin_crust_mantle, realw* jacobian2D_ymax_crust_mantle,
+ realw* rho_vp_crust_mantle,
+ realw* rho_vs_crust_mantle,
+ int* nspec2D_xmin_outer_core,int* nspec2D_xmax_outer_core,
+ int* nspec2D_ymin_outer_core,int* nspec2D_ymax_outer_core,
+ int* nspec2D_zmin_outer_core,
+ int* NSPEC2DMAX_XMIN_XMAX_OC,int* NSPEC2DMAX_YMIN_YMAX_OC,
+ int* nimin_outer_core,int* nimax_outer_core,
+ int* njmin_outer_core,int* njmax_outer_core,
+ int* nkmin_xi_outer_core,int* nkmin_eta_outer_core,
+ int* ibelm_xmin_outer_core,int* ibelm_xmax_outer_core,
+ int* ibelm_ymin_outer_core,int* ibelm_ymax_outer_core,
+ int* ibelm_bottom_outer_core,
+ realw* jacobian2D_xmin_outer_core, realw* jacobian2D_xmax_outer_core,
+ realw* jacobian2D_ymin_outer_core, realw* jacobian2D_ymax_outer_core,
+ realw* jacobian2D_bottom_outer_core,
+ realw* vp_outer_core
+ ) {
+
+ TRACE("prepare_fields_absorb_device");
+ int size;
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ // checks flag
+ if( ! mp->absorbing_conditions ){ exit_on_cuda_error("prepare_fields_absorb_device absorbing_conditions not properly initialized"); }
+
+ // crust_mantle
+ mp->nspec2D_xmin_crust_mantle = *nspec2D_xmin_crust_mantle;
+ mp->nspec2D_xmax_crust_mantle = *nspec2D_xmax_crust_mantle;
+ mp->nspec2D_ymin_crust_mantle = *nspec2D_ymin_crust_mantle;
+ mp->nspec2D_ymax_crust_mantle = *nspec2D_ymax_crust_mantle;
+
+ // vp & vs
+ size = NGLL3*(mp->NSPEC_CRUST_MANTLE);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rho_vp_crust_mantle,
+ size*sizeof(realw)),2201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp_crust_mantle,rho_vp_crust_mantle,
+ size*sizeof(realw),cudaMemcpyHostToDevice),2202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rho_vs_crust_mantle,
+ size*sizeof(realw)),2201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs_crust_mantle,rho_vs_crust_mantle,
+ size*sizeof(realw),cudaMemcpyHostToDevice),2202);
+
+ // ijk index arrays
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nkmin_xi_crust_mantle,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_xi_crust_mantle,nkmin_xi_crust_mantle,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nkmin_eta_crust_mantle,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_eta_crust_mantle,nkmin_eta_crust_mantle,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmin_crust_mantle,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_njmin_crust_mantle,njmin_crust_mantle,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmax_crust_mantle,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_njmax_crust_mantle,njmax_crust_mantle,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nimin_crust_mantle,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nimin_crust_mantle,nimin_crust_mantle,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nimax_crust_mantle,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nimax_crust_mantle,nimax_crust_mantle,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+
+ // xmin
+ if( mp->nspec2D_xmin_crust_mantle > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_xmin_crust_mantle,
+ (mp->nspec2D_xmin_crust_mantle)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmin_crust_mantle,ibelm_xmin_crust_mantle,
+ (mp->nspec2D_xmin_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_xmin_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_xmin_crust_mantle,normal_xmin_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmin_crust_mantle,
+ NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmin_crust_mantle,jacobian2D_xmin_crust_mantle,
+ NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ // boundary buffer
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmin_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw)),1202);
+ }
+ }
+
+ // xmax
+ if( mp->nspec2D_xmax_crust_mantle > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_xmax_crust_mantle,
+ (mp->nspec2D_xmax_crust_mantle)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmax_crust_mantle,ibelm_xmax_crust_mantle,
+ (mp->nspec2D_xmax_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_xmax_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_xmax_crust_mantle,normal_xmax_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmax_crust_mantle,
+ NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmax_crust_mantle,jacobian2D_xmax_crust_mantle,
+ NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ // boundary buffer
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmax_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw)),1202);
+ }
+ }
+
+ // ymin
+ if( mp->nspec2D_ymin_crust_mantle > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_ymin_crust_mantle,
+ (mp->nspec2D_ymin_crust_mantle)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymin_crust_mantle,ibelm_ymin_crust_mantle,
+ (mp->nspec2D_ymin_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_ymin_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_ymin_crust_mantle,normal_ymin_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymin_crust_mantle,
+ NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymin_crust_mantle,jacobian2D_ymin_crust_mantle,
+ NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ // boundary buffer
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymin_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw)),1202);
+ }
+ }
+
+ // ymax
+ if( mp->nspec2D_ymax_crust_mantle > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_ymax_crust_mantle,
+ (mp->nspec2D_ymax_crust_mantle)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymax_crust_mantle,ibelm_ymax_crust_mantle,
+ (mp->nspec2D_ymax_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_ymax_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_ymax_crust_mantle,normal_ymax_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymax_crust_mantle,
+ NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymax_crust_mantle,jacobian2D_ymax_crust_mantle,
+ NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ // boundary buffer
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymax_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw)),1202);
+ }
+ }
+
+
+ // outer_core
+ mp->nspec2D_xmin_outer_core = *nspec2D_xmin_outer_core;
+ mp->nspec2D_xmax_outer_core = *nspec2D_xmax_outer_core;
+ mp->nspec2D_ymin_outer_core = *nspec2D_ymin_outer_core;
+ mp->nspec2D_ymax_outer_core = *nspec2D_ymax_outer_core;
+ mp->nspec2D_zmin_outer_core = *nspec2D_zmin_outer_core;
+
+ // vp
+ size = NGLL3*(mp->NSPEC_OUTER_CORE);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_vp_outer_core,
+ size*sizeof(realw)),2201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_vp_outer_core,vp_outer_core,
+ size*sizeof(realw),cudaMemcpyHostToDevice),2202);
+
+ // ijk index arrays
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nkmin_xi_outer_core,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_xi_outer_core,nkmin_xi_outer_core,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nkmin_eta_outer_core,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_eta_outer_core,nkmin_eta_outer_core,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmin_outer_core,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_njmin_outer_core,njmin_outer_core,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmax_outer_core,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_njmax_outer_core,njmax_outer_core,
+ 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nimin_outer_core,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nimin_outer_core,nimin_outer_core,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nimax_outer_core,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nimax_outer_core,nimax_outer_core,
+ 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ // xmin
+ if( mp->nspec2D_xmin_outer_core > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_xmin_outer_core,
+ (mp->nspec2D_xmin_outer_core)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmin_outer_core,ibelm_xmin_outer_core,
+ (mp->nspec2D_xmin_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmin_outer_core,
+ NGLL2*(mp->nspec2D_xmin_outer_core)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmin_outer_core,jacobian2D_xmin_outer_core,
+ NGLL2*(mp->nspec2D_xmin_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ // boundary buffer
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmin_outer_core,
+ NGLL2*(mp->nspec2D_xmin_outer_core)*sizeof(realw)),1202);
+ }
+ }
+
+ // xmax
+ if( mp->nspec2D_xmax_outer_core > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_xmax_outer_core,
+ (mp->nspec2D_xmax_outer_core)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmax_outer_core,ibelm_xmax_outer_core,
+ (mp->nspec2D_xmax_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmax_outer_core,
+ NGLL2*(mp->nspec2D_xmax_outer_core)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmax_outer_core,jacobian2D_xmax_outer_core,
+ NGLL2*(mp->nspec2D_xmax_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ // boundary buffer
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmax_outer_core,
+ NGLL2*(mp->nspec2D_xmax_outer_core)*sizeof(realw)),1202);
+ }
+ }
+
+ // ymin
+ if( mp->nspec2D_ymin_outer_core > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_ymin_outer_core,
+ (mp->nspec2D_ymin_outer_core)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymin_outer_core,ibelm_ymin_outer_core,
+ (mp->nspec2D_ymin_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymin_outer_core,
+ NGLL2*(mp->nspec2D_ymin_outer_core)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymin_outer_core,jacobian2D_ymin_outer_core,
+ NGLL2*(mp->nspec2D_ymin_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ // boundary buffer
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymin_outer_core,
+ NGLL2*(mp->nspec2D_ymin_outer_core)*sizeof(realw)),1202);
+ }
+ }
+
+ // ymax
+ if( mp->nspec2D_ymax_outer_core > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_ymax_outer_core,
+ (mp->nspec2D_ymax_outer_core)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymax_outer_core,ibelm_ymax_outer_core,
+ (mp->nspec2D_ymax_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymax_outer_core,
+ NGLL2*(mp->nspec2D_ymax_outer_core)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymax_outer_core,jacobian2D_ymax_outer_core,
+ NGLL2*(mp->nspec2D_ymax_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ // boundary buffer
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymax_outer_core,
+ NGLL2*(mp->nspec2D_ymax_outer_core)*sizeof(realw)),1202);
+ }
+ }
+
+ // zmin
+ if( mp->nspec2D_zmin_outer_core > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_zmin_outer_core,
+ (mp->nspec2D_zmin_outer_core)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_zmin_outer_core,ibelm_bottom_outer_core,
+ (mp->nspec2D_zmin_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_zmin_outer_core,
+ NGLL2*(mp->nspec2D_zmin_outer_core)*sizeof(realw)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_zmin_outer_core,jacobian2D_bottom_outer_core,
+ NGLL2*(mp->nspec2D_zmin_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
+
+ // boundary buffer
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_zmin_outer_core,
+ NGLL2*(mp->nspec2D_zmin_outer_core)*sizeof(realw)),1202);
+ }
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
// MPI interfaces
/* ----------------------------------------------------------------------------------------------- */
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-02-21 00:33:56 UTC (rev 19657)
@@ -122,78 +122,32 @@
//
-// src/cuda/compute_add_sources_acoustic_cuda.cu
-//
-
-void FC_FUNC_(compute_add_sources_ac_cuda,
- COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* NSOURCESf,
- int* SIMULATION_TYPEf,
- double* h_stf_pre_compute,
- int* myrankf) {}
-
-void FC_FUNC_(compute_add_sources_ac_s3_cuda,
- COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* NSOURCESf,
- int* SIMULATION_TYPEf,
- double* h_stf_pre_compute,
- int* myrankf) {}
-
-void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda,
- ADD_SOURCES_AC_SIM_2_OR_3_CUDA)(long* Mesh_pointer,
- realw* h_adj_sourcearrays,
- int* phase_is_inner,
- int* h_ispec_is_inner,
- int* h_ispec_is_acoustic,
- int* h_ispec_selected_rec,
- int* myrank,
- int* nrec,
- int* time_index,
- int* h_islice_selected_rec,
- int* nadj_rec_local,
- int* NTSTEP_BETWEEN_READ_ADJSRC) {}
-
-
-//
// src/cuda/compute_add_sources_elastic_cuda.cu
//
void FC_FUNC_(compute_add_sources_el_cuda,
COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* NSOURCESf,
- double* h_stf_pre_compute,
- int* myrankf) {}
+ int* NSOURCESf,
+ double* h_stf_pre_compute) {}
void FC_FUNC_(compute_add_sources_el_s3_cuda,
- COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
- double* h_stf_pre_compute,
+ COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer_f,
int* NSOURCESf,
- int* phase_is_inner,
- int* myrank) {}
+ double* h_stf_pre_compute) {}
void FC_FUNC_(add_source_master_rec_noise_cu,
ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
- int* myrank_f,
int* it_f,
int* irec_master_noise_f,
int* islice_selected_rec) {}
void FC_FUNC_(add_sources_el_sim_type_2_or_3,
ADD_SOURCES_EL_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
- realw* h_adj_sourcearrays,
- int* phase_is_inner,
- int* h_ispec_is_inner,
- int* h_ispec_is_elastic,
- int* h_ispec_selected_rec,
- int* myrank,
- int* nrec,
- int* time_index,
- int* h_islice_selected_rec,
- int* nadj_rec_local,
- int* NTSTEP_BETWEEN_READ_ADJSRC) {}
+ int* nrec,
+ realw* h_adj_sourcearrays,
+ int* h_islice_selected_rec,
+ int* h_ispec_selected_rec,
+ int* time_index) {}
//
@@ -278,12 +232,9 @@
//
void FC_FUNC_(compute_stacey_acoustic_cuda,
- COMPUTE_STACEY_ACOUSTIC_CUDA)(
- long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* SIMULATION_TYPEf,
- int* SAVE_FORWARDf,
- realw* h_b_absorb_potential) {}
+ COMPUTE_STACEY_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+ realw* absorb_potential,
+ int* itype) {}
//
@@ -292,10 +243,8 @@
void FC_FUNC_(compute_stacey_elastic_cuda,
COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* SIMULATION_TYPEf,
- int* SAVE_FORWARDf,
- realw* h_b_absorb_field) {}
+ realw* absorb_field,
+ int* itype) {}
//
@@ -381,17 +330,17 @@
void FC_FUNC_(prepare_constants_device,
PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
+ int* myrank_f,
int* h_NGLLX,
realw* h_hprime_xx,realw* h_hprime_yy,realw* h_hprime_zz,
realw* h_hprimewgll_xx,realw* h_hprimewgll_yy,realw* h_hprimewgll_zz,
realw* h_wgllwgll_xy,realw* h_wgllwgll_xz,realw* h_wgllwgll_yz,
int* NSOURCES,int* nsources_local,
realw* h_sourcearrays,
- int* h_islice_selected_source,
- int* h_ispec_selected_source,
+ int* h_islice_selected_source,int* h_ispec_selected_source,
int* h_number_receiver_global,
- int* h_ispec_selected_rec,
- int* nrec,int* nrec_local,
+ int* h_islice_selected_rec,int* h_ispec_selected_rec,
+ int* nrec,int* nrec_local, int* nadj_rec_local,
int* NSPEC_CRUST_MANTLE, int* NGLOB_CRUST_MANTLE,
int* NSPEC_OUTER_CORE, int* NGLOB_OUTER_CORE,
int* NSPEC_INNER_CORE, int* NGLOB_INNER_CORE,
@@ -480,6 +429,38 @@
realw* b_eps_trace_over_3_inner_core
) {}
+void FC_FUNC_(prepare_fields_absorb_device,
+ PREPARE_FIELDS_ABSORB_DEVICE)(long* Mesh_pointer_f,
+ int* nspec2D_xmin_crust_mantle,int* nspec2D_xmax_crust_mantle,
+ int* nspec2D_ymin_crust_mantle,int* nspec2D_ymax_crust_mantle,
+ int* NSPEC2DMAX_XMIN_XMAX_CM,int* NSPEC2DMAX_YMIN_YMAX_CM,
+ int* nimin_crust_mantle,int* nimax_crust_mantle,
+ int* njmin_crust_mantle,int* njmax_crust_mantle,
+ int* nkmin_xi_crust_mantle,int* nkmin_eta_crust_mantle,
+ int* ibelm_xmin_crust_mantle,int* ibelm_xmax_crust_mantle,
+ int* ibelm_ymin_crust_mantle,int* ibelm_ymax_crust_mantle,
+ realw* normal_xmin_crust_mantle,realw* normal_xmax_crust_mantle,
+ realw* normal_ymin_crust_mantle,realw* normal_ymax_crust_mantle,
+ realw* jacobian2D_xmin_crust_mantle, realw* jacobian2D_xmax_crust_mantle,
+ realw* jacobian2D_ymin_crust_mantle, realw* jacobian2D_ymax_crust_mantle,
+ realw* rho_vp_crust_mantle,
+ realw* rho_vs_crust_mantle,
+ int* nspec2D_xmin_outer_core,int* nspec2D_xmax_outer_core,
+ int* nspec2D_ymin_outer_core,int* nspec2D_ymax_outer_core,
+ int* nspec2D_zmin_outer_core,
+ int* NSPEC2DMAX_XMIN_XMAX_OC,int* NSPEC2DMAX_YMIN_YMAX_OC,
+ int* nimin_outer_core,int* nimax_outer_core,
+ int* njmin_outer_core,int* njmax_outer_core,
+ int* nkmin_xi_outer_core,int* nkmin_eta_outer_core,
+ int* ibelm_xmin_outer_core,int* ibelm_xmax_outer_core,
+ int* ibelm_ymin_outer_core,int* ibelm_ymax_outer_core,
+ int* ibelm_bottom_outer_core,
+ realw* jacobian2D_xmin_outer_core, realw* jacobian2D_xmax_outer_core,
+ realw* jacobian2D_ymin_outer_core, realw* jacobian2D_ymax_outer_core,
+ realw* jacobian2D_bottom_outer_core,
+ realw* vp_outer_core
+ ) {}
+
void FC_FUNC_(prepare_mpi_buffers_device,
PREPARE_MPI_BUFFERS_DEVICE)(long* Mesh_pointer_f,
int* num_interfaces_crust_mantle,
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in 2012-02-21 00:33:56 UTC (rev 19657)
@@ -187,7 +187,6 @@
$O/assemble_MPI_scalar_cuda.cuda.o \
$O/assemble_MPI_vector_cuda.cuda.o \
$O/check_fields_cuda.cuda.o \
- $O/compute_add_sources_acoustic_cuda.cuda.o \
$O/compute_add_sources_elastic_cuda.cuda.o \
$O/compute_coupling_cuda.cuda.o \
$O/compute_forces_crust_mantle_cuda.cuda.o \
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90 2012-02-21 00:33:56 UTC (rev 19657)
@@ -25,159 +25,123 @@
!
!=====================================================================
- subroutine compute_add_sources(myrank,NSOURCES, &
- accel_crust_mantle,sourcearrays, &
- DT,t0,tshift_cmt,hdur_gaussian,ibool_crust_mantle, &
- islice_selected_source,ispec_selected_source,it, &
- hdur,xi_source,eta_source,gamma_source,nu_source)
+ subroutine compute_add_sources()
+ use specfem_par
+ use specfem_par_crustmantle,only: accel_crust_mantle,ibool_crust_mantle
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
- integer myrank,NSOURCES
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
- accel_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ,NSOURCES) :: sourcearrays
-
- double precision, dimension(NSOURCES) :: tshift_cmt,hdur_gaussian
-
- double precision :: DT,t0
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
-
- integer, dimension(NSOURCES) :: islice_selected_source,ispec_selected_source
- integer :: it
-
- ! needed for point force sources
- double precision, dimension(NSOURCES) :: xi_source,eta_source,gamma_source
- double precision, dimension(NDIM,NDIM,NSOURCES) :: nu_source
- double precision, dimension(NSOURCES) :: hdur
-
! local parameters
double precision :: stf
real(kind=CUSTOM_REAL) :: stf_used
integer :: isource,i,j,k,iglob,ispec
- double precision, external :: comp_source_time_function
double precision :: f0
+ double precision, dimension(NSOURCES) :: stf_pre_compute
+
+ double precision, external :: comp_source_time_function
double precision, external :: comp_source_time_function_rickr
- do isource = 1,NSOURCES
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ do isource = 1,NSOURCES
+ ! add only if this proc carries the source
+ if(myrank == islice_selected_source(isource)) then
- ! add only if this proc carries the source
- if(myrank == islice_selected_source(isource)) then
+ if(USE_FORCE_POINT_SOURCE) then
- if(USE_FORCE_POINT_SOURCE) then
+ ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+ iglob = ibool_crust_mantle(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)), &
+ ispec_selected_source(isource))
- ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
- iglob = ibool_crust_mantle(nint(xi_source(isource)), &
- nint(eta_source(isource)), &
- nint(gamma_source(isource)), &
- ispec_selected_source(isource))
+ f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
- f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
+ !if (it == 1 .and. myrank == 0) then
+ ! write(IMAIN,*) 'using a source of dominant frequency ',f0
+ ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+ ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+ !endif
- !if (it == 1 .and. myrank == 0) then
- ! write(IMAIN,*) 'using a source of dominant frequency ',f0
- ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
- ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
- !endif
+ ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
+ stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
- ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
- stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
+ ! we use a force in a single direction along one of the components:
+ ! x/y/z or E/N/Z-direction would correspond to 1/2/3 = COMPONENT_FORCE_SOURCE
+ ! e.g. nu_source(3,:) here would be a source normal to the surface (z-direction).
+ accel_crust_mantle(:,iglob) = accel_crust_mantle(:,iglob) &
+ + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
- ! we use a force in a single direction along one of the components:
- ! x/y/z or E/N/Z-direction would correspond to 1/2/3 = COMPONENT_FORCE_SOURCE
- ! e.g. nu_source(3,:) here would be a source normal to the surface (z-direction).
- accel_crust_mantle(:,iglob) = accel_crust_mantle(:,iglob) &
- + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
+ else
- else
+ stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ stf_used = sngl(stf)
+ else
+ stf_used = stf
+ endif
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- stf_used = sngl(stf)
- else
- stf_used = stf
- endif
+ ! add source array
+ ispec = ispec_selected_source(isource)
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool_crust_mantle(i,j,k,ispec)
- ! add source array
- ispec = ispec_selected_source(isource)
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool_crust_mantle(i,j,k,ispec)
+ accel_crust_mantle(:,iglob) = accel_crust_mantle(:,iglob) &
+ + sourcearrays(:,i,j,k,isource)*stf_used
- accel_crust_mantle(:,iglob) = accel_crust_mantle(:,iglob) &
- + sourcearrays(:,i,j,k,isource)*stf_used
-
+ enddo
enddo
enddo
- enddo
- endif ! USE_FORCE_POINT_SOURCE
+ endif ! USE_FORCE_POINT_SOURCE
+ endif
+
+ enddo
+
+ else
+ ! on GPU
+ call load_GPU_elastic()
+
+ ! prepares buffer with source time function values, to be copied onto GPU
+ if(USE_FORCE_POINT_SOURCE) then
+ do isource = 1,NSOURCES
+ stf_pre_compute(isource) = &
+ FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
+ enddo
+ else
+ do isource = 1,NSOURCES
+ stf_pre_compute(isource) = &
+ comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ enddo
endif
+ ! adds sources: only implements SIMTYPE=1 and NOISE_TOM=0
+ call compute_add_sources_el_cuda(Mesh_pointer,NSOURCES,stf_pre_compute)
- enddo
+ call load_CPU_elastic()
+ endif
+
end subroutine compute_add_sources
!
!-------------------------------------------------------------------------------------------------
!
- subroutine compute_add_sources_adjoint(myrank,nrec, &
- nadj_rec_local,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC, &
- accel_crust_mantle,adj_sourcearrays, &
- nu,xi_receiver,eta_receiver,gamma_receiver, &
- xigll,yigll,zigll,ibool_crust_mantle, &
- islice_selected_rec,ispec_selected_rec, &
- NSTEP_SUB_ADJ,iadjsrc_len,iadjsrc,iadj_vec, &
- it,it_begin,station_name,network_name,DT)
+ subroutine compute_add_sources_adjoint()
+ use specfem_par
+ use specfem_par_crustmantle,only: accel_crust_mantle,ibool_crust_mantle
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
- integer myrank,nrec,nadj_rec_local,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC
-
- real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
- accel_crust_mantle
-
- real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ,nadj_rec_local,NTSTEP_BETWEEN_READ_ADJSRC) :: &
- adj_sourcearrays
-
- double precision, dimension(NDIM,NDIM,nrec) :: nu
- double precision, dimension(nrec) :: xi_receiver,eta_receiver,gamma_receiver
- double precision, dimension(NGLLX) :: xigll
- double precision, dimension(NGLLY) :: yigll
- double precision, dimension(NGLLZ) :: zigll
- double precision :: DT
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
- integer, dimension(nrec) :: islice_selected_rec,ispec_selected_rec
-
- integer NSTEP_SUB_ADJ
- integer, dimension(NSTEP_SUB_ADJ) :: iadjsrc_len
- integer, dimension(NSTEP_SUB_ADJ,2) :: iadjsrc ! to read input in chunks
- integer, dimension(NSTEP) :: iadj_vec
-
- integer :: it,it_begin,itime
-
- character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
- character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
-
! local parameters
real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: adj_sourcearray
- integer :: irec,irec_local,i,j,k,iglob,it_sub_adj
+ integer :: irec,irec_local,i,j,k,iglob,it_sub_adj,itime
character(len=150) :: adj_source_file
logical :: ibool_read_adj_arrays
@@ -228,88 +192,101 @@
endif
- irec_local = 0
- do irec = 1,nrec
+ ! adds adjoint sources
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ irec_local = 0
+ do irec = 1,nrec
- ! adds source (only if this proc carries the source)
- if(myrank == islice_selected_rec(irec)) then
- irec_local = irec_local + 1
+ ! adds source (only if this proc carries the source)
+ if(myrank == islice_selected_rec(irec)) then
+ irec_local = irec_local + 1
- ! adds source contributions
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool_crust_mantle(i,j,k,ispec_selected_rec(irec))
+ ! adds source contributions
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool_crust_mantle(i,j,k,ispec_selected_rec(irec))
- ! adds adjoint source acting at this time step (it):
- !
- ! note: we use index iadj_vec(it) which is the corresponding time step
- ! for the adjoint source acting at this time step (it)
- !
- ! see routine: setup_sources_receivers_adjindx() how this adjoint index array is set up
- !
- ! e.g. total length NSTEP = 3000, chunk length NTSTEP_BETWEEN_READ_ADJSRC= 1000
- ! then for it=1,..1000, first block has iadjsrc(1,1) with start = 2001 and end = 3000;
- ! corresponding iadj_vec(it) goes from
- ! iadj_vec(1) = 1000, iadj_vec(2) = 999 to iadj_vec(1000) = 1,
- ! that is, originally the idea was
- ! adj_sourcearrays(.. iadj_vec(1) ) corresponds to adjoint source trace at time index 3000
- ! adj_sourcearrays(.. iadj_vec(2) ) corresponds to adjoint source trace at time index 2999
- ! ..
- ! adj_sourcearrays(.. iadj_vec(1000) ) corresponds to adjoint source trace at time index 2001
- ! then a new block will be read, etc, and it is going down till to adjoint source trace at time index 1
- !
- ! now comes the tricky part:
- ! adjoint source traces are based on the seismograms from the forward run;
- ! such seismograms have a time step index 1 which corresponds to time -t0
- ! then time step index 2 which corresponds to -t0 + DT, and
- ! the last time step in the file at time step NSTEP corresponds to time -t0 + (NSTEP-1)*DT
- ! (see how we add the sources to the simulation in compute_add_sources() and
- ! how we write/save the seismograms and wavefields at the end of the time loop).
- !
- ! then you use that seismogram and take e.g. the velocity of it for a travetime adjoint source
- !
- ! now we read it in again, and remember the last time step in
- ! the file at NSTEP corresponds to -t0 + (NSTEP-1)*DT
- !
- ! the same time step is saved for the forward wavefields to reconstruct them;
- ! however, the Newmark time scheme acts at the very beginning of this time loop
- ! such that we have the backward/reconstructed wavefield updated by
- ! a single time step into the direction -DT and b_displ(it=1) would corresponds to -t0 + (NSTEP-1)*DT - DT
- ! after the Newmark (predictor) time step update.
- ! however, we will read the backward/reconstructed wavefield at the end of the first time loop,
- ! such that b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT (which is the one saved in the files).
- !
- ! for the kernel calculations, we want:
- ! adjoint wavefield at time t, starting from 0 to T
- ! and forward wavefield at time T-t, starting from T down to 0
- ! let's say time 0 corresponds to -t0 = -t0 + (it - 1)*DT at it=1
- ! and time T corresponds to -t0 + (NSTEP-1)*DT at it = NSTEP
- !
- ! as seen before, the time for the forward wavefield b_displ(it=1) would then
- ! correspond to time -t0 + (NSTEP-1)*DT - DT, which is T - DT.
- ! the corresponding time for the adjoint wavefield thus would be 0 + DT
- ! and the adjoint source index would be iadj_vec(it+1)
- ! however, iadj_vec(it+1) which would go from 999 down to 0. 0 is out of bounds.
- ! we thus would have to read in the adjoint source trace beginning from 2999 down to 0.
- ! index 0 is not defined in the adjoint source trace, and would be set to zero.
- !
- ! however, since this complicates things, we read the backward/reconstructed
- ! wavefield at the end of the first time loop, such that b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT.
- ! assuming that until that end the backward/reconstructed wavefield and adjoint fields
- ! have a zero contribution to adjoint kernels.
- accel_crust_mantle(:,iglob) = accel_crust_mantle(:,iglob) &
- + adj_sourcearrays(:,i,j,k,irec_local,iadj_vec(it))
+ ! adds adjoint source acting at this time step (it):
+ !
+ ! note: we use index iadj_vec(it) which is the corresponding time step
+ ! for the adjoint source acting at this time step (it)
+ !
+ ! see routine: setup_sources_receivers_adjindx() how this adjoint index array is set up
+ !
+ ! e.g. total length NSTEP = 3000, chunk length NTSTEP_BETWEEN_READ_ADJSRC= 1000
+ ! then for it=1,..1000, first block has iadjsrc(1,1) with start = 2001 and end = 3000;
+ ! corresponding iadj_vec(it) goes from
+ ! iadj_vec(1) = 1000, iadj_vec(2) = 999 to iadj_vec(1000) = 1,
+ ! that is, originally the idea was
+ ! adj_sourcearrays(.. iadj_vec(1) ) corresponds to adjoint source trace at time index 3000
+ ! adj_sourcearrays(.. iadj_vec(2) ) corresponds to adjoint source trace at time index 2999
+ ! ..
+ ! adj_sourcearrays(.. iadj_vec(1000) ) corresponds to adjoint source trace at time index 2001
+ ! then a new block will be read, etc, and it is going down till to adjoint source trace at time index 1
+ !
+ ! now comes the tricky part:
+ ! adjoint source traces are based on the seismograms from the forward run;
+ ! such seismograms have a time step index 1 which corresponds to time -t0
+ ! then time step index 2 which corresponds to -t0 + DT, and
+ ! the last time step in the file at time step NSTEP corresponds to time -t0 + (NSTEP-1)*DT
+ ! (see how we add the sources to the simulation in compute_add_sources() and
+ ! how we write/save the seismograms and wavefields at the end of the time loop).
+ !
+ ! then you use that seismogram and take e.g. the velocity of it for a travetime adjoint source
+ !
+ ! now we read it in again, and remember the last time step in
+ ! the file at NSTEP corresponds to -t0 + (NSTEP-1)*DT
+ !
+ ! the same time step is saved for the forward wavefields to reconstruct them;
+ ! however, the Newmark time scheme acts at the very beginning of this time loop
+ ! such that we have the backward/reconstructed wavefield updated by
+ ! a single time step into the direction -DT and b_displ(it=1) would corresponds to -t0 + (NSTEP-1)*DT - DT
+ ! after the Newmark (predictor) time step update.
+ ! however, we will read the backward/reconstructed wavefield at the end of the first time loop,
+ ! such that b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT (which is the one saved in the files).
+ !
+ ! for the kernel calculations, we want:
+ ! adjoint wavefield at time t, starting from 0 to T
+ ! and forward wavefield at time T-t, starting from T down to 0
+ ! let's say time 0 corresponds to -t0 = -t0 + (it - 1)*DT at it=1
+ ! and time T corresponds to -t0 + (NSTEP-1)*DT at it = NSTEP
+ !
+ ! as seen before, the time for the forward wavefield b_displ(it=1) would then
+ ! correspond to time -t0 + (NSTEP-1)*DT - DT, which is T - DT.
+ ! the corresponding time for the adjoint wavefield thus would be 0 + DT
+ ! and the adjoint source index would be iadj_vec(it+1)
+ ! however, iadj_vec(it+1) which would go from 999 down to 0. 0 is out of bounds.
+ ! we thus would have to read in the adjoint source trace beginning from 2999 down to 0.
+ ! index 0 is not defined in the adjoint source trace, and would be set to zero.
+ !
+ ! however, since this complicates things, we read the backward/reconstructed
+ ! wavefield at the end of the first time loop, such that b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT.
+ ! assuming that until that end the backward/reconstructed wavefield and adjoint fields
+ ! have a zero contribution to adjoint kernels.
+ accel_crust_mantle(:,iglob) = accel_crust_mantle(:,iglob) &
+ + adj_sourcearrays(:,i,j,k,irec_local,iadj_vec(it))
+ enddo
enddo
enddo
- enddo
- endif
+ endif
- enddo
+ enddo
+ else
+ ! on GPU
+ call load_GPU_elastic()
+
+ call add_sources_el_sim_type_2_or_3(Mesh_pointer,nrec,adj_sourcearrays, &
+ islice_selected_rec,ispec_selected_rec, &
+ iadj_vec(it))
+ call load_CPU_elastic()
+ endif
+
end subroutine compute_add_sources_adjoint
!
@@ -317,117 +294,116 @@
!
- subroutine compute_add_sources_backward(myrank,NSOURCES,NSTEP, &
- b_accel_crust_mantle,sourcearrays, &
- DT,t0,tshift_cmt,hdur_gaussian,ibool_crust_mantle, &
- islice_selected_source,ispec_selected_source,it, &
- hdur,xi_source,eta_source,gamma_source,nu_source)
+ subroutine compute_add_sources_backward()
+ use specfem_par
+ use specfem_par_crustmantle,only: b_accel_crust_mantle,ibool_crust_mantle
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
- integer myrank,NSOURCES,NSTEP
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
- b_accel_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ,NSOURCES) :: sourcearrays
-
- double precision, dimension(NSOURCES) :: tshift_cmt,hdur_gaussian
-
- double precision :: DT,t0
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
- integer, dimension(NSOURCES) :: islice_selected_source,ispec_selected_source
- integer :: it
- ! needed for point force sources
- double precision, dimension(NSOURCES) :: xi_source,eta_source,gamma_source
- double precision, dimension(NDIM,NDIM,NSOURCES) :: nu_source
- double precision, dimension(NSOURCES) :: hdur
-
! local parameters
double precision :: stf
real(kind=CUSTOM_REAL) :: stf_used
integer :: isource,i,j,k,iglob,ispec
- double precision, external :: comp_source_time_function
double precision :: f0
+ double precision, dimension(NSOURCES) :: stf_pre_compute
+
+ double precision, external :: comp_source_time_function
double precision, external :: comp_source_time_function_rickr
- do isource = 1,NSOURCES
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ do isource = 1,NSOURCES
- ! add the source (only if this proc carries the source)
- if(myrank == islice_selected_source(isource)) then
+ ! add the source (only if this proc carries the source)
+ if(myrank == islice_selected_source(isource)) then
-! note on backward/reconstructed wavefields:
-! time for b_displ( it ) corresponds to (NSTEP - (it-1) - 1 )*DT - t0 ...
-! as we start with saved wavefields b_displ( 1 ) = displ( NSTEP ) which correspond
-! to a time (NSTEP - 1)*DT - t0
-! (see sources for simulation_type 1 and seismograms)
-!
-! now, at the beginning of the time loop, the numerical Newmark time scheme updates
-! the wavefields, that is b_displ( it=1) would correspond to time (NSTEP -1 - 1)*DT - t0.
-! however, we read in the backward/reconstructed wavefields at the end of the Newmark time scheme
-! in the first (it=1) time loop.
-! this leads to the timing (NSTEP-(it-1)-1)*DT-t0-tshift_cmt for the source time function here
+ ! note on backward/reconstructed wavefields:
+ ! time for b_displ( it ) corresponds to (NSTEP - (it-1) - 1 )*DT - t0 ...
+ ! as we start with saved wavefields b_displ( 1 ) = displ( NSTEP ) which correspond
+ ! to a time (NSTEP - 1)*DT - t0
+ ! (see sources for simulation_type 1 and seismograms)
+ !
+ ! now, at the beginning of the time loop, the numerical Newmark time scheme updates
+ ! the wavefields, that is b_displ( it=1) would correspond to time (NSTEP -1 - 1)*DT - t0.
+ ! however, we read in the backward/reconstructed wavefields at the end of the Newmark time scheme
+ ! in the first (it=1) time loop.
+ ! this leads to the timing (NSTEP-(it-1)-1)*DT-t0-tshift_cmt for the source time function here
+ if(USE_FORCE_POINT_SOURCE) then
- if(USE_FORCE_POINT_SOURCE) then
+ ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+ iglob = ibool_crust_mantle(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)), &
+ ispec_selected_source(isource))
- ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
- iglob = ibool_crust_mantle(nint(xi_source(isource)), &
- nint(eta_source(isource)), &
- nint(gamma_source(isource)), &
- ispec_selected_source(isource))
+ f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
- f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
+ !if (it == 1 .and. myrank == 0) then
+ ! write(IMAIN,*) 'using a source of dominant frequency ',f0
+ ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+ ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+ !endif
- !if (it == 1 .and. myrank == 0) then
- ! write(IMAIN,*) 'using a source of dominant frequency ',f0
- ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
- ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
- !endif
+ ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
+ stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),f0)
- ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
- stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),f0)
+ ! e.g. we use nu_source(3,:) here if we want a source normal to the surface.
+ ! note: time step is now at NSTEP-it
+ b_accel_crust_mantle(:,iglob) = b_accel_crust_mantle(:,iglob) &
+ + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
- ! e.g. we use nu_source(3,:) here if we want a source normal to the surface.
- ! note: time step is now at NSTEP-it
- b_accel_crust_mantle(:,iglob) = b_accel_crust_mantle(:,iglob) &
- + sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
+ else
- else
+ ! see note above: time step corresponds now to NSTEP-it
+ stf = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- ! see note above: time step corresponds now to NSTEP-it
- stf = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ stf_used = sngl(stf)
+ else
+ stf_used = stf
+ endif
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- stf_used = sngl(stf)
- else
- stf_used = stf
- endif
+ ! add source array
+ ispec = ispec_selected_source(isource)
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool_crust_mantle(i,j,k,ispec)
- ! add source array
- ispec = ispec_selected_source(isource)
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool_crust_mantle(i,j,k,ispec)
+ b_accel_crust_mantle(:,iglob) = b_accel_crust_mantle(:,iglob) &
+ + sourcearrays(:,i,j,k,isource)*stf_used
- b_accel_crust_mantle(:,iglob) = b_accel_crust_mantle(:,iglob) &
- + sourcearrays(:,i,j,k,isource)*stf_used
-
+ enddo
enddo
enddo
- enddo
- endif ! USE_FORCE_POINT_SOURCE
+ endif ! USE_FORCE_POINT_SOURCE
- endif
+ endif
- enddo
+ enddo
- end subroutine compute_add_sources_backward
+ else
+ ! on GPU
+ call load_GPU_elastic()
+
+ ! prepares buffer with source time function values, to be copied onto GPU
+ if(USE_FORCE_POINT_SOURCE) then
+ do isource = 1,NSOURCES
+ stf_pre_compute(isource) = &
+ FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),f0)
+ enddo
+ else
+ do isource = 1,NSOURCES
+ stf_pre_compute(isource) = &
+ comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ enddo
+ endif
+ ! adds sources: only implements SIMTYPE=3 (and NOISE_TOM=0)
+ call compute_add_sources_el_s3_cuda(Mesh_pointer,NSOURCES,stf_pre_compute)
+ call load_CPU_elastic()
+ endif
+ end subroutine compute_add_sources_backward
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90 2012-02-21 00:33:56 UTC (rev 19657)
@@ -356,7 +356,7 @@
use specfem_par_outercore
implicit none
- ! daniel: TODO - temporary transfers to the GPU
+ ! daniel: TODO - temporary transfers to the CPU
call transfer_fields_oc_from_device(NGLOB_OUTER_CORE,displ_outer_core, &
veloc_outer_core,accel_outer_core,Mesh_pointer)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 2012-02-21 00:33:56 UTC (rev 19657)
@@ -207,35 +207,18 @@
if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_crust_mantle()
! add the sources
- if (SIMULATION_TYPE == 1) &
- call compute_add_sources(myrank,NSOURCES, &
- accel_crust_mantle,sourcearrays, &
- DT,t0,tshift_cmt,hdur_gaussian,ibool_crust_mantle, &
- islice_selected_source,ispec_selected_source,it, &
- hdur,xi_source,eta_source,gamma_source,nu_source)
+ if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) &
+ call compute_add_sources()
! add adjoint sources
if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
- if( nadj_rec_local > 0 ) &
- call compute_add_sources_adjoint(myrank,nrec, &
- nadj_rec_local,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC, &
- accel_crust_mantle,adj_sourcearrays, &
- nu,xi_receiver,eta_receiver,gamma_receiver, &
- xigll,yigll,zigll,ibool_crust_mantle, &
- islice_selected_rec,ispec_selected_rec, &
- NSTEP_SUB_ADJ,iadjsrc_len,iadjsrc,iadj_vec, &
- it,it_begin,station_name,network_name,DT)
+ if( nadj_rec_local > 0 ) call compute_add_sources_adjoint()
endif
! add sources for backward/reconstructed wavefield
- if (SIMULATION_TYPE == 3) &
- call compute_add_sources_backward(myrank,NSOURCES,NSTEP, &
- b_accel_crust_mantle,sourcearrays, &
- DT,t0,tshift_cmt,hdur_gaussian,ibool_crust_mantle, &
- islice_selected_source,ispec_selected_source,it, &
- hdur,xi_source,eta_source,gamma_source,nu_source)
+ if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) &
+ call compute_add_sources_backward()
-
! NOISE_TOMOGRAPHY
if ( NOISE_TOMOGRAPHY == 1 ) then
! the first step of noise tomography is to use |S(\omega)|^2 as a point force source at one of the receivers.
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90 2012-02-21 00:33:56 UTC (rev 19657)
@@ -32,6 +32,8 @@
use specfem_par,only: &
ichunk,SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it, &
wgllwgll_xz,wgllwgll_yz,wgllwgll_xy
+
+ use specfem_par,only: GPU_MODE,Mesh_pointer
use specfem_par_crustmantle, only: &
veloc_crust_mantle,accel_crust_mantle,b_accel_crust_mantle, &
@@ -50,7 +52,6 @@
nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
reclen_xmin_crust_mantle,reclen_xmax_crust_mantle, &
reclen_ymin_crust_mantle,reclen_ymax_crust_mantle, &
- nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm, &
absorb_xmin_crust_mantle,absorb_xmax_crust_mantle, &
absorb_ymin_crust_mantle,absorb_ymax_crust_mantle
@@ -72,7 +73,8 @@
! crust & mantle
-
+ if( GPU_MODE ) call load_GPU_elastic()
+
! xmin
! if two chunks exclude this face for one of them
if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC) then
@@ -84,54 +86,64 @@
call read_abs(0,absorb_xmin_crust_mantle,reclen_xmin_crust_mantle,NSTEP-it+1)
endif
- do ispec2D=1,nspec2D_xmin_crust_mantle
+ if( .NOT. GPU_MODE) then
+ ! on CPU
+ do ispec2D=1,nspec2D_xmin_crust_mantle
- ispec=ibelm_xmin_crust_mantle(ispec2D)
+ ispec=ibelm_xmin_crust_mantle(ispec2D)
- ! exclude elements that are not on absorbing edges
- if(nkmin_xi_crust_mantle(1,ispec2D) == 0 .or. njmin_crust_mantle(1,ispec2D) == 0) cycle
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_xi_crust_mantle(1,ispec2D) == 0 .or. njmin_crust_mantle(1,ispec2D) == 0) cycle
- i=1
- do k=nkmin_xi_crust_mantle(1,ispec2D),NGLLZ
- do j=njmin_crust_mantle(1,ispec2D),njmax_crust_mantle(1,ispec2D)
- iglob=ibool_crust_mantle(i,j,k,ispec)
+ i=1
+ do k=nkmin_xi_crust_mantle(1,ispec2D),NGLLZ
+ do j=njmin_crust_mantle(1,ispec2D),njmax_crust_mantle(1,ispec2D)
+ iglob=ibool_crust_mantle(i,j,k,ispec)
- vx=veloc_crust_mantle(1,iglob)
- vy=veloc_crust_mantle(2,iglob)
- vz=veloc_crust_mantle(3,iglob)
+ vx=veloc_crust_mantle(1,iglob)
+ vy=veloc_crust_mantle(2,iglob)
+ vz=veloc_crust_mantle(3,iglob)
- nx=normal_xmin_crust_mantle(1,j,k,ispec2D)
- ny=normal_xmin_crust_mantle(2,j,k,ispec2D)
- nz=normal_xmin_crust_mantle(3,j,k,ispec2D)
+ nx=normal_xmin_crust_mantle(1,j,k,ispec2D)
+ ny=normal_xmin_crust_mantle(2,j,k,ispec2D)
+ nz=normal_xmin_crust_mantle(3,j,k,ispec2D)
- vn=vx*nx+vy*ny+vz*nz
+ vn=vx*nx+vy*ny+vz*nz
- tx=rho_vp_crust_mantle(i,j,k,ispec)*vn*nx+rho_vs_crust_mantle(i,j,k,ispec)*(vx-vn*nx)
- ty=rho_vp_crust_mantle(i,j,k,ispec)*vn*ny+rho_vs_crust_mantle(i,j,k,ispec)*(vy-vn*ny)
- tz=rho_vp_crust_mantle(i,j,k,ispec)*vn*nz+rho_vs_crust_mantle(i,j,k,ispec)*(vz-vn*nz)
+ tx=rho_vp_crust_mantle(i,j,k,ispec)*vn*nx+rho_vs_crust_mantle(i,j,k,ispec)*(vx-vn*nx)
+ ty=rho_vp_crust_mantle(i,j,k,ispec)*vn*ny+rho_vs_crust_mantle(i,j,k,ispec)*(vy-vn*ny)
+ tz=rho_vp_crust_mantle(i,j,k,ispec)*vn*nz+rho_vs_crust_mantle(i,j,k,ispec)*(vz-vn*nz)
- weight=jacobian2D_xmin_crust_mantle(j,k,ispec2D)*wgllwgll_yz(j,k)
+ weight=jacobian2D_xmin_crust_mantle(j,k,ispec2D)*wgllwgll_yz(j,k)
- accel_crust_mantle(1,iglob)=accel_crust_mantle(1,iglob) - tx*weight
- accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
- accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
+ accel_crust_mantle(1,iglob)=accel_crust_mantle(1,iglob) - tx*weight
+ accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
+ accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
- if (SIMULATION_TYPE == 3) then
- b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_xmin_crust_mantle(:,j,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
- absorb_xmin_crust_mantle(1,j,k,ispec2D) = tx*weight
- absorb_xmin_crust_mantle(2,j,k,ispec2D) = ty*weight
- absorb_xmin_crust_mantle(3,j,k,ispec2D) = tz*weight
- endif
+ if (SIMULATION_TYPE == 3) then
+ b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_xmin_crust_mantle(:,j,k,ispec2D)
+ else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ absorb_xmin_crust_mantle(1,j,k,ispec2D) = tx*weight
+ absorb_xmin_crust_mantle(2,j,k,ispec2D) = ty*weight
+ absorb_xmin_crust_mantle(3,j,k,ispec2D) = tz*weight
+ endif
+ enddo
enddo
enddo
- enddo
-
+
+ else
+ ! on GPU
+ if( nspec2D_xmin_crust_mantle > 0 ) call compute_stacey_elastic_cuda(Mesh_pointer, &
+ absorb_xmin_crust_mantle, &
+ 0) ! <= xmin
+ endif
+
! writes absorbing boundary values
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmin_crust_mantle > 0 ) then
call write_abs(0,absorb_xmin_crust_mantle, reclen_xmin_crust_mantle,it)
endif
- endif
+
+ endif ! NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC
! xmax
! if two chunks exclude this face for one of them
@@ -142,55 +154,66 @@
call read_abs(1,absorb_xmax_crust_mantle,reclen_xmax_crust_mantle,NSTEP-it+1)
endif
- do ispec2D=1,nspec2D_xmax_crust_mantle
+ if(.NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_xmax_crust_mantle
- ispec=ibelm_xmax_crust_mantle(ispec2D)
+ ispec=ibelm_xmax_crust_mantle(ispec2D)
- ! exclude elements that are not on absorbing edges
- if(nkmin_xi_crust_mantle(2,ispec2D) == 0 .or. njmin_crust_mantle(2,ispec2D) == 0) cycle
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_xi_crust_mantle(2,ispec2D) == 0 .or. njmin_crust_mantle(2,ispec2D) == 0) cycle
- i=NGLLX
- do k=nkmin_xi_crust_mantle(2,ispec2D),NGLLZ
- do j=njmin_crust_mantle(2,ispec2D),njmax_crust_mantle(2,ispec2D)
- iglob=ibool_crust_mantle(i,j,k,ispec)
+ i=NGLLX
+ do k=nkmin_xi_crust_mantle(2,ispec2D),NGLLZ
+ do j=njmin_crust_mantle(2,ispec2D),njmax_crust_mantle(2,ispec2D)
+ iglob=ibool_crust_mantle(i,j,k,ispec)
- vx=veloc_crust_mantle(1,iglob)
- vy=veloc_crust_mantle(2,iglob)
- vz=veloc_crust_mantle(3,iglob)
+ vx=veloc_crust_mantle(1,iglob)
+ vy=veloc_crust_mantle(2,iglob)
+ vz=veloc_crust_mantle(3,iglob)
- nx=normal_xmax_crust_mantle(1,j,k,ispec2D)
- ny=normal_xmax_crust_mantle(2,j,k,ispec2D)
- nz=normal_xmax_crust_mantle(3,j,k,ispec2D)
+ nx=normal_xmax_crust_mantle(1,j,k,ispec2D)
+ ny=normal_xmax_crust_mantle(2,j,k,ispec2D)
+ nz=normal_xmax_crust_mantle(3,j,k,ispec2D)
- vn=vx*nx+vy*ny+vz*nz
+ vn=vx*nx+vy*ny+vz*nz
- tx=rho_vp_crust_mantle(i,j,k,ispec)*vn*nx+rho_vs_crust_mantle(i,j,k,ispec)*(vx-vn*nx)
- ty=rho_vp_crust_mantle(i,j,k,ispec)*vn*ny+rho_vs_crust_mantle(i,j,k,ispec)*(vy-vn*ny)
- tz=rho_vp_crust_mantle(i,j,k,ispec)*vn*nz+rho_vs_crust_mantle(i,j,k,ispec)*(vz-vn*nz)
+ tx=rho_vp_crust_mantle(i,j,k,ispec)*vn*nx+rho_vs_crust_mantle(i,j,k,ispec)*(vx-vn*nx)
+ ty=rho_vp_crust_mantle(i,j,k,ispec)*vn*ny+rho_vs_crust_mantle(i,j,k,ispec)*(vy-vn*ny)
+ tz=rho_vp_crust_mantle(i,j,k,ispec)*vn*nz+rho_vs_crust_mantle(i,j,k,ispec)*(vz-vn*nz)
- weight=jacobian2D_xmax_crust_mantle(j,k,ispec2D)*wgllwgll_yz(j,k)
+ weight=jacobian2D_xmax_crust_mantle(j,k,ispec2D)*wgllwgll_yz(j,k)
- accel_crust_mantle(1,iglob)=accel_crust_mantle(1,iglob) - tx*weight
- accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
- accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
+ accel_crust_mantle(1,iglob)=accel_crust_mantle(1,iglob) - tx*weight
+ accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
+ accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
- if (SIMULATION_TYPE == 3) then
- b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_xmax_crust_mantle(:,j,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
- absorb_xmax_crust_mantle(1,j,k,ispec2D) = tx*weight
- absorb_xmax_crust_mantle(2,j,k,ispec2D) = ty*weight
- absorb_xmax_crust_mantle(3,j,k,ispec2D) = tz*weight
- endif
+ if (SIMULATION_TYPE == 3) then
+ b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_xmax_crust_mantle(:,j,k,ispec2D)
+ else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ absorb_xmax_crust_mantle(1,j,k,ispec2D) = tx*weight
+ absorb_xmax_crust_mantle(2,j,k,ispec2D) = ty*weight
+ absorb_xmax_crust_mantle(3,j,k,ispec2D) = tz*weight
+ endif
+ enddo
enddo
enddo
- enddo
+ else
+ ! on GPU
+ if( nspec2D_xmax_crust_mantle > 0 ) call compute_stacey_elastic_cuda(Mesh_pointer, &
+ absorb_xmax_crust_mantle, &
+ 1) ! <= xmin
+ endif
+
+
! writes absorbing boundary values
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmax_crust_mantle > 0 ) then
call write_abs(1,absorb_xmax_crust_mantle,reclen_xmax_crust_mantle,it)
endif
- endif
+
+ endif ! NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AB
! ymin
@@ -199,50 +222,60 @@
call read_abs(2,absorb_ymin_crust_mantle, reclen_ymin_crust_mantle,NSTEP-it+1)
endif
- do ispec2D=1,nspec2D_ymin_crust_mantle
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_ymin_crust_mantle
- ispec=ibelm_ymin_crust_mantle(ispec2D)
+ ispec=ibelm_ymin_crust_mantle(ispec2D)
- ! exclude elements that are not on absorbing edges
- if(nkmin_eta_crust_mantle(1,ispec2D) == 0 .or. nimin_crust_mantle(1,ispec2D) == 0) cycle
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_eta_crust_mantle(1,ispec2D) == 0 .or. nimin_crust_mantle(1,ispec2D) == 0) cycle
- j=1
- do k=nkmin_eta_crust_mantle(1,ispec2D),NGLLZ
- do i=nimin_crust_mantle(1,ispec2D),nimax_crust_mantle(1,ispec2D)
- iglob=ibool_crust_mantle(i,j,k,ispec)
+ j=1
+ do k=nkmin_eta_crust_mantle(1,ispec2D),NGLLZ
+ do i=nimin_crust_mantle(1,ispec2D),nimax_crust_mantle(1,ispec2D)
+ iglob=ibool_crust_mantle(i,j,k,ispec)
- vx=veloc_crust_mantle(1,iglob)
- vy=veloc_crust_mantle(2,iglob)
- vz=veloc_crust_mantle(3,iglob)
+ vx=veloc_crust_mantle(1,iglob)
+ vy=veloc_crust_mantle(2,iglob)
+ vz=veloc_crust_mantle(3,iglob)
- nx=normal_ymin_crust_mantle(1,i,k,ispec2D)
- ny=normal_ymin_crust_mantle(2,i,k,ispec2D)
- nz=normal_ymin_crust_mantle(3,i,k,ispec2D)
+ nx=normal_ymin_crust_mantle(1,i,k,ispec2D)
+ ny=normal_ymin_crust_mantle(2,i,k,ispec2D)
+ nz=normal_ymin_crust_mantle(3,i,k,ispec2D)
- vn=vx*nx+vy*ny+vz*nz
+ vn=vx*nx+vy*ny+vz*nz
- tx=rho_vp_crust_mantle(i,j,k,ispec)*vn*nx+rho_vs_crust_mantle(i,j,k,ispec)*(vx-vn*nx)
- ty=rho_vp_crust_mantle(i,j,k,ispec)*vn*ny+rho_vs_crust_mantle(i,j,k,ispec)*(vy-vn*ny)
- tz=rho_vp_crust_mantle(i,j,k,ispec)*vn*nz+rho_vs_crust_mantle(i,j,k,ispec)*(vz-vn*nz)
+ tx=rho_vp_crust_mantle(i,j,k,ispec)*vn*nx+rho_vs_crust_mantle(i,j,k,ispec)*(vx-vn*nx)
+ ty=rho_vp_crust_mantle(i,j,k,ispec)*vn*ny+rho_vs_crust_mantle(i,j,k,ispec)*(vy-vn*ny)
+ tz=rho_vp_crust_mantle(i,j,k,ispec)*vn*nz+rho_vs_crust_mantle(i,j,k,ispec)*(vz-vn*nz)
- weight=jacobian2D_ymin_crust_mantle(i,k,ispec2D)*wgllwgll_xz(i,k)
+ weight=jacobian2D_ymin_crust_mantle(i,k,ispec2D)*wgllwgll_xz(i,k)
- accel_crust_mantle(1,iglob)=accel_crust_mantle(1,iglob) - tx*weight
- accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
- accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
+ accel_crust_mantle(1,iglob)=accel_crust_mantle(1,iglob) - tx*weight
+ accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
+ accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
- if (SIMULATION_TYPE == 3) then
- b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_ymin_crust_mantle(:,i,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
- absorb_ymin_crust_mantle(1,i,k,ispec2D) = tx*weight
- absorb_ymin_crust_mantle(2,i,k,ispec2D) = ty*weight
- absorb_ymin_crust_mantle(3,i,k,ispec2D) = tz*weight
- endif
+ if (SIMULATION_TYPE == 3) then
+ b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_ymin_crust_mantle(:,i,k,ispec2D)
+ else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ absorb_ymin_crust_mantle(1,i,k,ispec2D) = tx*weight
+ absorb_ymin_crust_mantle(2,i,k,ispec2D) = ty*weight
+ absorb_ymin_crust_mantle(3,i,k,ispec2D) = tz*weight
+ endif
+ enddo
enddo
enddo
- enddo
+ else
+ ! on GPU
+ if( nspec2D_ymin_crust_mantle > 0 ) call compute_stacey_elastic_cuda(Mesh_pointer, &
+ absorb_ymin_crust_mantle, &
+ 2) ! <= ymin
+ endif
+
+
! writes absorbing boundary values
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_ymin_crust_mantle > 0 ) then
call write_abs(2,absorb_ymin_crust_mantle,reclen_ymin_crust_mantle,it)
@@ -257,54 +290,65 @@
call read_abs(3,absorb_ymax_crust_mantle,reclen_ymax_crust_mantle,NSTEP-it+1)
endif
- do ispec2D=1,nspec2D_ymax_crust_mantle
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_ymax_crust_mantle
- ispec=ibelm_ymax_crust_mantle(ispec2D)
+ ispec=ibelm_ymax_crust_mantle(ispec2D)
- ! exclude elements that are not on absorbing edges
- if(nkmin_eta_crust_mantle(2,ispec2D) == 0 .or. nimin_crust_mantle(2,ispec2D) == 0) cycle
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_eta_crust_mantle(2,ispec2D) == 0 .or. nimin_crust_mantle(2,ispec2D) == 0) cycle
- j=NGLLY
- do k=nkmin_eta_crust_mantle(2,ispec2D),NGLLZ
- do i=nimin_crust_mantle(2,ispec2D),nimax_crust_mantle(2,ispec2D)
- iglob=ibool_crust_mantle(i,j,k,ispec)
+ j=NGLLY
+ do k=nkmin_eta_crust_mantle(2,ispec2D),NGLLZ
+ do i=nimin_crust_mantle(2,ispec2D),nimax_crust_mantle(2,ispec2D)
+ iglob=ibool_crust_mantle(i,j,k,ispec)
- vx=veloc_crust_mantle(1,iglob)
- vy=veloc_crust_mantle(2,iglob)
- vz=veloc_crust_mantle(3,iglob)
+ vx=veloc_crust_mantle(1,iglob)
+ vy=veloc_crust_mantle(2,iglob)
+ vz=veloc_crust_mantle(3,iglob)
- nx=normal_ymax_crust_mantle(1,i,k,ispec2D)
- ny=normal_ymax_crust_mantle(2,i,k,ispec2D)
- nz=normal_ymax_crust_mantle(3,i,k,ispec2D)
+ nx=normal_ymax_crust_mantle(1,i,k,ispec2D)
+ ny=normal_ymax_crust_mantle(2,i,k,ispec2D)
+ nz=normal_ymax_crust_mantle(3,i,k,ispec2D)
- vn=vx*nx+vy*ny+vz*nz
+ vn=vx*nx+vy*ny+vz*nz
- tx=rho_vp_crust_mantle(i,j,k,ispec)*vn*nx+rho_vs_crust_mantle(i,j,k,ispec)*(vx-vn*nx)
- ty=rho_vp_crust_mantle(i,j,k,ispec)*vn*ny+rho_vs_crust_mantle(i,j,k,ispec)*(vy-vn*ny)
- tz=rho_vp_crust_mantle(i,j,k,ispec)*vn*nz+rho_vs_crust_mantle(i,j,k,ispec)*(vz-vn*nz)
+ tx=rho_vp_crust_mantle(i,j,k,ispec)*vn*nx+rho_vs_crust_mantle(i,j,k,ispec)*(vx-vn*nx)
+ ty=rho_vp_crust_mantle(i,j,k,ispec)*vn*ny+rho_vs_crust_mantle(i,j,k,ispec)*(vy-vn*ny)
+ tz=rho_vp_crust_mantle(i,j,k,ispec)*vn*nz+rho_vs_crust_mantle(i,j,k,ispec)*(vz-vn*nz)
- weight=jacobian2D_ymax_crust_mantle(i,k,ispec2D)*wgllwgll_xz(i,k)
+ weight=jacobian2D_ymax_crust_mantle(i,k,ispec2D)*wgllwgll_xz(i,k)
- accel_crust_mantle(1,iglob)=accel_crust_mantle(1,iglob) - tx*weight
- accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
- accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
+ accel_crust_mantle(1,iglob)=accel_crust_mantle(1,iglob) - tx*weight
+ accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
+ accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
- if (SIMULATION_TYPE == 3) then
- b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_ymax_crust_mantle(:,i,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
- absorb_ymax_crust_mantle(1,i,k,ispec2D) = tx*weight
- absorb_ymax_crust_mantle(2,i,k,ispec2D) = ty*weight
- absorb_ymax_crust_mantle(3,i,k,ispec2D) = tz*weight
- endif
+ if (SIMULATION_TYPE == 3) then
+ b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_ymax_crust_mantle(:,i,k,ispec2D)
+ else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ absorb_ymax_crust_mantle(1,i,k,ispec2D) = tx*weight
+ absorb_ymax_crust_mantle(2,i,k,ispec2D) = ty*weight
+ absorb_ymax_crust_mantle(3,i,k,ispec2D) = tz*weight
+ endif
+ enddo
enddo
enddo
- enddo
+ else
+ ! on GPU
+ if( nspec2D_ymax_crust_mantle > 0 ) call compute_stacey_elastic_cuda(Mesh_pointer, &
+ absorb_ymax_crust_mantle, &
+ 3) ! <= ymax
+ endif
+
! writes absorbing boundary values
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_ymax_crust_mantle > 0 ) then
call write_abs(3,absorb_ymax_crust_mantle,reclen_ymax_crust_mantle,it)
endif
+ if( GPU_MODE ) call load_CPU_elastic()
+
end subroutine compute_stacey_crust_mantle
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90 2012-02-21 00:33:56 UTC (rev 19657)
@@ -32,9 +32,10 @@
use specfem_par,only: &
ichunk,SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it, &
- wgllwgll_xz,wgllwgll_yz,wgllwgll_xy, &
- NSPEC2D_BOTTOM
+ wgllwgll_xz,wgllwgll_yz,wgllwgll_xy
+ use specfem_par,only: GPU_MODE,Mesh_pointer
+
use specfem_par_outercore,only: &
veloc_outer_core,accel_outer_core,b_accel_outer_core, &
ibool_outer_core, &
@@ -42,7 +43,7 @@
jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core, &
jacobian2D_bottom_outer_core, &
nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
+ nspec2D_ymin_outer_core,nspec2D_ymax_outer_core,nspec2D_zmin_outer_core, &
vp_outer_core, &
nimin_outer_core,nimax_outer_core,nkmin_eta_outer_core, &
njmin_outer_core,njmax_outer_core,nkmin_xi_outer_core, &
@@ -69,6 +70,8 @@
! file access (by process rank modulo 8) showed that the following,
! simple approach is still fastest. (assuming that files are accessed on a local scratch disk)
+ if( GPU_MODE ) call load_GPU_acoustic()
+
! xmin
! if two chunks exclude this face for one of them
if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC) then
@@ -80,33 +83,42 @@
call read_abs(4,absorb_xmin_outer_core,reclen_xmin_outer_core,NSTEP-it+1)
endif
- do ispec2D=1,nspec2D_xmin_outer_core
+ if( .NOT. GPU_MODE) then
+ ! on CPU
+ do ispec2D=1,nspec2D_xmin_outer_core
- ispec=ibelm_xmin_outer_core(ispec2D)
+ ispec=ibelm_xmin_outer_core(ispec2D)
- ! exclude elements that are not on absorbing edges
- if(nkmin_xi_outer_core(1,ispec2D) == 0 .or. njmin_outer_core(1,ispec2D) == 0) cycle
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_xi_outer_core(1,ispec2D) == 0 .or. njmin_outer_core(1,ispec2D) == 0) cycle
- i=1
- do k=nkmin_xi_outer_core(1,ispec2D),NGLLZ
- do j=njmin_outer_core(1,ispec2D),njmax_outer_core(1,ispec2D)
- iglob=ibool_outer_core(i,j,k,ispec)
+ i=1
+ do k=nkmin_xi_outer_core(1,ispec2D),NGLLZ
+ do j=njmin_outer_core(1,ispec2D),njmax_outer_core(1,ispec2D)
+ iglob=ibool_outer_core(i,j,k,ispec)
- sn = veloc_outer_core(iglob)/vp_outer_core(i,j,k,ispec)
+ sn = veloc_outer_core(iglob)/vp_outer_core(i,j,k,ispec)
- weight = jacobian2D_xmin_outer_core(j,k,ispec2D)*wgllwgll_yz(j,k)
+ weight = jacobian2D_xmin_outer_core(j,k,ispec2D)*wgllwgll_yz(j,k)
- accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
+ accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
- if (SIMULATION_TYPE == 3) then
- b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_xmin_outer_core(j,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
- absorb_xmin_outer_core(j,k,ispec2D) = weight*sn
- endif
+ if (SIMULATION_TYPE == 3) then
+ b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_xmin_outer_core(j,k,ispec2D)
+ else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ absorb_xmin_outer_core(j,k,ispec2D) = weight*sn
+ endif
+ enddo
enddo
enddo
- enddo
+ else
+ ! on GPU
+ if( nspec2D_xmin_outer_core > 0 ) call compute_stacey_acoustic_cuda(Mesh_pointer, &
+ absorb_xmin_outer_core, &
+ 4) ! <= xmin
+ endif
+
! writes absorbing boundary values
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmin_outer_core > 0 ) then
call write_abs(4,absorb_xmin_outer_core,reclen_xmin_outer_core,it)
@@ -122,34 +134,45 @@
call read_abs(5,absorb_xmax_outer_core,reclen_xmax_outer_core,NSTEP-it+1)
endif
- do ispec2D=1,nspec2D_xmax_outer_core
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_xmax_outer_core
- ispec=ibelm_xmax_outer_core(ispec2D)
+ ispec=ibelm_xmax_outer_core(ispec2D)
- ! exclude elements that are not on absorbing edges
- if(nkmin_xi_outer_core(2,ispec2D) == 0 .or. njmin_outer_core(2,ispec2D) == 0) cycle
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_xi_outer_core(2,ispec2D) == 0 .or. njmin_outer_core(2,ispec2D) == 0) cycle
- i=NGLLX
- do k=nkmin_xi_outer_core(2,ispec2D),NGLLZ
- do j=njmin_outer_core(2,ispec2D),njmax_outer_core(2,ispec2D)
- iglob=ibool_outer_core(i,j,k,ispec)
+ i=NGLLX
+ do k=nkmin_xi_outer_core(2,ispec2D),NGLLZ
+ do j=njmin_outer_core(2,ispec2D),njmax_outer_core(2,ispec2D)
+ iglob=ibool_outer_core(i,j,k,ispec)
- sn = veloc_outer_core(iglob)/vp_outer_core(i,j,k,ispec)
+ sn = veloc_outer_core(iglob)/vp_outer_core(i,j,k,ispec)
- weight = jacobian2D_xmax_outer_core(j,k,ispec2D)*wgllwgll_yz(j,k)
+ weight = jacobian2D_xmax_outer_core(j,k,ispec2D)*wgllwgll_yz(j,k)
- accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
+ accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
- if (SIMULATION_TYPE == 3) then
- b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_xmax_outer_core(j,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
- absorb_xmax_outer_core(j,k,ispec2D) = weight*sn
- endif
+ if (SIMULATION_TYPE == 3) then
+ b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_xmax_outer_core(j,k,ispec2D)
+ else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ absorb_xmax_outer_core(j,k,ispec2D) = weight*sn
+ endif
+ enddo
enddo
enddo
- enddo
+ else
+ ! on GPU
+ if( nspec2D_xmax_outer_core > 0 ) call compute_stacey_acoustic_cuda(Mesh_pointer, &
+ absorb_xmax_outer_core, &
+ 5) ! <= xmax
+ endif
+
+
+
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmax_outer_core > 0 ) then
call write_abs(5,absorb_xmax_outer_core,reclen_xmax_outer_core,it)
endif
@@ -161,34 +184,43 @@
call read_abs(6,absorb_ymin_outer_core,reclen_ymin_outer_core,NSTEP-it+1)
endif
- do ispec2D=1,nspec2D_ymin_outer_core
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_ymin_outer_core
- ispec=ibelm_ymin_outer_core(ispec2D)
+ ispec=ibelm_ymin_outer_core(ispec2D)
- ! exclude elements that are not on absorbing edges
- if(nkmin_eta_outer_core(1,ispec2D) == 0 .or. nimin_outer_core(1,ispec2D) == 0) cycle
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_eta_outer_core(1,ispec2D) == 0 .or. nimin_outer_core(1,ispec2D) == 0) cycle
- j=1
- do k=nkmin_eta_outer_core(1,ispec2D),NGLLZ
- do i=nimin_outer_core(1,ispec2D),nimax_outer_core(1,ispec2D)
- iglob=ibool_outer_core(i,j,k,ispec)
+ j=1
+ do k=nkmin_eta_outer_core(1,ispec2D),NGLLZ
+ do i=nimin_outer_core(1,ispec2D),nimax_outer_core(1,ispec2D)
+ iglob=ibool_outer_core(i,j,k,ispec)
- sn = veloc_outer_core(iglob)/vp_outer_core(i,j,k,ispec)
+ sn = veloc_outer_core(iglob)/vp_outer_core(i,j,k,ispec)
- weight=jacobian2D_ymin_outer_core(i,k,ispec2D)*wgllwgll_xz(i,k)
+ weight=jacobian2D_ymin_outer_core(i,k,ispec2D)*wgllwgll_xz(i,k)
- accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
+ accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
- if (SIMULATION_TYPE == 3) then
- b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_ymin_outer_core(i,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
- absorb_ymin_outer_core(i,k,ispec2D) = weight*sn
- endif
+ if (SIMULATION_TYPE == 3) then
+ b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_ymin_outer_core(i,k,ispec2D)
+ else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ absorb_ymin_outer_core(i,k,ispec2D) = weight*sn
+ endif
+ enddo
enddo
enddo
- enddo
+ else
+ ! on GPU
+ if( nspec2D_ymin_outer_core > 0 ) call compute_stacey_acoustic_cuda(Mesh_pointer, &
+ absorb_ymin_outer_core, &
+ 6) ! <= ymin
+ endif
+
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_ymin_outer_core > 0 ) then
call write_abs(6,absorb_ymin_outer_core,reclen_ymin_outer_core,it)
endif
@@ -198,70 +230,92 @@
call read_abs(7,absorb_ymax_outer_core,reclen_ymax_outer_core,NSTEP-it+1)
endif
- do ispec2D=1,nspec2D_ymax_outer_core
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_ymax_outer_core
- ispec=ibelm_ymax_outer_core(ispec2D)
+ ispec=ibelm_ymax_outer_core(ispec2D)
- ! exclude elements that are not on absorbing edges
- if(nkmin_eta_outer_core(2,ispec2D) == 0 .or. nimin_outer_core(2,ispec2D) == 0) cycle
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_eta_outer_core(2,ispec2D) == 0 .or. nimin_outer_core(2,ispec2D) == 0) cycle
- j=NGLLY
- do k=nkmin_eta_outer_core(2,ispec2D),NGLLZ
- do i=nimin_outer_core(2,ispec2D),nimax_outer_core(2,ispec2D)
- iglob=ibool_outer_core(i,j,k,ispec)
+ j=NGLLY
+ do k=nkmin_eta_outer_core(2,ispec2D),NGLLZ
+ do i=nimin_outer_core(2,ispec2D),nimax_outer_core(2,ispec2D)
+ iglob=ibool_outer_core(i,j,k,ispec)
- sn = veloc_outer_core(iglob)/vp_outer_core(i,j,k,ispec)
+ sn = veloc_outer_core(iglob)/vp_outer_core(i,j,k,ispec)
- weight=jacobian2D_ymax_outer_core(i,k,ispec2D)*wgllwgll_xz(i,k)
+ weight=jacobian2D_ymax_outer_core(i,k,ispec2D)*wgllwgll_xz(i,k)
- accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
+ accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
- if (SIMULATION_TYPE == 3) then
- b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_ymax_outer_core(i,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
- absorb_ymax_outer_core(i,k,ispec2D) = weight*sn
- endif
+ if (SIMULATION_TYPE == 3) then
+ b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_ymax_outer_core(i,k,ispec2D)
+ else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ absorb_ymax_outer_core(i,k,ispec2D) = weight*sn
+ endif
+ enddo
enddo
enddo
- enddo
+ else
+ ! on GPU
+ if( nspec2D_ymax_outer_core > 0 ) call compute_stacey_acoustic_cuda(Mesh_pointer, &
+ absorb_ymax_outer_core, &
+ 7) ! <= ymax
+ endif
+
+
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_ymax_outer_core > 0 ) then
call write_abs(7,absorb_ymax_outer_core,reclen_ymax_outer_core,it)
endif
! for surface elements exactly on the ICB
- if (SIMULATION_TYPE == 3 .and. NSPEC2D_BOTTOM(IREGION_OUTER_CORE)> 0) then
+ if (SIMULATION_TYPE == 3 .and. nspec2D_zmin_outer_core > 0) then
call read_abs(8,absorb_zmin_outer_core,reclen_zmin,NSTEP-it+1)
endif
- do ispec2D = 1,NSPEC2D_BOTTOM(IREGION_OUTER_CORE)
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D = 1,nspec2D_zmin_outer_core
- ispec = ibelm_bottom_outer_core(ispec2D)
+ ispec = ibelm_bottom_outer_core(ispec2D)
- k = 1
- do j = 1,NGLLY
- do i = 1,NGLLX
- iglob = ibool_outer_core(i,j,k,ispec)
+ k = 1
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ iglob = ibool_outer_core(i,j,k,ispec)
- sn = veloc_outer_core(iglob)/vp_outer_core(i,j,k,ispec)
+ sn = veloc_outer_core(iglob)/vp_outer_core(i,j,k,ispec)
- weight = jacobian2D_bottom_outer_core(i,j,ispec2D)*wgllwgll_xy(i,j)
+ weight = jacobian2D_bottom_outer_core(i,j,ispec2D)*wgllwgll_xy(i,j)
- accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
+ accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
- if (SIMULATION_TYPE == 3) then
- b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_zmin_outer_core(i,j,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
- absorb_zmin_outer_core(i,j,ispec2D) = weight*sn
- endif
+ if (SIMULATION_TYPE == 3) then
+ b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_zmin_outer_core(i,j,ispec2D)
+ else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ absorb_zmin_outer_core(i,j,ispec2D) = weight*sn
+ endif
+ enddo
enddo
enddo
- enddo
- if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. NSPEC2D_BOTTOM(IREGION_OUTER_CORE) > 0 ) then
+ else
+ ! on GPU
+ if( nspec2D_zmin_outer_core > 0 ) call compute_stacey_acoustic_cuda(Mesh_pointer, &
+ absorb_zmin_outer_core, &
+ 8) ! <= zmin
+ endif
+
+
+ if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_zmin_outer_core > 0 ) then
call write_abs(8,absorb_zmin_outer_core,reclen_zmin,it)
endif
+ if( GPU_MODE) call load_CPU_acoustic()
+
end subroutine compute_stacey_outer_core
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2012-02-21 00:33:56 UTC (rev 19657)
@@ -81,7 +81,7 @@
call close_file_abs(7)
endif
- if (NSPEC2D_BOTTOM(IREGION_OUTER_CORE) > 0 .and. (SIMULATION_TYPE == 3 &
+ if (nspec2D_zmin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
.or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
call close_file_abs(8)
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-02-21 00:33:56 UTC (rev 19657)
@@ -1059,14 +1059,14 @@
call MPI_REDUCE(ncuda_devices,ncuda_devices_max,1,MPI_INTEGER,MPI_MAX,0,MPI_COMM_WORLD,ier)
! prepares general fields on GPU
- call prepare_constants_device(Mesh_pointer,NGLLX, &
+ call prepare_constants_device(Mesh_pointer,myrank,NGLLX, &
hprime_xx, hprime_yy, hprime_zz, &
hprimewgll_xx, hprimewgll_yy, hprimewgll_zz, &
wgllwgll_xy, wgllwgll_xz, wgllwgll_yz, &
NSOURCES, nsources_local, &
- sourcearrays, islice_selected_source, ispec_selected_source, &
- number_receiver_global, ispec_selected_rec, &
- nrec, nrec_local, &
+ sourcearrays,islice_selected_source,ispec_selected_source, &
+ number_receiver_global,islice_selected_rec,ispec_selected_rec, &
+ nrec, nrec_local, nadj_rec_local, &
NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
@@ -1174,7 +1174,42 @@
endif
call sync_all()
+ ! prepares absorbing arrays
+ if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
+ if(myrank == 0 ) write(IMAIN,*) " loading absorbing boundaries"
+ call prepare_fields_absorb_device(Mesh_pointer, &
+ nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
+ nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
+ NSPEC2DMAX_XMIN_XMAX_CM,NSPEC2DMAX_YMIN_YMAX_CM, &
+ nimin_crust_mantle,nimax_crust_mantle, &
+ njmin_crust_mantle,njmax_crust_mantle, &
+ nkmin_xi_crust_mantle,nkmin_eta_crust_mantle, &
+ ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle, &
+ ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle, &
+ normal_xmin_crust_mantle,normal_xmax_crust_mantle, &
+ normal_ymin_crust_mantle,normal_ymax_crust_mantle, &
+ jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle, &
+ jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle, &
+ rho_vp_crust_mantle,rho_vs_crust_mantle, &
+ nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
+ nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
+ nspec2D_zmin_outer_core, &
+ NSPEC2DMAX_XMIN_XMAX_OC,NSPEC2DMAX_YMIN_YMAX_OC, &
+ nimin_outer_core,nimax_outer_core, &
+ njmin_outer_core,njmax_outer_core, &
+ nkmin_xi_outer_core,nkmin_eta_outer_core, &
+ ibelm_xmin_outer_core,ibelm_xmax_outer_core, &
+ ibelm_ymin_outer_core,ibelm_ymax_outer_core, &
+ ibelm_bottom_outer_core, &
+ jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core, &
+ jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core, &
+ jacobian2D_bottom_outer_core, &
+ vp_outer_core)
+
+ endif
+ call sync_all()
+
! prepares MPI interfaces
if(myrank == 0 ) write(IMAIN,*) " loading mpi interfaces"
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2012-02-21 00:33:56 UTC (rev 19657)
@@ -374,6 +374,8 @@
read(27) njunk1
read(27) njunk2
+ nspec2D_zmin_outer_core = NSPEC2D_BOTTOM(IREGION_OUTER_CORE)
+
read(27) ibelm_xmin_outer_core
read(27) ibelm_xmax_outer_core
read(27) ibelm_ymin_outer_core
@@ -1867,6 +1869,8 @@
! local parameters
integer(kind=8) :: filesize
integer :: ier
+ integer :: nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm
+ integer :: nabs_xmin_oc,nabs_xmax_oc,nabs_ymin_oc,nabs_ymax_oc,nabs_zmin_oc
! sets up absorbing boundary buffer arrays
@@ -1952,9 +1956,9 @@
allocate(absorb_ymax_outer_core(NGLLX,NGLLZ,nabs_ymax_oc),stat=ier)
if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb ymax')
- if (NSPEC2D_BOTTOM(IREGION_OUTER_CORE) > 0 .and. &
- (SIMULATION_TYPE == 3 .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- nabs_zmin_oc = NSPEC2D_BOTTOM(IREGION_OUTER_CORE)
+ if (nspec2D_zmin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ nabs_zmin_oc = nspec2D_zmin_outer_core
else
nabs_zmin_oc = 1
endif
@@ -2215,11 +2219,11 @@
endif
endif
- if (NSPEC2D_BOTTOM(IREGION_OUTER_CORE) > 0 .and. &
- (SIMULATION_TYPE == 3 .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD)))then
+ if (nspec2D_zmin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD)))then
! size of single record
- reclen_zmin = CUSTOM_REAL * (NGLLX * NGLLY * NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
+ reclen_zmin = CUSTOM_REAL * (NGLLX * NGLLY * nspec2D_zmin_outer_core)
! total file size
filesize = reclen_zmin
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90 2012-02-21 00:33:56 UTC (rev 19657)
@@ -305,6 +305,9 @@
enddo
endif
+ ! counter for adjoint receiver stations in local slice, used to allocate adjoint source arrays
+ nadj_rec_local = 0
+
! counts receivers for adjoint simulations
if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
! by Ebru
@@ -313,8 +316,6 @@
comp(2) = bic(1:2)//'E'
comp(3) = bic(1:2)//'Z'
- ! counter for adjoint receiver stations in local slice, used to allocate adjoint source arrays
- nadj_rec_local = 0
! temporary counter to check if any files are found at all
nadj_files_found = 0
do irec = 1,nrec
@@ -444,6 +445,7 @@
! allocates source arrays
if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
+ ! source interpolated on all GLL points in source element
allocate(sourcearrays(NDIM,NGLLX,NGLLY,NGLLZ,NSOURCES),stat=ier)
if( ier /= 0 ) call exit_MPI(myrank,'error allocating sourcearrays')
@@ -460,6 +462,7 @@
! adjoint source arrays
if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
+ ! adjoint source buffer length
NSTEP_SUB_ADJ = ceiling( dble(NSTEP)/dble(NTSTEP_BETWEEN_READ_ADJSRC) )
allocate(iadj_vec(NSTEP),stat=ier)
if( ier /= 0 ) call exit_MPI(myrank,'error allocating iadj_vec')
@@ -492,40 +495,44 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine setup_sources_receivers_srcarr(NSOURCES,myrank, &
- ispec_selected_source,islice_selected_source, &
- xi_source,eta_source,gamma_source, &
- Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
- xigll,yigll,zigll,sourcearrays)
+ subroutine setup_sources_receivers_srcarr()
+! NSOURCES,myrank, &
+! ispec_selected_source,islice_selected_source, &
+! xi_source,eta_source,gamma_source, &
+! Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
+! xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+! etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+! gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+! xigll,yigll,zigll,sourcearrays)
+ use specfem_par
+ use specfem_par_crustmantle
+
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+! include "constants.h"
+! include "OUTPUT_FILES/values_from_mesher.h"
+!
+! integer NSOURCES,myrank
+!
+! integer, dimension(NSOURCES) :: islice_selected_source,ispec_selected_source
+! double precision, dimension(NSOURCES) :: xi_source,eta_source,gamma_source
+! double precision, dimension(NSOURCES) :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
+!
+! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
+! xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle,&
+! etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+! gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle
+!
+! double precision, dimension(NGLLX) :: xigll
+! double precision, dimension(NGLLY) :: yigll
+! double precision, dimension(NGLLZ) :: zigll
+!
+! real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ,NSOURCES) :: sourcearrays
- integer NSOURCES,myrank
- integer, dimension(NSOURCES) :: islice_selected_source,ispec_selected_source
- double precision, dimension(NSOURCES) :: xi_source,eta_source,gamma_source
- double precision, dimension(NSOURCES) :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle,&
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle
-
- double precision, dimension(NGLLX) :: xigll
- double precision, dimension(NGLLY) :: yigll
- double precision, dimension(NGLLZ) :: zigll
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ,NSOURCES) :: sourcearrays
-
-
! local parameters
- integer :: isource
+ integer :: isource,iglob,ispec,i,j,k
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sourcearray
do isource = 1,NSOURCES
@@ -544,8 +551,30 @@
gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
xigll,yigll,zigll,NSPEC_CRUST_MANTLE)
+ ! point forces, initializes sourcearray, used for simplified CUDA routines
+ if(USE_FORCE_POINT_SOURCE) then
+ ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+ iglob = ibool_crust_mantle(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)), &
+ ispec_selected_source(isource))
+ ! sets sourcearrays
+ sourcearray(:,:,:,:) = 0.0
+ ispec = ispec_selected_source(isource)
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ if( ibool_crust_mantle(i,j,k,ispec) == iglob ) then
+ ! elastic source components
+ sourcearray(:,i,j,k) = nu_source(COMPONENT_FORCE_SOURCE,:,isource)
+ endif
+ enddo
+ enddo
+ enddo
+ endif
+
+ ! stores source excitations
sourcearrays(:,:,:,:,isource) = sourcearray(:,:,:,:)
-
endif
enddo
@@ -621,7 +650,7 @@
! e.g.: first block 1 has iadjsrc_len = 1000 with start at 2001 and end at 3000
! so iadj_vec(1) = 1000 - 0, iadj_vec(2) = 1000 - 1, ..., to iadj_vec(1000) = 1000 - 999 = 1
! then for block 2, iadjsrc_len = 1000 with start at 1001 and end at 2000
- ! so iadj_vec(1001) = 1000 - 0, iad_vec(1002) = 1000 - 1, .. and so on again down to 1
+ ! so iadj_vec(1001) = 1000 - 0, iadj_vec(1002) = 1000 - 1, .. and so on again down to 1
! then block 3 and your guess is right now... iadj_vec(2001) to iadj_vec(3000) is 1000 down to 1. :)
iadj_vec(it) = iadjsrc_len(it_sub_adj) - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC)
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2012-02-20 04:22:33 UTC (rev 19656)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2012-02-21 00:33:56 UTC (rev 19657)
@@ -133,7 +133,6 @@
double precision, dimension(:), allocatable :: xi_source,eta_source,gamma_source
double precision, dimension(:), allocatable :: tshift_cmt,hdur,hdur_gaussian
double precision, dimension(:), allocatable :: theta_source,phi_source
- double precision, external :: comp_source_time_function
double precision :: t0
!-----------------------------------------------------------------
@@ -504,8 +503,6 @@
real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: absorb_xmin_crust_mantle, &
absorb_xmax_crust_mantle, absorb_ymin_crust_mantle, absorb_ymax_crust_mantle
- integer :: nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm
-
integer :: reclen_xmin_crust_mantle, reclen_xmax_crust_mantle, &
reclen_ymin_crust_mantle,reclen_ymax_crust_mantle
@@ -722,14 +719,14 @@
! Stacey
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_STACEY) :: vp_outer_core
integer :: nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core
+ nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
+ nspec2D_zmin_outer_core
integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_OC) :: nimin_outer_core,nimax_outer_core,nkmin_eta_outer_core
integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_OC) :: njmin_outer_core,njmax_outer_core,nkmin_xi_outer_core
real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: absorb_xmin_outer_core, &
absorb_xmax_outer_core, absorb_ymin_outer_core, absorb_ymax_outer_core, &
absorb_zmin_outer_core
- integer :: nabs_xmin_oc,nabs_xmax_oc,nabs_ymin_oc,nabs_ymax_oc,nabs_zmin_oc
integer :: reclen_xmin_outer_core, reclen_xmax_outer_core, &
reclen_ymin_outer_core, reclen_ymax_outer_core
More information about the CIG-COMMITS
mailing list