[cig-commits] r20570 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER: EXAMPLES/regional_Greece_small/DATA setup src/cuda src/meshfem3D src/shared src/specfem3D
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Wed Aug 15 12:55:45 PDT 2012
Author: danielpeter
Date: 2012-08-15 12:55:44 -0700 (Wed, 15 Aug 2012)
New Revision: 20570
Modified:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_MPI_interfaces.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regions_mesh.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/get_perm_color.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_atten3D_QRFSI12.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_attenuation.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_s362ani.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/save_arrays_solver.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_MPI_interfaces.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_color_perm.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_model.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/get_model_parameters.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_compute_parameters.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/get_attenuation.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
Log:
updates ocean coupling; updates mesh coloring for attenuation arrays; adds flag in constants.h.in to use 3D attenuation by default
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file 2012-08-15 19:55:44 UTC (rev 20570)
@@ -37,15 +37,15 @@
# parameters describing the Earth model
OCEANS = .true.
-ELLIPTICITY = .false.
+ELLIPTICITY = .true.
TOPOGRAPHY = .true.
-GRAVITY = .false.
-ROTATION = .false.
-ATTENUATION = .false.
+GRAVITY = .true.
+ROTATION = .true.
+ATTENUATION = .true.
ATTENUATION_NEW = .false.
# absorbing boundary conditions for a regional simulation
-ABSORBING_CONDITIONS = .false.
+ABSORBING_CONDITIONS = .true.
# record length in minutes
RECORD_LENGTH_IN_MINUTES = 15.0d0
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in 2012-08-15 19:55:44 UTC (rev 20570)
@@ -231,17 +231,18 @@
! the mesher) and use them for the computation of boundary kernel (in the solver)
logical, parameter :: SAVE_BOUNDARY_MESH = .false.
+! mesh coloring:
! add mesh coloring for the GPU + MPI implementation
logical, parameter :: USE_MESH_COLORING_GPU = .false.
integer, parameter :: MAX_NUMBER_OF_COLORS = 1000
! enhanced coloring:
-!
! using Droux algorithm
! try several times with one more color before giving up
logical, parameter :: USE_DROUX_OPTIMIZATION = .false.
integer, parameter :: MAX_NB_TRIES_OF_DROUX_1993 = 15
-!
+
+! using balancing algorithm
! postprocess the colors to balance them if Droux (1993) algorithm is not used
logical, parameter :: BALANCE_COLORS_SIMPLE_ALGO = .false.
@@ -270,6 +271,12 @@
! (default is to use transverse isotropy only between MOHO and 220 )
logical, parameter :: USE_FULL_TISO_MANTLE = .false.
+! forces attenuation arrays to be fully 3D and defined on all GLL points
+! (useful for more accurate attenuation profiles and adjoint inversions)
+! (if set to .false., 3D attenuation is only used for models with 3D attenuation distributions)
+ logical, parameter :: USE_3D_ATTENUATION = .true.
+
+
!!-----------------------------------------------------------
!!
!! time stamp information
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu 2012-08-15 19:55:44 UTC (rev 20570)
@@ -266,444 +266,11 @@
/* ----------------------------------------------------------------------------------------------- */
-//daniel: helper function
-/*
- __global__ void check_phase_ispec_kernel(int num_phase_ispec,
- int* phase_ispec,
- int NSPEC_AB,
- int* ier) {
- int i,ispec,iphase,count0,count1;
- *ier = 0;
+// scalar arrays (acoustic/fluid outer core)
- for(iphase=0; iphase < 2; iphase++){
- count0 = 0;
- count1 = 0;
-
- for(i=0; i < num_phase_ispec; i++){
- ispec = phase_ispec[iphase*num_phase_ispec + i] - 1;
- if( ispec < -1 || ispec >= NSPEC_AB ){
- printf("Error in d_phase_ispec_inner_elastic %d %d\n",i,ispec);
- *ier = 1;
- return;
- }
- if( ispec >= 0 ){ count0++;}
- if( ispec < 0 ){ count1++;}
- }
-
- printf("check_phase_ispec done: phase %d, count = %d %d \n",iphase,count0,count1);
-
- }
- }
-
- void check_phase_ispec(long* Mesh_pointer_f,int type){
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- printf("check phase_ispec for type=%d\n",type);
-
- dim3 grid(1,1);
- dim3 threads(1,1,1);
-
- int* h_debug = (int*) calloc(1,sizeof(int));
- int* d_debug;
- cudaMalloc((void**)&d_debug,sizeof(int));
-
- if( type == 1 ){
- check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_elastic,
- mp->d_phase_ispec_inner_elastic,
- mp->NSPEC_AB,
- d_debug);
- }else if( type == 2 ){
- check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_acoustic,
- mp->d_phase_ispec_inner_acoustic,
- mp->NSPEC_AB,
- d_debug);
- }
-
- cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
- cudaFree(d_debug);
- if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
- free(h_debug);
- fflush(stdout);
-
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("check_phase_ispec");
- #endif
-
- }
- */
-
/* ----------------------------------------------------------------------------------------------- */
-//daniel: helper function
-/*
- __global__ void check_ispec_is_kernel(int NSPEC_AB,
- int* ispec_is,
- int* ier) {
- int ispec,count0,count1;
-
- *ier = 0;
- count0 = 0;
- count1 = 0;
- for(ispec=0; ispec < NSPEC_AB; ispec++){
- if( ispec_is[ispec] < -1 || ispec_is[ispec] > 1 ){
- printf("Error in ispec_is %d %d\n",ispec,ispec_is[ispec]);
- *ier = 1;
- return;
- //exit(1);
- }
- if( ispec_is[ispec] == 0 ){count0++;}
- if( ispec_is[ispec] != 0 ){count1++;}
- }
- printf("check_ispec_is done: count = %d %d\n",count0,count1);
- }
-
- void check_ispec_is(long* Mesh_pointer_f,int type){
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- printf("check ispec_is for type=%d\n",type);
-
- dim3 grid(1,1);
- dim3 threads(1,1,1);
-
- int* h_debug = (int*) calloc(1,sizeof(int));
- int* d_debug;
- cudaMalloc((void**)&d_debug,sizeof(int));
-
- if( type == 0 ){
- check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
- mp->d_ispec_is_inner,
- d_debug);
- }else if( type == 1 ){
- check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
- mp->d_ispec_is_elastic,
- d_debug);
- }else if( type == 2 ){
- check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
- mp->d_ispec_is_acoustic,
- d_debug);
- }
-
- cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
- cudaFree(d_debug);
- if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
- free(h_debug);
- fflush(stdout);
-
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("check_ispec_is");
- #endif
- }
- */
-/* ----------------------------------------------------------------------------------------------- */
-//daniel: helper function
-/*
- __global__ void check_array_ispec_kernel(int num_array_ispec,
- int* array_ispec,
- int NSPEC_AB,
- int* ier) {
-
- int i,ispec,count0,count1;
-
- *ier = 0;
- count0 = 0;
- count1 = 0;
-
- for(i=0; i < num_array_ispec; i++){
- ispec = array_ispec[i] - 1;
- if( ispec < -1 || ispec >= NSPEC_AB ){
- printf("Error in d_array_ispec %d %d\n",i,ispec);
- *ier = 1;
- return;
- }
- if( ispec >= 0 ){ count0++;}
- if( ispec < 0 ){ count1++;}
- }
-
- printf("check_array_ispec done: count = %d %d \n",count0,count1);
- }
-
- void check_array_ispec(long* Mesh_pointer_f,int type){
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- printf("check array_ispec for type=%d\n",type);
-
- dim3 grid(1,1);
- dim3 threads(1,1,1);
-
- int* h_debug = (int*) calloc(1,sizeof(int));
- int* d_debug;
- cudaMalloc((void**)&d_debug,sizeof(int));
-
- if( type == 1 ){
- check_array_ispec_kernel<<<grid,threads>>>(mp->d_num_abs_boundary_faces,
- mp->d_abs_boundary_ispec,
- mp->NSPEC_AB,
- d_debug);
- }
-
- cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
- cudaFree(d_debug);
- if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
- free(h_debug);
- fflush(stdout);
-
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("check_array_ispec");
- #endif
-
- }
- */
-
-
-/* ----------------------------------------------------------------------------------------------- */
-
-// Check functions
-
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(check_max_norm_displ_gpu,
- CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {
-
-TRACE("check_max_norm_displ_gpu");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- cudaMemcpy(displ, mp->d_displ,*size*sizeof(realw),cudaMemcpyDeviceToHost);
- realw maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(displ[i]));
- }
- printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(check_max_norm_vector,
- CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {
-
-TRACE("check_max_norm_vector");
-
- int procid;
-#ifdef WITH_MPI
- MPI_Comm_rank(MPI_COMM_WORLD,&procid);
-#else
- procid = 0;
-#endif
- realw maxnorm=0;
- int maxloc;
- for(int i=0;i<*size;i++) {
- if(maxnorm<fabsf(vector1[i])) {
- maxnorm = vector1[i];
- maxloc = i;
- }
- }
- printf("%d:maxnorm of vector %d [%d] = %e\n",procid,*announceID,maxloc,maxnorm);
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(check_max_norm_displ,
- CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {
-
-TRACE("check_max_norm_displ");
-
- realw maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(displ[i]));
- }
- printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(check_max_norm_b_displ_gpu,
- CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {
-
-TRACE("check_max_norm_b_displ_gpu");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- realw* b_accel = (realw*)malloc(*size*sizeof(realw));
-
- cudaMemcpy(b_displ, mp->d_b_displ,*size*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(realw),cudaMemcpyDeviceToHost);
-
- realw maxnorm=0;
- realw maxnorm_accel=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
- maxnorm_accel = MAX(maxnorm,fabsf(b_accel[i]));
- }
- free(b_accel);
- printf("%d: maxnorm of backward displ = %e\n",*announceID,maxnorm);
- printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm_accel);
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(check_max_norm_b_accel_gpu,
- CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {
-
-TRACE("check_max_norm_b_accel_gpu");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(realw),cudaMemcpyDeviceToHost);
-
- realw maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
- }
- printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm);
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(check_max_norm_b_veloc_gpu,
- CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {
-
-TRACE("check_max_norm_b_veloc_gpu");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- cudaMemcpy(b_veloc, mp->d_b_veloc,*size*sizeof(realw),cudaMemcpyDeviceToHost);
-
- realw maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(b_veloc[i]));
- }
- printf("%d: maxnorm of backward veloc = %e\n",*announceID,maxnorm);
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(check_max_norm_b_displ,
- CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {
-
-TRACE("check_max_norm_b_displ");
-
- realw maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
- }
- printf("%d:maxnorm of backward displ = %e\n",*announceID,maxnorm);
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(check_max_norm_b_accel,
- CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {
-
-TRACE("check_max_norm_b_accel");
-
- realw maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
- }
- printf("%d:maxnorm of backward accel = %e\n",*announceID,maxnorm);
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(check_error_vectors,
- CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {
-
-TRACE("check_error_vectors");
-
- int size = *sizef;
-
- double diff2 = 0;
- double sum = 0;
- double temp;
- double maxerr=0;
- int maxerrorloc;
-
- for(int i=0;i<size;++i) {
- temp = vector1[i]-vector2[i];
- diff2 += temp*temp;
- sum += vector1[i]*vector1[i];
- if(maxerr < fabsf(temp)) {
- maxerr = abs(temp);
- maxerrorloc = i;
- }
- }
-
- printf("rel error = %f, maxerr = %e @ %d\n",diff2/sum,maxerr,maxerrorloc);
- int myrank;
-#ifdef WITH_MPI
- MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
-#else
- myrank = 0;
-#endif
- if(myrank == 0) {
- for(int i=maxerrorloc;i>maxerrorloc-5;i--) {
- printf("[%d]: %e vs. %e\n",i,vector1[i],vector2[i]);
- }
- }
-
-}
-*/
-
-/* ----------------------------------------------------------------------------------------------- */
-
-// Auxiliary functions
-
-/* ----------------------------------------------------------------------------------------------- */
-
-
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(get_max_accel,
- GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {
-
-TRACE("get_max_accel");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer);
- int procid;
-#ifdef WITH_MPI
- MPI_Comm_rank(MPI_COMM_WORLD,&procid);
-#else
- procid = 0;
-#endif
- int size = *sizef;
- int it = *itf;
- realw* accel_cpy = (realw*)malloc(size*sizeof(realw));
- cudaMemcpy(accel_cpy,mp->d_accel,size*sizeof(realw),cudaMemcpyDeviceToHost);
- realw maxval=0;
- for(int i=0;i<size;++i) {
- maxval = MAX(maxval,accel_cpy[i]);
- }
- printf("%d/%d: max=%e\n",it,procid,maxval);
- free(accel_cpy);
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-
-// ACOUSTIC simulations
-
-/* ----------------------------------------------------------------------------------------------- */
-
__global__ void get_maximum_scalar_kernel(realw* array, int size, realw* d_max){
/* simplest version: uses only 1 thread
@@ -728,7 +295,7 @@
unsigned int i = tid + bx*blockDim.x;
// loads absolute values into shared memory
- sdata[tid] = (i < size) ? fabs(array[i]) : 0.0 ;
+ sdata[tid] = (i < size) ? fabs(array[i]) : 0.0f ;
__syncthreads();
@@ -884,7 +451,7 @@
/* ----------------------------------------------------------------------------------------------- */
-// ELASTIC simulations
+// vector arrays (elastic solid part)
/* ----------------------------------------------------------------------------------------------- */
@@ -901,7 +468,7 @@
// loads values into shared memory: assume array is a vector array
sdata[tid] = (i < size) ? sqrt(array[i*3]*array[i*3]
+ array[i*3+1]*array[i*3+1]
- + array[i*3+2]*array[i*3+2]) : 0.0 ;
+ + array[i*3+2]*array[i*3+2]) : 0.0f ;
__syncthreads();
@@ -1181,3 +748,440 @@
}
+// unused....
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Auxiliary functions
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------------------------------- */
+//daniel: helper function
+/*
+ __global__ void check_phase_ispec_kernel(int num_phase_ispec,
+ int* phase_ispec,
+ int NSPEC_AB,
+ int* ier) {
+
+ int i,ispec,iphase,count0,count1;
+ *ier = 0;
+
+ for(iphase=0; iphase < 2; iphase++){
+ count0 = 0;
+ count1 = 0;
+
+ for(i=0; i < num_phase_ispec; i++){
+ ispec = phase_ispec[iphase*num_phase_ispec + i] - 1;
+ if( ispec < -1 || ispec >= NSPEC_AB ){
+ printf("Error in d_phase_ispec_inner_elastic %d %d\n",i,ispec);
+ *ier = 1;
+ return;
+ }
+ if( ispec >= 0 ){ count0++;}
+ if( ispec < 0 ){ count1++;}
+ }
+
+ printf("check_phase_ispec done: phase %d, count = %d %d \n",iphase,count0,count1);
+
+ }
+ }
+
+ void check_phase_ispec(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check phase_ispec for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 1 ){
+ check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_elastic,
+ mp->d_phase_ispec_inner_elastic,
+ mp->NSPEC_AB,
+ d_debug);
+ }else if( type == 2 ){
+ check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_acoustic,
+ mp->d_phase_ispec_inner_acoustic,
+ mp->NSPEC_AB,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+ #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_phase_ispec");
+ #endif
+
+ }
+ */
+
+/* ----------------------------------------------------------------------------------------------- */
+//daniel: helper function
+/*
+ __global__ void check_ispec_is_kernel(int NSPEC_AB,
+ int* ispec_is,
+ int* ier) {
+
+ int ispec,count0,count1;
+
+ *ier = 0;
+ count0 = 0;
+ count1 = 0;
+ for(ispec=0; ispec < NSPEC_AB; ispec++){
+ if( ispec_is[ispec] < -1 || ispec_is[ispec] > 1 ){
+ printf("Error in ispec_is %d %d\n",ispec,ispec_is[ispec]);
+ *ier = 1;
+ return;
+ //exit(1);
+ }
+ if( ispec_is[ispec] == 0 ){count0++;}
+ if( ispec_is[ispec] != 0 ){count1++;}
+ }
+ printf("check_ispec_is done: count = %d %d\n",count0,count1);
+ }
+
+ void check_ispec_is(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check ispec_is for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 0 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_inner,
+ d_debug);
+ }else if( type == 1 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_elastic,
+ d_debug);
+ }else if( type == 2 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_acoustic,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+ #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_ispec_is");
+ #endif
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+//daniel: helper function
+/*
+ __global__ void check_array_ispec_kernel(int num_array_ispec,
+ int* array_ispec,
+ int NSPEC_AB,
+ int* ier) {
+
+ int i,ispec,count0,count1;
+
+ *ier = 0;
+ count0 = 0;
+ count1 = 0;
+
+ for(i=0; i < num_array_ispec; i++){
+ ispec = array_ispec[i] - 1;
+ if( ispec < -1 || ispec >= NSPEC_AB ){
+ printf("Error in d_array_ispec %d %d\n",i,ispec);
+ *ier = 1;
+ return;
+ }
+ if( ispec >= 0 ){ count0++;}
+ if( ispec < 0 ){ count1++;}
+ }
+
+ printf("check_array_ispec done: count = %d %d \n",count0,count1);
+ }
+
+ void check_array_ispec(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check array_ispec for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 1 ){
+ check_array_ispec_kernel<<<grid,threads>>>(mp->d_num_abs_boundary_faces,
+ mp->d_abs_boundary_ispec,
+ mp->NSPEC_AB,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+ #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_array_ispec");
+ #endif
+
+ }
+ */
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Check functions
+
+/* ----------------------------------------------------------------------------------------------- */
+/*
+ extern "C"
+ void FC_FUNC_(check_max_norm_displ_gpu,
+ CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {
+
+ TRACE("check_max_norm_displ_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ cudaMemcpy(displ, mp->d_displ,*size*sizeof(realw),cudaMemcpyDeviceToHost);
+ realw maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(displ[i]));
+ }
+ printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+/*
+ extern "C"
+ void FC_FUNC_(check_max_norm_vector,
+ CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {
+
+ TRACE("check_max_norm_vector");
+
+ int procid;
+ #ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ #else
+ procid = 0;
+ #endif
+ realw maxnorm=0;
+ int maxloc;
+ for(int i=0;i<*size;i++) {
+ if(maxnorm<fabsf(vector1[i])) {
+ maxnorm = vector1[i];
+ maxloc = i;
+ }
+ }
+ printf("%d:maxnorm of vector %d [%d] = %e\n",procid,*announceID,maxloc,maxnorm);
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+/*
+ extern "C"
+ void FC_FUNC_(check_max_norm_displ,
+ CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {
+
+ TRACE("check_max_norm_displ");
+
+ realw maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(displ[i]));
+ }
+ printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+/*
+ extern "C"
+ void FC_FUNC_(check_max_norm_b_displ_gpu,
+ CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {
+
+ TRACE("check_max_norm_b_displ_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ realw* b_accel = (realw*)malloc(*size*sizeof(realw));
+
+ cudaMemcpy(b_displ, mp->d_b_displ,*size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(realw),cudaMemcpyDeviceToHost);
+
+ realw maxnorm=0;
+ realw maxnorm_accel=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
+ maxnorm_accel = MAX(maxnorm,fabsf(b_accel[i]));
+ }
+ free(b_accel);
+ printf("%d: maxnorm of backward displ = %e\n",*announceID,maxnorm);
+ printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm_accel);
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+/*
+ extern "C"
+ void FC_FUNC_(check_max_norm_b_accel_gpu,
+ CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {
+
+ TRACE("check_max_norm_b_accel_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(realw),cudaMemcpyDeviceToHost);
+
+ realw maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
+ }
+ printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm);
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+/*
+ extern "C"
+ void FC_FUNC_(check_max_norm_b_veloc_gpu,
+ CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {
+
+ TRACE("check_max_norm_b_veloc_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ cudaMemcpy(b_veloc, mp->d_b_veloc,*size*sizeof(realw),cudaMemcpyDeviceToHost);
+
+ realw maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_veloc[i]));
+ }
+ printf("%d: maxnorm of backward veloc = %e\n",*announceID,maxnorm);
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+/*
+ extern "C"
+ void FC_FUNC_(check_max_norm_b_displ,
+ CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {
+
+ TRACE("check_max_norm_b_displ");
+
+ realw maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
+ }
+ printf("%d:maxnorm of backward displ = %e\n",*announceID,maxnorm);
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+/*
+ extern "C"
+ void FC_FUNC_(check_max_norm_b_accel,
+ CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {
+
+ TRACE("check_max_norm_b_accel");
+
+ realw maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
+ }
+ printf("%d:maxnorm of backward accel = %e\n",*announceID,maxnorm);
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+/*
+ extern "C"
+ void FC_FUNC_(check_error_vectors,
+ CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {
+
+ TRACE("check_error_vectors");
+
+ int size = *sizef;
+
+ double diff2 = 0;
+ double sum = 0;
+ double temp;
+ double maxerr=0;
+ int maxerrorloc;
+
+ for(int i=0;i<size;++i) {
+ temp = vector1[i]-vector2[i];
+ diff2 += temp*temp;
+ sum += vector1[i]*vector1[i];
+ if(maxerr < fabsf(temp)) {
+ maxerr = abs(temp);
+ maxerrorloc = i;
+ }
+ }
+
+ printf("rel error = %f, maxerr = %e @ %d\n",diff2/sum,maxerr,maxerrorloc);
+ int myrank;
+ #ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+ #else
+ myrank = 0;
+ #endif
+ if(myrank == 0) {
+ for(int i=maxerrorloc;i>maxerrorloc-5;i--) {
+ printf("[%d]: %e vs. %e\n",i,vector1[i],vector2[i]);
+ }
+ }
+
+ }
+ */
+
+
+/* ----------------------------------------------------------------------------------------------- */
+/*
+ extern "C"
+ void FC_FUNC_(get_max_accel,
+ GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {
+
+ TRACE("get_max_accel");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+ int procid;
+ #ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ #else
+ procid = 0;
+ #endif
+ int size = *sizef;
+ int it = *itf;
+ realw* accel_cpy = (realw*)malloc(size*sizeof(realw));
+ cudaMemcpy(accel_cpy,mp->d_accel,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ realw maxval=0;
+ for(int i=0;i<size;++i) {
+ maxval = MAX(maxval,accel_cpy[i]);
+ }
+ printf("%d/%d: max=%e\n",it,procid,maxval);
+ free(accel_cpy);
+ }
+ */
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2012-08-15 19:55:44 UTC (rev 20570)
@@ -558,75 +558,58 @@
/* ----------------------------------------------------------------------------------------------- */
-
__global__ void compute_coupling_ocean_cuda_kernel(realw* accel_crust_mantle,
- realw* rmassx_crust_mantle,
- realw* rmassy_crust_mantle,
- realw* rmassz_crust_mantle,
- realw* rmass_ocean_load,
- realw* normal_top_crust_mantle,
- int* ibool_crust_mantle,
- int* ibelm_top_crust_mantle,
- int* updated_dof_ocean_load,
- int NSPEC2D_TOP_CM) {
+ realw* rmassx_crust_mantle,
+ realw* rmassy_crust_mantle,
+ realw* rmassz_crust_mantle,
+ realw* rmass_ocean_load,
+ int npoin_ocean_load,
+ int* iglob_ocean_load,
+ realw* normal_ocean_load) {
- int i = threadIdx.x;
- int j = threadIdx.y;
- int iface = blockIdx.x + gridDim.x*blockIdx.y;
+ int ipoin = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
- int k,iglob,ispec;
- realw nx,ny,nz;
+ int iglob;
+ realw nx,ny,nz,rmass;
realw force_normal_comp;
realw additional_term_x,additional_term_y,additional_term_z;
- // for surfaces elements exactly at the top of the crust mantle (ocean bottom)
- if( iface < NSPEC2D_TOP_CM ){
+ // for global points exactly at the top of the crust mantle (ocean bottom)
+ if(ipoin < npoin_ocean_load) {
+ // get global point number
// "-1" from index values to convert from Fortran-> C indexing
- ispec = ibelm_top_crust_mantle[iface] - 1;
+ iglob = iglob_ocean_load[ipoin] - 1;
- // only for DOFs exactly on the CMB (top of these elements)
- k = NGLLX - 1;
+ // get normal
+ nx = normal_ocean_load[INDEX2(NDIM,0,ipoin)]; // (1,ipoin)
+ ny = normal_ocean_load[INDEX2(NDIM,1,ipoin)]; // (2,ipoin)
+ nz = normal_ocean_load[INDEX2(NDIM,2,ipoin)]; // (3,ipoin)
- // get global point number
- iglob = ibool_crust_mantle[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
+ // make updated component of right-hand side
+ // we divide by rmass() which is 1 / M
+ // we use the total force which includes the Coriolis term above
+ force_normal_comp = accel_crust_mantle[iglob*3]*nx / rmassx_crust_mantle[iglob]
+ + accel_crust_mantle[iglob*3+1]*ny / rmassy_crust_mantle[iglob]
+ + accel_crust_mantle[iglob*3+2]*nz / rmassz_crust_mantle[iglob];
- // only update this global point once
+ rmass = rmass_ocean_load[ipoin];
- // daniel: TODO - there might be better ways to implement a mutex like below,
- // and find a workaround to not use the temporary update array.
- // As a reminder, atomicExch atomically sets the value of the memory
- // location stored in address to val and returns the old value.
- // 0 indicates that we still have to do this point
+ additional_term_x = (rmass - rmassx_crust_mantle[iglob]) * force_normal_comp;
+ additional_term_y = (rmass - rmassy_crust_mantle[iglob]) * force_normal_comp;
+ additional_term_z = (rmass - rmassz_crust_mantle[iglob]) * force_normal_comp;
- if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){
-
- // get normal on the CMB
- nx = normal_top_crust_mantle[INDEX4(NDIM,NGLLX,NGLLX,0,i,j,iface)]; // (1,i,j,iface)
- ny = normal_top_crust_mantle[INDEX4(NDIM,NGLLX,NGLLX,1,i,j,iface)]; // (2,i,j,iface)
- nz = normal_top_crust_mantle[INDEX4(NDIM,NGLLX,NGLLX,2,i,j,iface)]; // (3,i,j,iface)
-
- // make updated component of right-hand side
- // we divide by rmass() which is 1 / M
- // we use the total force which includes the Coriolis term above
- force_normal_comp = accel_crust_mantle[iglob*3]*nx / rmassx_crust_mantle[iglob]
- + accel_crust_mantle[iglob*3+1]*ny / rmassy_crust_mantle[iglob]
- + accel_crust_mantle[iglob*3+2]*nz / rmassz_crust_mantle[iglob];
-
- additional_term_x = (rmass_ocean_load[iglob] - rmassx_crust_mantle[iglob]) * force_normal_comp;
- additional_term_y = (rmass_ocean_load[iglob] - rmassy_crust_mantle[iglob]) * force_normal_comp;
- additional_term_z = (rmass_ocean_load[iglob] - rmassz_crust_mantle[iglob]) * force_normal_comp;
-
- // probably wouldn't need atomicAdd anymore, but just to be sure...
- atomicAdd(&accel_crust_mantle[iglob*3], + additional_term_x * nx);
- atomicAdd(&accel_crust_mantle[iglob*3+1], + additional_term_y * ny);
- atomicAdd(&accel_crust_mantle[iglob*3+2], + additional_term_z * nz);
- }
+ // since we access this global point only once...
+ accel_crust_mantle[iglob*3] += additional_term_x * nx;
+ accel_crust_mantle[iglob*3+1] += additional_term_y * ny;
+ accel_crust_mantle[iglob*3+2] += additional_term_z * nz;
}
}
+
/* ----------------------------------------------------------------------------------------------- */
+
extern "C"
void FC_FUNC_(compute_coupling_ocean_cuda,
COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f,
@@ -636,7 +619,10 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- int num_blocks_x = mp->nspec2D_top_crust_mantle;
+ int blocksize = BLOCKSIZE_TRANSFER;
+ int size_padded = ((int)ceil(((double)mp->npoin_oceans)/((double)blocksize)))*blocksize;
+
+ int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
@@ -644,40 +630,28 @@
}
dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(5,5,1);
+ dim3 threads(blocksize,1,1);
- // initializes temporary array to zero
- print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
- sizeof(int)*(mp->NGLOB_CRUST_MANTLE_OCEANS)),88501);
-
if( *NCHUNKS_VAL != 6 && mp->absorbing_conditions){
compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_accel_crust_mantle,
mp->d_rmassx_crust_mantle,
mp->d_rmassy_crust_mantle,
mp->d_rmassz_crust_mantle,
mp->d_rmass_ocean_load,
- mp->d_normal_top_crust_mantle,
- mp->d_ibool_crust_mantle,
- mp->d_ibelm_top_crust_mantle,
- mp->d_updated_dof_ocean_load,
- mp->nspec2D_top_crust_mantle);
+ mp->npoin_oceans,
+ mp->d_iglob_ocean_load,
+ mp->d_normal_ocean_load);
// for backward/reconstructed potentials
if( mp->simulation_type == 3 ) {
- // re-initializes array
- print_CUDA_error_if_any(cudaMemset(mp->d_b_updated_dof_ocean_load,0,
- sizeof(int)*(mp->NGLOB_CRUST_MANTLE_OCEANS)),88502);
-
compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
mp->d_rmassx_crust_mantle,
mp->d_rmassy_crust_mantle,
mp->d_rmassz_crust_mantle,
mp->d_rmass_ocean_load,
- mp->d_normal_top_crust_mantle,
- mp->d_ibool_crust_mantle,
- mp->d_ibelm_top_crust_mantle,
- mp->d_b_updated_dof_ocean_load,
- mp->nspec2D_top_crust_mantle);
+ mp->npoin_oceans,
+ mp->d_iglob_ocean_load,
+ mp->d_normal_ocean_load);
}
}else{
compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_accel_crust_mantle,
@@ -685,28 +659,20 @@
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle,
mp->d_rmass_ocean_load,
- mp->d_normal_top_crust_mantle,
- mp->d_ibool_crust_mantle,
- mp->d_ibelm_top_crust_mantle,
- mp->d_updated_dof_ocean_load,
- mp->nspec2D_top_crust_mantle);
+ mp->npoin_oceans,
+ mp->d_iglob_ocean_load,
+ mp->d_normal_ocean_load);
// for backward/reconstructed potentials
if( mp->simulation_type == 3 ) {
- // re-initializes array
- print_CUDA_error_if_any(cudaMemset(mp->d_b_updated_dof_ocean_load,0,
- sizeof(int)*(mp->NGLOB_CRUST_MANTLE_OCEANS)),88502);
-
compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle,
mp->d_rmass_ocean_load,
- mp->d_normal_top_crust_mantle,
- mp->d_ibool_crust_mantle,
- mp->d_ibelm_top_crust_mantle,
- mp->d_b_updated_dof_ocean_load,
- mp->nspec2D_top_crust_mantle);
+ mp->npoin_oceans,
+ mp->d_iglob_ocean_load,
+ mp->d_normal_ocean_load);
}
}
@@ -714,3 +680,4 @@
exit_on_cuda_error("compute_coupling_ocean_cuda");
#endif
}
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu 2012-08-15 19:55:44 UTC (rev 20570)
@@ -305,7 +305,7 @@
realw* d_c44store,realw* d_c45store,realw* d_c46store,
realw* d_c55store,realw* d_c56store,realw* d_c66store,
int ATTENUATION,
- realw minus_sum_beta,
+ realw one_minus_sum_beta_use,
realw duxdxl,realw duxdyl,realw duxdzl,
realw duydxl,realw duydyl,realw duydzl,
realw duzdxl,realw duzdyl,realw duzdzl,
@@ -315,7 +315,7 @@
){
realw c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,c33,c34,c35,c36,c44,c45,c46,c55,c56,c66;
- realw mul;
+ realw mul,minus_sum_beta;
c11 = d_c11store[offset];
c12 = d_c12store[offset];
@@ -341,7 +341,9 @@
// use unrelaxed parameters if attenuation
if( ATTENUATION){
+ minus_sum_beta = one_minus_sum_beta_use - 1.0f;
mul = c44;
+
c11 = c11 + 1.33333333333333333333f * minus_sum_beta * mul;
c12 = c12 - 0.66666666666666666666f * minus_sum_beta * mul;
c13 = c13 - 0.66666666666666666666f * minus_sum_beta * mul;
@@ -760,7 +762,7 @@
realw templ;
realw fac1,fac2,fac3;
- realw minus_sum_beta,one_minus_sum_beta_use;
+ realw one_minus_sum_beta_use;
realw sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz;
realw epsilondev_xx_loc,epsilondev_yy_loc,epsilondev_xy_loc,epsilondev_xz_loc,epsilondev_yz_loc;
@@ -785,9 +787,11 @@
__shared__ realw s_tempx1[NGLL3];
__shared__ realw s_tempx2[NGLL3];
__shared__ realw s_tempx3[NGLL3];
+
__shared__ realw s_tempy1[NGLL3];
__shared__ realw s_tempy2[NGLL3];
__shared__ realw s_tempy3[NGLL3];
+
__shared__ realw s_tempz1[NGLL3];
__shared__ realw s_tempz2[NGLL3];
__shared__ realw s_tempz3[NGLL3];
@@ -842,8 +846,7 @@
s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * d_veloc[iglob*3 + 1];
s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * d_veloc[iglob*3 + 2];
#endif
- }
- else{
+ }else{
// takes old routines
s_dummyx_loc_att[tx] = s_dummyx_loc[tx];
s_dummyy_loc_att[tx] = s_dummyy_loc[tx];
@@ -1141,7 +1144,6 @@
}else{
one_minus_sum_beta_use = one_minus_sum_beta[working_element]; // (1,1,1,ispec)
}
- minus_sum_beta = one_minus_sum_beta_use - 1.0f;
}
// computes stresses
@@ -1152,7 +1154,7 @@
d_c23store,d_c24store,d_c25store,d_c26store,d_c33store,d_c34store,d_c35store,
d_c36store,d_c44store,d_c45store,d_c46store,d_c55store,d_c56store,d_c66store,
ATTENUATION,
- minus_sum_beta,
+ one_minus_sum_beta_use,
duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl,
duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl,
&sigma_xx,&sigma_yy,&sigma_zz,
@@ -1202,8 +1204,8 @@
// jacobian
jacobianl = 1.0f / (xixl*(etayl*gammazl-etazl*gammayl)
- -xiyl*(etaxl*gammazl-etazl*gammaxl)
- +xizl*(etaxl*gammayl-etayl*gammaxl));
+ - xiyl*(etaxl*gammazl-etazl*gammaxl)
+ + xizl*(etaxl*gammayl-etayl*gammaxl));
if( GRAVITY ){
// computes non-symmetric terms for gravity
@@ -1390,13 +1392,16 @@
// update memory variables based upon the Runge-Kutta scheme
if( ATTENUATION && ( ! USE_ATTENUATION_MIMIC ) ){
+
compute_element_cm_att_memory(tx,working_element,
- d_muvstore,
- factor_common,alphaval,betaval,gammaval,
- R_xx,R_yy,R_xy,R_xz,R_yz,
- epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz,
- epsilondev_xx_loc,epsilondev_yy_loc,epsilondev_xy_loc,epsilondev_xz_loc,epsilondev_yz_loc,
- d_c44store,ANISOTROPY,ATTENUATION_3D);
+ d_muvstore,
+ factor_common,alphaval,betaval,gammaval,
+ R_xx,R_yy,R_xy,R_xz,R_yz,
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,
+ epsilondev_xz,epsilondev_yz,
+ epsilondev_xx_loc,epsilondev_yy_loc,epsilondev_xy_loc,
+ epsilondev_xz_loc,epsilondev_yz_loc,
+ d_c44store,ANISOTROPY,ATTENUATION_3D);
}
// save deviatoric strain for Runge-Kutta scheme
@@ -1635,7 +1640,7 @@
int nb_colors,nb_blocks_to_compute;
int istart;
int color_offset,color_offset_nonpadded;
- int color_offset_nonpadded_att2,color_offset_nonpadded_att3;
+ int color_offset_nonpadded_att1,color_offset_nonpadded_att2,color_offset_nonpadded_att3;
int color_offset_nonpadded_strain;
int color_offset_ispec;
@@ -1648,6 +1653,7 @@
// array offsets
color_offset = 0;
color_offset_nonpadded = 0;
+ color_offset_nonpadded_att1 = 0;
color_offset_nonpadded_att2 = 0;
color_offset_nonpadded_att3 = 0;
color_offset_nonpadded_strain = 0;
@@ -1660,6 +1666,8 @@
// array offsets
color_offset = (mp->nspec_outer_crust_mantle) * NGLL3_PADDED;
color_offset_nonpadded = (mp->nspec_outer_crust_mantle) * NGLL3;
+ color_offset_nonpadded_att1 = (mp->nspec_outer_crust_mantle) * NGLL3 * N_SLS;
+
// for factor_common array
if( mp->attenuation_3D ){
color_offset_nonpadded_att2 = (mp->nspec_outer_crust_mantle) * NGLL3;
@@ -1673,7 +1681,7 @@
color_offset_ispec = mp->nspec_outer_crust_mantle;
}
// for strain
- if( ! mp->NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1 ){
+ if( ! ( mp->NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1 ) ){
color_offset_nonpadded_strain = (mp->nspec_outer_crust_mantle) * NGLL3;
}
}
@@ -1719,22 +1727,22 @@
mp->d_eps_trace_over_3_crust_mantle + color_offset_nonpadded_strain,
mp->d_one_minus_sum_beta_crust_mantle + color_offset_nonpadded_att2,
mp->d_factor_common_crust_mantle + color_offset_nonpadded_att3,
- mp->d_R_xx_crust_mantle + color_offset_nonpadded,
- mp->d_R_yy_crust_mantle + color_offset_nonpadded,
- mp->d_R_xy_crust_mantle + color_offset_nonpadded,
- mp->d_R_xz_crust_mantle + color_offset_nonpadded,
- mp->d_R_yz_crust_mantle + color_offset_nonpadded,
+ mp->d_R_xx_crust_mantle + color_offset_nonpadded_att1,
+ mp->d_R_yy_crust_mantle + color_offset_nonpadded_att1,
+ mp->d_R_xy_crust_mantle + color_offset_nonpadded_att1,
+ mp->d_R_xz_crust_mantle + color_offset_nonpadded_att1,
+ mp->d_R_yz_crust_mantle + color_offset_nonpadded_att1,
mp->d_b_epsilondev_xx_crust_mantle + color_offset_nonpadded,
mp->d_b_epsilondev_yy_crust_mantle + color_offset_nonpadded,
mp->d_b_epsilondev_xy_crust_mantle + color_offset_nonpadded,
mp->d_b_epsilondev_xz_crust_mantle + color_offset_nonpadded,
mp->d_b_epsilondev_yz_crust_mantle + color_offset_nonpadded,
mp->d_b_eps_trace_over_3_crust_mantle + color_offset_nonpadded,
- mp->d_b_R_xx_crust_mantle + color_offset_nonpadded,
- mp->d_b_R_yy_crust_mantle + color_offset_nonpadded,
- mp->d_b_R_xy_crust_mantle + color_offset_nonpadded,
- mp->d_b_R_xz_crust_mantle + color_offset_nonpadded,
- mp->d_b_R_yz_crust_mantle + color_offset_nonpadded,
+ mp->d_b_R_xx_crust_mantle + color_offset_nonpadded_att1,
+ mp->d_b_R_yy_crust_mantle + color_offset_nonpadded_att1,
+ mp->d_b_R_xy_crust_mantle + color_offset_nonpadded_att1,
+ mp->d_b_R_xz_crust_mantle + color_offset_nonpadded_att1,
+ mp->d_b_R_yz_crust_mantle + color_offset_nonpadded_att1,
mp->d_c11store_crust_mantle + color_offset,
mp->d_c12store_crust_mantle + color_offset,
mp->d_c13store_crust_mantle + color_offset,
@@ -1755,13 +1763,13 @@
mp->d_c46store_crust_mantle + color_offset,
mp->d_c55store_crust_mantle + color_offset,
mp->d_c56store_crust_mantle + color_offset,
- mp->d_c66store_crust_mantle + color_offset
- );
+ mp->d_c66store_crust_mantle + color_offset);
// for padded and aligned arrays
color_offset += nb_blocks_to_compute * NGLL3_PADDED;
// for no-aligned arrays
color_offset_nonpadded += nb_blocks_to_compute * NGLL3;
+ color_offset_nonpadded_att1 += nb_blocks_to_compute * NGLL3 * N_SLS;
// for factor_common array
if( mp->attenuation_3D ){
color_offset_nonpadded_att2 += nb_blocks_to_compute * NGLL3;
@@ -1775,7 +1783,7 @@
color_offset_ispec += nb_blocks_to_compute;
}
// for strain
- if( ! mp->NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1 ){
+ if( ! ( mp->NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1 ) ){
color_offset_nonpadded_strain += nb_blocks_to_compute * NGLL3;
}
@@ -1826,8 +1834,7 @@
mp->d_c25store_crust_mantle,mp->d_c26store_crust_mantle,mp->d_c33store_crust_mantle,
mp->d_c34store_crust_mantle,mp->d_c35store_crust_mantle,mp->d_c36store_crust_mantle,
mp->d_c44store_crust_mantle,mp->d_c45store_crust_mantle,mp->d_c46store_crust_mantle,
- mp->d_c55store_crust_mantle,mp->d_c56store_crust_mantle,mp->d_c66store_crust_mantle
- );
+ mp->d_c55store_crust_mantle,mp->d_c56store_crust_mantle,mp->d_c66store_crust_mantle);
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu 2012-08-15 19:55:44 UTC (rev 20570)
@@ -340,7 +340,8 @@
realw* d_minus_deriv_gravity_table,
realw* d_density_table,
realw* wgll_cube,
- int NSPEC_INNER_CORE_STRAIN_ONLY){
+ int NSPEC_INNER_CORE_STRAIN_ONLY,
+ int NSPEC_INNER_CORE){
/* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
int bx = blockIdx.y*gridDim.x+blockIdx.x;
@@ -1010,16 +1011,24 @@
//mesh coloring
if( use_mesh_coloring_gpu ){
- // no atomic operation needed, colors don't share global points between elements
+ if( NSPEC_INNER_CORE > COLORING_MIN_NSPEC_INNER_CORE ){
+ // no atomic operation needed, colors don't share global points between elements
#ifdef USE_TEXTURES_FIELDS
- d_accel[iglob*3] = tex1Dfetch(d_accel_ic_tex, iglob*3) + sum_terms1;
- d_accel[iglob*3 + 1] = tex1Dfetch(d_accel_ic_tex, iglob*3 + 1) + sum_terms2;
- d_accel[iglob*3 + 2] = tex1Dfetch(d_accel_ic_tex, iglob*3 + 2) + sum_terms3;
+ d_accel[iglob*3] = tex1Dfetch(d_accel_ic_tex, iglob*3) + sum_terms1;
+ d_accel[iglob*3 + 1] = tex1Dfetch(d_accel_ic_tex, iglob*3 + 1) + sum_terms2;
+ d_accel[iglob*3 + 2] = tex1Dfetch(d_accel_ic_tex, iglob*3 + 2) + sum_terms3;
#else
- d_accel[iglob*3] += sum_terms1;
- d_accel[iglob*3 + 1] += sum_terms2;
- d_accel[iglob*3 + 2] += sum_terms3;
+ d_accel[iglob*3] += sum_terms1;
+ d_accel[iglob*3 + 1] += sum_terms2;
+ d_accel[iglob*3 + 2] += sum_terms3;
#endif // USE_TEXTURES_FIELDS
+ }else{
+ // poor element count, only use 1 color per inner/outer run
+ // forces atomic operations
+ atomicAdd(&d_accel[iglob*3], sum_terms1);
+ atomicAdd(&d_accel[iglob*3+1], sum_terms2);
+ atomicAdd(&d_accel[iglob*3+2], sum_terms3);
+ }
}else{
@@ -1095,8 +1104,7 @@
realw* d_b_R_xz,
realw* d_b_R_yz,
realw* d_c11store,realw* d_c12store,realw* d_c13store,
- realw* d_c33store,realw* d_c44store
- ){
+ realw* d_c33store,realw* d_c44store){
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("before kernel Kernel_2_inner_core");
@@ -1168,7 +1176,8 @@
mp->d_minus_deriv_gravity_table,
mp->d_density_table,
mp->d_wgll_cube,
- mp->NSPEC_INNER_CORE_STRAIN_ONLY);
+ mp->NSPEC_INNER_CORE_STRAIN_ONLY,
+ mp->NSPEC_INNER_CORE);
if(mp->simulation_type == 3) {
@@ -1216,7 +1225,8 @@
mp->d_minus_deriv_gravity_table,
mp->d_density_table,
mp->d_wgll_cube,
- mp->NSPEC_INNER_CORE_STRAIN_ONLY);
+ mp->NSPEC_INNER_CORE_STRAIN_ONLY,
+ mp->NSPEC_INNER_CORE);
}
// cudaEventRecord( stop, 0 );
@@ -1270,51 +1280,102 @@
int nb_colors,nb_blocks_to_compute;
int istart;
int color_offset,color_offset_nonpadded;
- int color_offset_nonpadded_att2,color_offset_nonpadded_att3;
+ int color_offset_nonpadded_att1,color_offset_nonpadded_att2,color_offset_nonpadded_att3;
int color_offset_nonpadded_strain;
int color_offset_ispec;
// sets up color loop
- if( *iphase == 1 ){
- // outer elements
- nb_colors = mp->num_colors_outer_inner_core;
- istart = 0;
+ if( mp->NSPEC_INNER_CORE > COLORING_MIN_NSPEC_INNER_CORE ){
+ if( *iphase == 1 ){
+ // outer elements
+ nb_colors = mp->num_colors_outer_inner_core;
+ istart = 0;
- // array offsets
- color_offset = 0;
- color_offset_nonpadded = 0;
- color_offset_nonpadded_att2 = 0;
- color_offset_nonpadded_att3 = 0;
- color_offset_nonpadded_strain = 0;
- color_offset_ispec = 0;
+ // array offsets
+ color_offset = 0;
+ color_offset_nonpadded = 0;
+ color_offset_nonpadded_att1 = 0;
+ color_offset_nonpadded_att2 = 0;
+ color_offset_nonpadded_att3 = 0;
+ color_offset_nonpadded_strain = 0;
+ color_offset_ispec = 0;
+ }else{
+ // inner elements (start after outer elements)
+ nb_colors = mp->num_colors_outer_inner_core + mp->num_colors_inner_inner_core;
+ istart = mp->num_colors_outer_inner_core;
+
+ // array offsets
+ color_offset = (mp->nspec_outer_inner_core) * NGLL3_PADDED;
+ color_offset_nonpadded = (mp->nspec_outer_inner_core) * NGLL3;
+ color_offset_nonpadded_att1 = (mp->nspec_outer_inner_core) * NGLL3 * N_SLS;
+ // for factor_common array
+ if( mp->attenuation_3D ){
+ color_offset_nonpadded_att2 = (mp->nspec_outer_inner_core) * NGLL3;
+ color_offset_nonpadded_att3 = (mp->nspec_outer_inner_core) * NGLL3 * N_SLS;
+ }else{
+ color_offset_nonpadded_att2 = (mp->nspec_outer_inner_core) * 1;
+ color_offset_nonpadded_att3 = (mp->nspec_outer_inner_core) * 1 * N_SLS;
+ }
+ // for idoubling array
+ color_offset_ispec = mp->nspec_outer_inner_core;
+ // for strain
+ if( ! mp->NSPEC_INNER_CORE_STRAIN_ONLY == 1 ){
+ color_offset_nonpadded_strain = (mp->nspec_outer_inner_core) * NGLL3;
+ }
+ }
}else{
- // inner elements (start after outer elements)
- nb_colors = mp->num_colors_outer_inner_core + mp->num_colors_inner_inner_core;
- istart = mp->num_colors_outer_inner_core;
- // array offsets
- color_offset = (mp->nspec_outer_inner_core) * NGLL3_PADDED;
- color_offset_nonpadded = (mp->nspec_outer_inner_core) * NGLL3;
- // for factor_common array
- if( mp->attenuation_3D ){
- color_offset_nonpadded_att2 = (mp->nspec_outer_inner_core) * NGLL3;
- color_offset_nonpadded_att3 = (mp->nspec_outer_inner_core) * NGLL3 * N_SLS;
+ // poor element count, only use 1 color per inner/outer run
+
+ if( *iphase == 1 ){
+ // outer elements
+ nb_colors = 1;
+ istart = 0;
+
+ // array offsets
+ color_offset = 0;
+ color_offset_nonpadded = 0;
+ color_offset_nonpadded_att1 = 0;
+ color_offset_nonpadded_att2 = 0;
+ color_offset_nonpadded_att3 = 0;
+ color_offset_nonpadded_strain = 0;
+ color_offset_ispec = 0;
}else{
- color_offset_nonpadded_att2 = (mp->nspec_outer_inner_core) * 1;
- color_offset_nonpadded_att3 = (mp->nspec_outer_inner_core) * 1 * N_SLS;
+ // inner element colors (start after outer elements)
+ nb_colors = 1;
+ istart = 0;
+
+ // array offsets
+ color_offset = (mp->nspec_outer_inner_core) * NGLL3_PADDED;
+ color_offset_nonpadded = (mp->nspec_outer_inner_core) * NGLL3;
+ color_offset_nonpadded_att1 = (mp->nspec_outer_inner_core) * NGLL3 * N_SLS;
+ // for factor_common array
+ if( mp->attenuation_3D ){
+ color_offset_nonpadded_att2 = (mp->nspec_outer_inner_core) * NGLL3;
+ color_offset_nonpadded_att3 = (mp->nspec_outer_inner_core) * NGLL3 * N_SLS;
+ }else{
+ color_offset_nonpadded_att2 = (mp->nspec_outer_inner_core) * 1;
+ color_offset_nonpadded_att3 = (mp->nspec_outer_inner_core) * 1 * N_SLS;
+ }
+ // for idoubling array
+ color_offset_ispec = mp->nspec_outer_inner_core;
+ // for strain
+ if( ! mp->NSPEC_INNER_CORE_STRAIN_ONLY == 1 ){
+ color_offset_nonpadded_strain = (mp->nspec_outer_inner_core) * NGLL3;
+ }
}
- // for idoubling array
- color_offset_ispec = mp->nspec_outer_inner_core;
- // for strain
- if( ! mp->NSPEC_INNER_CORE_STRAIN_ONLY == 1 ){
- color_offset_nonpadded_strain = (mp->nspec_outer_inner_core) * NGLL3;
- }
}
+
// loops over colors
for(int icolor = istart; icolor < nb_colors; icolor++){
- nb_blocks_to_compute = mp->h_num_elem_colors_inner_core[icolor];
+ // gets number of elements for this color
+ if( mp->NSPEC_INNER_CORE > COLORING_MIN_NSPEC_INNER_CORE ){
+ nb_blocks_to_compute = mp->h_num_elem_colors_inner_core[icolor];
+ }else{
+ nb_blocks_to_compute = num_elements;
+ }
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
// checks
@@ -1348,22 +1409,22 @@
mp->d_eps_trace_over_3_inner_core + color_offset_nonpadded_strain,
mp->d_one_minus_sum_beta_inner_core + color_offset_nonpadded_att2,
mp->d_factor_common_inner_core + color_offset_nonpadded_att3,
- mp->d_R_xx_inner_core + color_offset_nonpadded,
- mp->d_R_yy_inner_core + color_offset_nonpadded,
- mp->d_R_xy_inner_core + color_offset_nonpadded,
- mp->d_R_xz_inner_core + color_offset_nonpadded,
- mp->d_R_yz_inner_core + color_offset_nonpadded,
+ mp->d_R_xx_inner_core + color_offset_nonpadded_att1,
+ mp->d_R_yy_inner_core + color_offset_nonpadded_att1,
+ mp->d_R_xy_inner_core + color_offset_nonpadded_att1,
+ mp->d_R_xz_inner_core + color_offset_nonpadded_att1,
+ mp->d_R_yz_inner_core + color_offset_nonpadded_att1,
mp->d_b_epsilondev_xx_inner_core + color_offset_nonpadded,
mp->d_b_epsilondev_yy_inner_core + color_offset_nonpadded,
mp->d_b_epsilondev_xy_inner_core + color_offset_nonpadded,
mp->d_b_epsilondev_xz_inner_core + color_offset_nonpadded,
mp->d_b_epsilondev_yz_inner_core + color_offset_nonpadded,
mp->d_b_eps_trace_over_3_inner_core + color_offset_nonpadded,
- mp->d_b_R_xx_inner_core + color_offset_nonpadded,
- mp->d_b_R_yy_inner_core + color_offset_nonpadded,
- mp->d_b_R_xy_inner_core + color_offset_nonpadded,
- mp->d_b_R_xz_inner_core + color_offset_nonpadded,
- mp->d_b_R_yz_inner_core + color_offset_nonpadded,
+ mp->d_b_R_xx_inner_core + color_offset_nonpadded_att1,
+ mp->d_b_R_yy_inner_core + color_offset_nonpadded_att1,
+ mp->d_b_R_xy_inner_core + color_offset_nonpadded_att1,
+ mp->d_b_R_xz_inner_core + color_offset_nonpadded_att1,
+ mp->d_b_R_yz_inner_core + color_offset_nonpadded_att1,
mp->d_c11store_inner_core + color_offset,
mp->d_c12store_inner_core + color_offset,
mp->d_c13store_inner_core + color_offset,
@@ -1375,6 +1436,7 @@
color_offset += nb_blocks_to_compute * NGLL3_PADDED;
// for no-aligned arrays
color_offset_nonpadded += nb_blocks_to_compute * NGLL3;
+ color_offset_nonpadded_att1 += nb_blocks_to_compute * NGLL3 * N_SLS;
// for factor_common array
if( mp->attenuation_3D ){
color_offset_nonpadded_att2 += nb_blocks_to_compute * NGLL3;
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu 2012-08-15 19:55:44 UTC (rev 20570)
@@ -126,7 +126,8 @@
realw time,
realw two_omega_earth,
realw deltat,
- realw* d_A_array_rotation,realw* d_B_array_rotation){
+ realw* d_A_array_rotation,realw* d_B_array_rotation,
+ int NSPEC_OUTER_CORE){
int bx = blockIdx.y*gridDim.x+blockIdx.x;
int tx = threadIdx.x;
@@ -440,13 +441,20 @@
//mesh coloring
if( use_mesh_coloring_gpu ){
- // no atomic operation needed, colors don't share global points between elements
+ if( NSPEC_OUTER_CORE > COLORING_MIN_NSPEC_OUTER_CORE ){
+ // no atomic operation needed, colors don't share global points between elements
#ifdef USE_TEXTURES_FIELDS
- d_potential_dot_dot[iglob] = tex1Dfetch(d_accel_oc_tex, iglob) + sum_terms;
+ d_potential_dot_dot[iglob] = tex1Dfetch(d_accel_oc_tex, iglob) + sum_terms;
#else
- d_potential_dot_dot[iglob] += sum_terms;
+ d_potential_dot_dot[iglob] += sum_terms;
#endif // USE_TEXTURES_FIELDS
+ }else{
+ // poor element count, only use 1 color per inner/outer run
+ // forces atomic operations
+ atomicAdd(&d_potential_dot_dot[iglob],sum_terms);
+ }
+
}else{
atomicAdd(&d_potential_dot_dot[iglob],sum_terms);
@@ -466,8 +474,7 @@
realw* d_gammax,realw* d_gammay,realw* d_gammaz,
realw time, realw b_time,
realw* d_A_array_rotation,realw* d_B_array_rotation,
- realw* d_b_A_array_rotation,realw* d_b_B_array_rotation
- ){
+ realw* d_b_A_array_rotation,realw* d_b_B_array_rotation){
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("before outer_core kernel Kernel_2");
@@ -518,7 +525,8 @@
time,
mp->d_two_omega_earth,
mp->d_deltat,
- d_A_array_rotation,d_B_array_rotation);
+ d_A_array_rotation,d_B_array_rotation,
+ mp->NSPEC_OUTER_CORE);
if(mp->simulation_type == 3) {
Kernel_2_outer_core_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
@@ -545,7 +553,8 @@
b_time,
mp->d_b_two_omega_earth,
mp->d_b_deltat,
- d_b_A_array_rotation,d_b_B_array_rotation);
+ d_b_A_array_rotation,d_b_B_array_rotation,
+ mp->NSPEC_OUTER_CORE);
}
// cudaEventRecord( stop, 0 );
@@ -596,7 +605,7 @@
if( num_elements == 0 ) return;
// mesh coloring
- if( mp->use_mesh_coloring_gpu ){
+ if( mp->use_mesh_coloring_gpu){
// note: array offsets require sorted arrays, such that e.g. ibool starts with elastic elements
// and followed by acoustic ones.
@@ -607,28 +616,56 @@
int color_offset,color_offset_nonpadded;
// sets up color loop
- if( *iphase == 1 ){
- // outer elements
- nb_colors = mp->num_colors_outer_outer_core;
- istart = 0;
+ if( mp->NSPEC_OUTER_CORE > COLORING_MIN_NSPEC_OUTER_CORE ){
+ if( *iphase == 1 ){
+ // outer elements
+ nb_colors = mp->num_colors_outer_outer_core;
+ istart = 0;
- // array offsets
- color_offset = 0;
- color_offset_nonpadded = 0;
+ // array offsets
+ color_offset = 0;
+ color_offset_nonpadded = 0;
+ }else{
+ // inner element colors (start after outer elements)
+ nb_colors = mp->num_colors_outer_outer_core + mp->num_colors_inner_outer_core;
+ istart = mp->num_colors_outer_outer_core;
+
+ // array offsets (inner elements start after outer ones)
+ color_offset = mp->nspec_outer_outer_core * NGLL3_PADDED;
+ color_offset_nonpadded = mp->nspec_outer_outer_core * NGLL3;
+ }
}else{
- // inner element colors (start after outer elements)
- nb_colors = mp->num_colors_outer_outer_core + mp->num_colors_inner_outer_core;
- istart = mp->num_colors_outer_outer_core;
- // array offsets (inner elements start after outer ones)
- color_offset = mp->nspec_outer_outer_core * NGLL3_PADDED;
- color_offset_nonpadded = mp->nspec_outer_outer_core * NGLL3;
+ // poor element count, only use 1 color per inner/outer run
+
+ if( *iphase == 1 ){
+ // outer elements
+ nb_colors = 1;
+ istart = 0;
+
+ // array offsets
+ color_offset = 0;
+ color_offset_nonpadded = 0;
+ }else{
+ // inner element colors (start after outer elements)
+ nb_colors = 1;
+ istart = 0;
+
+ // array offsets (inner elements start after outer ones)
+ color_offset = mp->nspec_outer_outer_core * NGLL3_PADDED;
+ color_offset_nonpadded = mp->nspec_outer_outer_core * NGLL3;
+ }
}
// loops over colors
for(int icolor = istart; icolor < nb_colors; icolor++){
- nb_blocks_to_compute = mp->h_num_elem_colors_outer_core[icolor];
+ // gets number of elements for this color
+ if( mp->NSPEC_OUTER_CORE > COLORING_MIN_NSPEC_OUTER_CORE ){
+ nb_blocks_to_compute = mp->h_num_elem_colors_outer_core[icolor];
+ }else{
+ nb_blocks_to_compute = num_elements;
+ }
Kernel_2_outer_core(nb_blocks_to_compute,mp,
*iphase,
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2012-08-15 19:55:44 UTC (rev 20570)
@@ -601,13 +601,9 @@
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- // copies surface buffer to GPU
- cudaMemcpy(mp->d_noise_surface_movie,h_noise_surface_movie,
- NDIM*NGLL2*(mp->nspec_top)*sizeof(realw),cudaMemcpyHostToDevice);
-
- int num_blocks_x = mp->nspec_top;
realw deltat = *deltat_f;
+ int num_blocks_x = mp->nspec2D_top_crust_mantle;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
@@ -617,6 +613,12 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(NGLL2,1,1);
+ // copies surface buffer to GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_surface_movie,h_noise_surface_movie,
+ NDIM*NGLL2*(mp->nspec2D_top_crust_mantle)*sizeof(realw),
+ cudaMemcpyHostToDevice),90900);
+
+ // calculates noise strength kernel
compute_kernels_strength_noise_cuda_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
mp->d_ibelm_top_crust_mantle,
mp->d_ibool_crust_mantle,
@@ -626,7 +628,7 @@
mp->d_normal_z_noise,
mp->d_Sigma_kl,
deltat,
- mp->nspec_top);
+ mp->nspec2D_top_crust_mantle);
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("compute_kernels_strength_noise_cuda_kernel");
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-08-15 19:55:44 UTC (rev 20570)
@@ -40,8 +40,8 @@
*/
-#ifndef GPU_MESH_
-#define GPU_MESH_
+#ifndef CUDA_MESH_H
+#define CUDA_MESH_H
#include <sys/types.h>
#include <unistd.h>
@@ -137,6 +137,18 @@
#pragma message ("\nCompiling with: USE_MESH_COLORING_GPU enabled\n")
#endif
+// note: mesh coloring has a tradeoff between extra costs for looping over colors
+// and slowliness of atomic updates;
+// in general, the more elements per color the more efficient
+//
+// thresholds for coloring :
+// we assume that the crust/mantle region has enough elements for coloring
+//
+// minimum number of elements required in inner core before mesh coloring becomes attractive
+#define COLORING_MIN_NSPEC_INNER_CORE 1000
+// minimum number of elements required in outer core before mesh coloring becomes attractive
+#define COLORING_MIN_NSPEC_OUTER_CORE 1000
+
/* ----------------------------------------------------------------------------------------------- */
// Texture memory usage:
@@ -311,19 +323,14 @@
int nspec_outer_crust_mantle;
int nspec_inner_crust_mantle;
- int nspec2D_top_crust_mantle;
int nspec2D_bottom_crust_mantle;
int num_colors_inner_crust_mantle;
int num_colors_outer_crust_mantle;
int* h_num_elem_colors_crust_mantle;
- int* d_ibelm_top_crust_mantle;
int* d_ibelm_bottom_crust_mantle;
- // normal definition for coupling regions
- realw* d_normal_top_crust_mantle;
-
// ------------------------------------------------------------------ //
// outer_core
// ------------------------------------------------------------------ //
@@ -482,15 +489,13 @@
// ------------------------------------------------------------------ //
// oceans
// ------------------------------------------------------------------ //
- int NGLOB_CRUST_MANTLE_OCEANS;
+ int npoin_oceans;
// model parameter
realw* d_rmass_ocean_load;
+ int* d_iglob_ocean_load;
+ realw* d_normal_ocean_load;
- // temporary global array: used to synchronize updates on global accel array
- int* d_updated_dof_ocean_load;
- int* d_b_updated_dof_ocean_load;
-
// ------------------------------------------------------------------ //
// attenuation
// ------------------------------------------------------------------ //
@@ -668,8 +673,6 @@
// ------------------------------------------------------------------ //
int noise_tomography;
- int nspec_top;
-
realw* d_noise_surface_movie;
realw* d_noise_sourcearray;
@@ -677,6 +680,10 @@
realw* d_normal_y_noise;
realw* d_normal_z_noise;
realw* d_mask_noise;
+
+ // free surface
+ int nspec2D_top_crust_mantle;
+ int* d_ibelm_top_crust_mantle;
realw* d_jacobian2D_top_crust_mantle;
// noise sensitivity kernel
@@ -685,4 +692,4 @@
} Mesh;
-#endif
+#endif // CUDA_MESH_H
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2012-08-15 19:55:44 UTC (rev 20570)
@@ -154,7 +154,7 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
- int num_blocks_x = mp->nspec_top;
+ int num_blocks_x = mp->nspec2D_top_crust_mantle;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
@@ -164,14 +164,14 @@
dim3 threads(NGLL2,1,1);
noise_transfer_surface_to_host_kernel<<<grid,threads>>>(mp->d_ibelm_top_crust_mantle,
- mp->nspec_top,
+ mp->nspec2D_top_crust_mantle,
mp->d_ibool_crust_mantle,
mp->d_displ_crust_mantle,
mp->d_noise_surface_movie);
// copies noise array to CPU
cudaMemcpy(h_noise_surface_movie,mp->d_noise_surface_movie,
- NDIM*NGLL2*(mp->nspec_top)*sizeof(realw),cudaMemcpyDeviceToHost);
+ NDIM*NGLL2*(mp->nspec2D_top_crust_mantle)*sizeof(realw),cudaMemcpyDeviceToHost);
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("noise_transfer_surface_to_host");
@@ -304,7 +304,7 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- int num_blocks_x = mp->nspec_top;
+ int num_blocks_x = mp->nspec2D_top_crust_mantle;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
@@ -315,7 +315,7 @@
// copies surface movie to GPU
cudaMemcpy(mp->d_noise_surface_movie,h_noise_surface_movie,
- NDIM*NGLL2*(mp->nspec_top)*sizeof(realw),cudaMemcpyHostToDevice);
+ NDIM*NGLL2*(mp->nspec2D_top_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice);
switch(mp->noise_tomography) {
case 2:
@@ -323,7 +323,7 @@
noise_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_accel_crust_mantle,
mp->d_ibool_crust_mantle,
mp->d_ibelm_top_crust_mantle,
- mp->nspec_top,
+ mp->nspec2D_top_crust_mantle,
mp->d_noise_surface_movie,
mp->d_normal_x_noise,
mp->d_normal_y_noise,
@@ -338,7 +338,7 @@
noise_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
mp->d_ibool_crust_mantle,
mp->d_ibelm_top_crust_mantle,
- mp->nspec_top,
+ mp->nspec2D_top_crust_mantle,
mp->d_noise_surface_movie,
mp->d_normal_x_noise,
mp->d_normal_y_noise,
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h 2012-08-15 19:55:44 UTC (rev 20570)
@@ -26,8 +26,8 @@
!=====================================================================
*/
-#ifndef CUDA_HEADER_H
-#define CUDA_HEADER_H
+#ifndef CUDA_PREPARE_H
+#define CUDA_PREPARE_H
typedef float realw; // type of "working" variables
@@ -260,4 +260,4 @@
}
-#endif //CUDA_HEADER_H
+#endif //CUDA_PREPARE_H
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-08-15 19:55:44 UTC (rev 20570)
@@ -41,9 +41,50 @@
#include "mesh_constants_cuda.h"
#include "prepare_constants_cuda.h"
+/* ----------------------------------------------------------------------------------------------- */
+// helper functions
+
/* ----------------------------------------------------------------------------------------------- */
+
+// copies integer array from CPU host to GPU device
+void copy_todevice_int(void** d_array_addr_ptr,int* h_array,int size){
+ TRACE("copy_todevice_int");
+
+ // allocates memory on GPU
+ //
+ // note: cudaMalloc uses a double-pointer, such that it can return an error code in case it fails
+ // we thus pass the address to the pointer above (as void double-pointer) to have it
+ // pointing to the correct pointer of the array here
+ print_CUDA_error_if_any(cudaMalloc((void**)d_array_addr_ptr,size*sizeof(int)),
+ 10000);
+
+ // copies values onto GPU
+ //
+ // note: cudaMemcpy uses the pointer to the array, we thus re-cast the value of
+ // the double-pointer above to have the correct pointer to the array
+ print_CUDA_error_if_any(cudaMemcpy((int*) *d_array_addr_ptr,h_array,size*sizeof(int),cudaMemcpyHostToDevice),
+ 10001);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// copies integer array from CPU host to GPU device
+void copy_todevice_realw(void** d_array_addr_ptr,realw* h_array,int size){
+ TRACE("copy_todevice_realw");
+
+ // allocates memory on GPU
+ print_CUDA_error_if_any(cudaMalloc((void**)d_array_addr_ptr,size*sizeof(realw)),
+ 20000);
+ // copies values onto GPU
+ print_CUDA_error_if_any(cudaMemcpy((realw*) *d_array_addr_ptr,h_array,size*sizeof(realw),cudaMemcpyHostToDevice),
+ 20001);
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
// GPU preparation
/* ----------------------------------------------------------------------------------------------- */
@@ -64,7 +105,6 @@
int* nrec,int* nrec_local, int* nadj_rec_local,
int* NSPEC_CRUST_MANTLE, int* NGLOB_CRUST_MANTLE,
int* NSPEC_CRUST_MANTLE_STRAIN_ONLY,
- int* NGLOB_CRUST_MANTLE_OCEANS,
int* NSPEC_OUTER_CORE, int* NGLOB_OUTER_CORE,
int* NSPEC_INNER_CORE, int* NGLOB_INNER_CORE,
int* NSPEC_INNER_CORE_STRAIN_ONLY,
@@ -87,7 +127,7 @@
int* ANISOTROPIC_KL_f,
int* APPROXIMATE_HESS_KL_f) {
-TRACE("prepare_constants_device");
+ TRACE("prepare_constants_device");
// allocates mesh parameter structure
Mesh* mp = (Mesh*) malloc( sizeof(Mesh) );
@@ -140,7 +180,6 @@
mp->NSPEC_CRUST_MANTLE = *NSPEC_CRUST_MANTLE;
mp->NGLOB_CRUST_MANTLE = *NGLOB_CRUST_MANTLE;
mp->NSPEC_CRUST_MANTLE_STRAIN_ONLY = *NSPEC_CRUST_MANTLE_STRAIN_ONLY;
- mp->NGLOB_CRUST_MANTLE_OCEANS = *NGLOB_CRUST_MANTLE_OCEANS;
mp->NSPEC_OUTER_CORE = *NSPEC_OUTER_CORE;
mp->NGLOB_OUTER_CORE = *NGLOB_OUTER_CORE;
@@ -191,59 +230,42 @@
mp->nsources_local = *nsources_local;
if( mp->simulation_type == 1 || mp->simulation_type == 3 ){
// not needed in case of pure adjoint simulations (SIMULATION_TYPE == 2)
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_sourcearrays,
- sizeof(realw)* *NSOURCES*3*NGLL3),1301);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_sourcearrays, h_sourcearrays,
- sizeof(realw)* *NSOURCES*3*NGLL3,cudaMemcpyHostToDevice),1302);
+ copy_todevice_realw((void**)&mp->d_sourcearrays,h_sourcearrays,(*NSOURCES)*NDIM*NGLL3);
+
// buffer for source time function values
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_stf_pre_compute,
- *NSOURCES*sizeof(double)),1303);
+ (*NSOURCES)*sizeof(double)),1303);
}
+ copy_todevice_int((void**)&mp->d_islice_selected_source,h_islice_selected_source,(*NSOURCES));
+ copy_todevice_int((void**)&mp->d_ispec_selected_source,h_ispec_selected_source,(*NSOURCES));
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_source,
- sizeof(int) * *NSOURCES),1401);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_source, h_islice_selected_source,
- sizeof(int)* *NSOURCES,cudaMemcpyHostToDevice),1402);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_ispec_selected_source,
- sizeof(int)* *NSOURCES),1403);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_source, h_ispec_selected_source,
- sizeof(int)* *NSOURCES,cudaMemcpyHostToDevice),1404);
-
-
// receiver stations
// note that: size(number_receiver_global) = nrec_local
// size(ispec_selected_rec) = nrec
// number of receiver located in this partition
mp->nrec_local = *nrec_local;
if( mp->nrec_local > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_number_receiver_global),mp->nrec_local*sizeof(int)),1);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_number_receiver_global,h_number_receiver_global,
- mp->nrec_local*sizeof(int),cudaMemcpyHostToDevice),1512);
+ copy_todevice_int((void**)&mp->d_number_receiver_global,h_number_receiver_global,mp->nrec_local);
// for seismograms
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),
- 3*NGLL3*(mp->nrec_local)*sizeof(realw)),4015);
+ NDIM*NGLL3*(mp->nrec_local)*sizeof(realw)),4015);
- mp->h_station_seismo_field = (realw*) malloc( 3*NGLL3*(mp->nrec_local)*sizeof(realw) );
+ mp->h_station_seismo_field = (realw*) malloc( NDIM*NGLL3*(mp->nrec_local)*sizeof(realw) );
if( mp->h_station_seismo_field == NULL) exit_on_error("h_station_seismo_field not allocated \n");
}
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_selected_rec),(*nrec)*sizeof(int)),1513);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_rec,h_ispec_selected_rec,
- (*nrec)*sizeof(int),cudaMemcpyHostToDevice),1514);
+ copy_todevice_int((void**)&mp->d_ispec_selected_rec,h_ispec_selected_rec,(*nrec));
// receiver adjoint source arrays only used for noise and adjoint simulations
// adjoint source arrays
mp->nadj_rec_local = *nadj_rec_local;
if( mp->nadj_rec_local > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_adj_sourcearrays,
- (mp->nadj_rec_local)*3*NGLL3*sizeof(realw)),6003);
+ // prepares local irec array:
print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_pre_computed_irec,
(mp->nadj_rec_local)*sizeof(int)),6004);
- // prepares local irec array:
// the irec_local variable needs to be precomputed (as
// h_pre_comp..), because normally it is in the loop updating accel,
// and due to how it's incremented, it cannot be parallelized
@@ -264,8 +286,12 @@
free(h_pre_computed_irec);
// temporary array to prepare extracted source array values
- mp->h_adj_sourcearrays_slice = (realw*) malloc( (mp->nadj_rec_local)*3*NGLL3*sizeof(realw) );
+ mp->h_adj_sourcearrays_slice = (realw*) malloc( (mp->nadj_rec_local)*NDIM*NGLL3*sizeof(realw) );
if( mp->h_adj_sourcearrays_slice == NULL ) exit_on_error("h_adj_sourcearrays_slice not allocated\n");
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_adj_sourcearrays,
+ (mp->nadj_rec_local)*NDIM*NGLL3*sizeof(realw)),6003);
+
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -290,8 +316,7 @@
realw* b_deltat,
realw* b_A_array_rotation,
realw* b_B_array_rotation,
- int* NSPEC_OUTER_CORE_ROTATION
- ) {
+ int* NSPEC_OUTER_CORE_ROTATION) {
TRACE("prepare_fields_rotation_device");
@@ -304,30 +329,16 @@
mp->d_two_omega_earth = *two_omega_earth;
mp->d_deltat = *deltat;
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_A_array_rotation,
- NGLL3*(*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw)),9000);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_A_array_rotation, A_array_rotation,
- NGLL3*(*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw),cudaMemcpyHostToDevice),9001);
+ copy_todevice_realw((void**)&mp->d_A_array_rotation,A_array_rotation,NGLL3*(*NSPEC_OUTER_CORE_ROTATION));
+ copy_todevice_realw((void**)&mp->d_B_array_rotation,B_array_rotation,NGLL3*(*NSPEC_OUTER_CORE_ROTATION));
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_B_array_rotation,
- NGLL3*(*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw)),9002);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_B_array_rotation, B_array_rotation,
- NGLL3*(*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw),cudaMemcpyHostToDevice),9003);
-
// backward/reconstructed fields
if( mp->simulation_type == 3 ){
mp->d_b_two_omega_earth = *b_two_omega_earth;
mp->d_b_deltat = *b_deltat;
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_A_array_rotation,
- NGLL3*(*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw)),9000);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_A_array_rotation, b_A_array_rotation,
- NGLL3*(*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw),cudaMemcpyHostToDevice),9001);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_B_array_rotation,
- NGLL3*(*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw)),9002);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_B_array_rotation, b_B_array_rotation,
- NGLL3*(*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw),cudaMemcpyHostToDevice),9003);
+ copy_todevice_realw((void**)&mp->d_b_A_array_rotation,b_A_array_rotation,NGLL3*(*NSPEC_OUTER_CORE_ROTATION));
+ copy_todevice_realw((void**)&mp->d_b_B_array_rotation,b_B_array_rotation,NGLL3*(*NSPEC_OUTER_CORE_ROTATION));
}
}
@@ -347,8 +358,7 @@
realw* minus_deriv_gravity_table,
realw* density_table,
realw* h_wgll_cube,
- int* NRAD_GRAVITY
- ) {
+ int* NRAD_GRAVITY) {
TRACE("prepare_fields_gravity_device");
@@ -358,10 +368,7 @@
// no gravity case
// d ln(rho)/dr needed for the no gravity fluid potential
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_d_ln_density_dr_table,
- (*NRAD_GRAVITY)*sizeof(realw)),8000);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_d_ln_density_dr_table, d_ln_density_dr_table,
- (*NRAD_GRAVITY)*sizeof(realw),cudaMemcpyHostToDevice),8001);
+ copy_todevice_realw((void**)&mp->d_d_ln_density_dr_table,d_ln_density_dr_table,(*NRAD_GRAVITY));
}else{
// gravity case
@@ -370,25 +377,10 @@
setConst_wgll_cube(h_wgll_cube,mp);
// prepares gravity arrays
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_minus_rho_g_over_kappa_fluid,
- (*NRAD_GRAVITY)*sizeof(realw)),8000);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_rho_g_over_kappa_fluid, minus_rho_g_over_kappa_fluid,
- (*NRAD_GRAVITY)*sizeof(realw),cudaMemcpyHostToDevice),8001);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_minus_gravity_table,
- (*NRAD_GRAVITY)*sizeof(realw)),8000);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_gravity_table, minus_gravity_table,
- (*NRAD_GRAVITY)*sizeof(realw),cudaMemcpyHostToDevice),8001);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_minus_deriv_gravity_table,
- (*NRAD_GRAVITY)*sizeof(realw)),8000);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_deriv_gravity_table, minus_deriv_gravity_table,
- (*NRAD_GRAVITY)*sizeof(realw),cudaMemcpyHostToDevice),8001);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_density_table,
- (*NRAD_GRAVITY)*sizeof(realw)),8000);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_density_table, density_table,
- (*NRAD_GRAVITY)*sizeof(realw),cudaMemcpyHostToDevice),8001);
+ copy_todevice_realw((void**)&mp->d_minus_rho_g_over_kappa_fluid,minus_rho_g_over_kappa_fluid,(*NRAD_GRAVITY));
+ copy_todevice_realw((void**)&mp->d_minus_gravity_table,minus_gravity_table,(*NRAD_GRAVITY));
+ copy_todevice_realw((void**)&mp->d_minus_deriv_gravity_table,minus_deriv_gravity_table,(*NRAD_GRAVITY));
+ copy_todevice_realw((void**)&mp->d_density_table,density_table,(*NRAD_GRAVITY));
}
}
@@ -418,8 +410,7 @@
realw* factor_common_inner_core,
realw* one_minus_sum_beta_inner_core,
realw* alphaval,realw* betaval,realw* gammaval,
- realw* b_alphaval,realw* b_betaval,realw* b_gammaval
- ) {
+ realw* b_alphaval,realw* b_betaval,realw* b_gammaval) {
TRACE("prepare_fields_attenuat_device");
int R_size1,R_size2,R_size3;
@@ -430,131 +421,61 @@
if( ! mp->attenuation ){ exit_on_cuda_error("prepare_fields_attenuat_device attenuation not properly initialized"); }
// crust_mantle
+ R_size1 = N_SLS*NGLL3*mp->NSPEC_CRUST_MANTLE;
if( mp->attenuation_3D ){
- R_size1 = N_SLS*NGLL3*mp->NSPEC_CRUST_MANTLE;
R_size2 = NGLL3*mp->NSPEC_CRUST_MANTLE;
R_size3 = N_SLS*NGLL3*mp->NSPEC_CRUST_MANTLE;
}else{
- R_size1 = N_SLS*NGLL3*mp->NSPEC_CRUST_MANTLE;
R_size2 = 1*mp->NSPEC_CRUST_MANTLE;
R_size3 = N_SLS*1*mp->NSPEC_CRUST_MANTLE;
}
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_one_minus_sum_beta_crust_mantle,
- R_size2*sizeof(realw)),4430);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta_crust_mantle,one_minus_sum_beta_crust_mantle,
- R_size2*sizeof(realw),cudaMemcpyHostToDevice),4431);
+ copy_todevice_realw((void**)&mp->d_one_minus_sum_beta_crust_mantle,one_minus_sum_beta_crust_mantle,R_size2);
if( ! mp->use_attenuation_mimic ){
// common factor
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_factor_common_crust_mantle,
- R_size3*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_factor_common_crust_mantle,factor_common_crust_mantle,
- R_size3*sizeof(realw),cudaMemcpyHostToDevice),4433);
-
+ copy_todevice_realw((void**)&mp->d_factor_common_crust_mantle,factor_common_crust_mantle,R_size3);
// memory variables
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xx_crust_mantle,
- R_size1*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_yy_crust_mantle,
- R_size1*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xy_crust_mantle,
- R_size1*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xz_crust_mantle,
- R_size1*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_yz_crust_mantle,
- R_size1*sizeof(realw)),4401);
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xx_crust_mantle,R_xx_crust_mantle,
- R_size1*sizeof(realw),cudaMemcpyHostToDevice),4800);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yy_crust_mantle,R_yy_crust_mantle,
- R_size1*sizeof(realw),cudaMemcpyHostToDevice),4800);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xy_crust_mantle,R_xy_crust_mantle,
- R_size1*sizeof(realw),cudaMemcpyHostToDevice),4800);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xz_crust_mantle,R_xz_crust_mantle,
- R_size1*sizeof(realw),cudaMemcpyHostToDevice),4800);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz_crust_mantle,R_yz_crust_mantle,
- R_size1*sizeof(realw),cudaMemcpyHostToDevice),4800);
+ copy_todevice_realw((void**)&mp->d_R_xx_crust_mantle,R_xx_crust_mantle,R_size1);
+ copy_todevice_realw((void**)&mp->d_R_yy_crust_mantle,R_yy_crust_mantle,R_size1);
+ copy_todevice_realw((void**)&mp->d_R_xy_crust_mantle,R_xy_crust_mantle,R_size1);
+ copy_todevice_realw((void**)&mp->d_R_xz_crust_mantle,R_xz_crust_mantle,R_size1);
+ copy_todevice_realw((void**)&mp->d_R_yz_crust_mantle,R_yz_crust_mantle,R_size1);
}
// inner_core
+ R_size1 = N_SLS*NGLL3*mp->NSPEC_INNER_CORE;
if( mp->attenuation_3D ){
- R_size1 = N_SLS*NGLL3*mp->NSPEC_INNER_CORE;
R_size2 = NGLL3*mp->NSPEC_INNER_CORE;
R_size3 = N_SLS*NGLL3*mp->NSPEC_INNER_CORE;
}else{
- R_size1 = N_SLS*NGLL3*mp->NSPEC_INNER_CORE;
R_size2 = 1*mp->NSPEC_INNER_CORE;
R_size3 = N_SLS*1*mp->NSPEC_INNER_CORE;
}
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_one_minus_sum_beta_inner_core,
- R_size2*sizeof(realw)),4430);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta_inner_core,one_minus_sum_beta_inner_core,
- R_size2*sizeof(realw),cudaMemcpyHostToDevice),4431);
+ copy_todevice_realw((void**)&mp->d_one_minus_sum_beta_inner_core,one_minus_sum_beta_inner_core,R_size2);
if( ! mp->use_attenuation_mimic ){
// common factor
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_factor_common_inner_core,
- R_size3*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_factor_common_inner_core,factor_common_inner_core,
- R_size3*sizeof(realw),cudaMemcpyHostToDevice),4433);
-
+ copy_todevice_realw((void**)&mp->d_factor_common_inner_core,factor_common_inner_core,R_size3);
// memory variables
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xx_inner_core,
- R_size1*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_yy_inner_core,
- R_size1*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xy_inner_core,
- R_size1*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xz_inner_core,
- R_size1*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_yz_inner_core,
- R_size1*sizeof(realw)),4401);
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xx_inner_core,R_xx_inner_core,
- R_size1*sizeof(realw),cudaMemcpyHostToDevice),4402);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yy_inner_core,R_yy_inner_core,
- R_size1*sizeof(realw),cudaMemcpyHostToDevice),4402);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xy_inner_core,R_xy_inner_core,
- R_size1*sizeof(realw),cudaMemcpyHostToDevice),4402);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xz_inner_core,R_xz_inner_core,
- R_size1*sizeof(realw),cudaMemcpyHostToDevice),4402);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz_inner_core,R_yz_inner_core,
- R_size1*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ copy_todevice_realw((void**)&mp->d_R_xx_inner_core,R_xx_inner_core,R_size1);
+ copy_todevice_realw((void**)&mp->d_R_yy_inner_core,R_yy_inner_core,R_size1);
+ copy_todevice_realw((void**)&mp->d_R_xy_inner_core,R_xy_inner_core,R_size1);
+ copy_todevice_realw((void**)&mp->d_R_xz_inner_core,R_xz_inner_core,R_size1);
+ copy_todevice_realw((void**)&mp->d_R_yz_inner_core,R_yz_inner_core,R_size1);
}
// alpha,beta,gamma factors
- print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_alphaval),
- N_SLS*sizeof(realw)),4434);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_alphaval ,alphaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4435);
+ copy_todevice_realw((void**)&mp->d_alphaval,alphaval,N_SLS);
+ copy_todevice_realw((void**)&mp->d_betaval,betaval,N_SLS);
+ copy_todevice_realw((void**)&mp->d_gammaval,gammaval,N_SLS);
- print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_betaval),
- N_SLS*sizeof(realw)),4436);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_betaval ,betaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4437);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_gammaval),
- N_SLS*sizeof(realw)),4438);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaval ,gammaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4439);
-
if( mp->simulation_type == 3 ){
// alpha,beta,gamma factors for backward fields
- print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_b_alphaval),
- N_SLS*sizeof(realw)),5434);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5435);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_b_betaval),
- N_SLS*sizeof(realw)),5436);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5437);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_b_gammaval),
- N_SLS*sizeof(realw)),5438);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5439);
+ copy_todevice_realw((void**)&mp->d_b_alphaval,b_alphaval,N_SLS);
+ copy_todevice_realw((void**)&mp->d_b_betaval,b_betaval,N_SLS);
+ copy_todevice_realw((void**)&mp->d_b_gammaval,b_gammaval,N_SLS);
}
}
@@ -590,8 +511,7 @@
realw* b_epsilondev_xz_inner_core,
realw* b_epsilondev_yz_inner_core,
realw* eps_trace_over_3_inner_core,
- realw* b_eps_trace_over_3_inner_core
- ) {
+ realw* b_eps_trace_over_3_inner_core) {
TRACE("prepare_fields_strain_device");
int R_size,size_strain_only;
@@ -603,125 +523,48 @@
// crust_mantle
R_size = NGLL3*mp->NSPEC_CRUST_MANTLE;
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xx_crust_mantle,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_yy_crust_mantle,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xy_crust_mantle,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xz_crust_mantle,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_yz_crust_mantle,
- R_size*sizeof(realw)),4432);
+ copy_todevice_realw((void**)&mp->d_epsilondev_xx_crust_mantle,epsilondev_xx_crust_mantle,R_size);
+ copy_todevice_realw((void**)&mp->d_epsilondev_yy_crust_mantle,epsilondev_yy_crust_mantle,R_size);
+ copy_todevice_realw((void**)&mp->d_epsilondev_xy_crust_mantle,epsilondev_xy_crust_mantle,R_size);
+ copy_todevice_realw((void**)&mp->d_epsilondev_xz_crust_mantle,epsilondev_xz_crust_mantle,R_size);
+ copy_todevice_realw((void**)&mp->d_epsilondev_yz_crust_mantle,epsilondev_yz_crust_mantle,R_size);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx_crust_mantle,epsilondev_xx_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yy_crust_mantle,epsilondev_yy_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xy_crust_mantle,epsilondev_xy_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xz_crust_mantle,epsilondev_xz_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz_crust_mantle,epsilondev_yz_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
-
// strain
size_strain_only = NGLL3*(mp->NSPEC_CRUST_MANTLE_STRAIN_ONLY);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_eps_trace_over_3_crust_mantle,
- size_strain_only*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_eps_trace_over_3_crust_mantle,eps_trace_over_3_crust_mantle,
- size_strain_only*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ copy_todevice_realw((void**)&mp->d_eps_trace_over_3_crust_mantle,eps_trace_over_3_crust_mantle,size_strain_only);
// backward/reconstructed fields
if( mp->simulation_type == 3 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xx_crust_mantle,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_yy_crust_mantle,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xy_crust_mantle,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xz_crust_mantle,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_yz_crust_mantle,
- R_size*sizeof(realw)),4432);
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx_crust_mantle,b_epsilondev_xx_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy_crust_mantle,b_epsilondev_yy_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy_crust_mantle,b_epsilondev_xy_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz_crust_mantle,b_epsilondev_xz_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz_crust_mantle,b_epsilondev_yz_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
-
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_eps_trace_over_3_crust_mantle,
- R_size*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_eps_trace_over_3_crust_mantle,b_eps_trace_over_3_crust_mantle,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_xx_crust_mantle,b_epsilondev_xx_crust_mantle,R_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_yy_crust_mantle,b_epsilondev_yy_crust_mantle,R_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_xy_crust_mantle,b_epsilondev_xy_crust_mantle,R_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_xz_crust_mantle,b_epsilondev_xz_crust_mantle,R_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_yz_crust_mantle,b_epsilondev_yz_crust_mantle,R_size);
+ //strain
+ copy_todevice_realw((void**)&mp->d_b_eps_trace_over_3_crust_mantle,b_eps_trace_over_3_crust_mantle,R_size);
}
// inner_core
R_size = NGLL3*mp->NSPEC_INNER_CORE;
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xx_inner_core,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_yy_inner_core,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xy_inner_core,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xz_inner_core,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_yz_inner_core,
- R_size*sizeof(realw)),4432);
+ copy_todevice_realw((void**)&mp->d_epsilondev_xx_inner_core,epsilondev_xx_inner_core,R_size);
+ copy_todevice_realw((void**)&mp->d_epsilondev_yy_inner_core,epsilondev_yy_inner_core,R_size);
+ copy_todevice_realw((void**)&mp->d_epsilondev_xy_inner_core,epsilondev_xy_inner_core,R_size);
+ copy_todevice_realw((void**)&mp->d_epsilondev_xz_inner_core,epsilondev_xz_inner_core,R_size);
+ copy_todevice_realw((void**)&mp->d_epsilondev_yz_inner_core,epsilondev_yz_inner_core,R_size);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx_inner_core,epsilondev_xx_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yy_inner_core,epsilondev_yy_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xy_inner_core,epsilondev_xy_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xz_inner_core,epsilondev_xz_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz_inner_core,epsilondev_yz_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
-
// strain
size_strain_only = NGLL3*(mp->NSPEC_INNER_CORE_STRAIN_ONLY);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_eps_trace_over_3_inner_core,
- size_strain_only*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_eps_trace_over_3_inner_core,eps_trace_over_3_inner_core,
- size_strain_only*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ copy_todevice_realw((void**)&mp->d_eps_trace_over_3_inner_core,eps_trace_over_3_inner_core,size_strain_only);
+
// backward/reconstructed fields
if( mp->simulation_type == 3 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xx_inner_core,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_yy_inner_core,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xy_inner_core,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xz_inner_core,
- R_size*sizeof(realw)),4432);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_yz_inner_core,
- R_size*sizeof(realw)),4432);
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx_inner_core,b_epsilondev_xx_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy_inner_core,b_epsilondev_yy_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy_inner_core,b_epsilondev_xy_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz_inner_core,b_epsilondev_xz_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz_inner_core,b_epsilondev_yz_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
-
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_eps_trace_over_3_inner_core,
- R_size*sizeof(realw)),4401);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_eps_trace_over_3_inner_core,b_eps_trace_over_3_inner_core,
- R_size*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_xx_inner_core,b_epsilondev_xx_inner_core,R_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_yy_inner_core,b_epsilondev_yy_inner_core,R_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_xy_inner_core,b_epsilondev_xy_inner_core,R_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_xz_inner_core,b_epsilondev_xz_inner_core,R_size);
+ copy_todevice_realw((void**)&mp->d_b_epsilondev_yz_inner_core,b_epsilondev_yz_inner_core,R_size);
+ // strain
+ copy_todevice_realw((void**)&mp->d_b_eps_trace_over_3_inner_core,b_eps_trace_over_3_inner_core,R_size);
}
}
@@ -761,8 +604,7 @@
realw* jacobian2D_xmin_outer_core, realw* jacobian2D_xmax_outer_core,
realw* jacobian2D_ymin_outer_core, realw* jacobian2D_ymax_outer_core,
realw* jacobian2D_bottom_outer_core,
- realw* vp_outer_core
- ) {
+ realw* vp_outer_core) {
TRACE("prepare_fields_absorb_device");
int size;
@@ -780,65 +622,24 @@
// vp & vs
size = NGLL3*(mp->NSPEC_CRUST_MANTLE);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rho_vp_crust_mantle,
- size*sizeof(realw)),2201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp_crust_mantle,rho_vp_crust_mantle,
- size*sizeof(realw),cudaMemcpyHostToDevice),2202);
+ copy_todevice_realw((void**)&mp->d_rho_vp_crust_mantle,rho_vp_crust_mantle,size);
+ copy_todevice_realw((void**)&mp->d_rho_vs_crust_mantle,rho_vs_crust_mantle,size);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rho_vs_crust_mantle,
- size*sizeof(realw)),2201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs_crust_mantle,rho_vs_crust_mantle,
- size*sizeof(realw),cudaMemcpyHostToDevice),2202);
-
// ijk index arrays
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nkmin_xi_crust_mantle,
- 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_xi_crust_mantle,nkmin_xi_crust_mantle,
- 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
+ copy_todevice_int((void**)&mp->d_nkmin_xi_crust_mantle,nkmin_xi_crust_mantle,2*(*NSPEC2DMAX_XMIN_XMAX_CM));
+ copy_todevice_int((void**)&mp->d_nkmin_eta_crust_mantle,nkmin_eta_crust_mantle,2*(*NSPEC2DMAX_YMIN_YMAX_CM));
+ copy_todevice_int((void**)&mp->d_njmin_crust_mantle,njmin_crust_mantle,2*(*NSPEC2DMAX_XMIN_XMAX_CM));
+ copy_todevice_int((void**)&mp->d_njmax_crust_mantle,njmax_crust_mantle,2*(*NSPEC2DMAX_XMIN_XMAX_CM));
+ copy_todevice_int((void**)&mp->d_nimin_crust_mantle,nimin_crust_mantle,2*(*NSPEC2DMAX_YMIN_YMAX_CM));
+ copy_todevice_int((void**)&mp->d_nimax_crust_mantle,nimax_crust_mantle,2*(*NSPEC2DMAX_YMIN_YMAX_CM));
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nkmin_eta_crust_mantle,
- 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_eta_crust_mantle,nkmin_eta_crust_mantle,
- 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmin_crust_mantle,
- 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_njmin_crust_mantle,njmin_crust_mantle,
- 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmax_crust_mantle,
- 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_njmax_crust_mantle,njmax_crust_mantle,
- 2*(*NSPEC2DMAX_XMIN_XMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nimin_crust_mantle,
- 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nimin_crust_mantle,nimin_crust_mantle,
- 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nimax_crust_mantle,
- 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nimax_crust_mantle,nimax_crust_mantle,
- 2*(*NSPEC2DMAX_YMIN_YMAX_CM)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
-
// xmin
if( mp->nspec2D_xmin_crust_mantle > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_xmin_crust_mantle,
- (mp->nspec2D_xmin_crust_mantle)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmin_crust_mantle,ibelm_xmin_crust_mantle,
- (mp->nspec2D_xmin_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_xmin_crust_mantle,
- NDIM*NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_xmin_crust_mantle,normal_xmin_crust_mantle,
- NDIM*NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmin_crust_mantle,
- NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmin_crust_mantle,jacobian2D_xmin_crust_mantle,
- NGLL2*(mp->nspec2D_xmin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+ copy_todevice_int((void**)&mp->d_ibelm_xmin_crust_mantle,ibelm_xmin_crust_mantle,mp->nspec2D_xmin_crust_mantle);
+ copy_todevice_realw((void**)&mp->d_normal_xmin_crust_mantle,normal_xmin_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_xmin_crust_mantle));
+ copy_todevice_realw((void**)&mp->d_jacobian2D_xmin_crust_mantle,jacobian2D_xmin_crust_mantle,
+ NGLL2*(mp->nspec2D_xmin_crust_mantle));
// boundary buffer
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmin_crust_mantle,
@@ -848,21 +649,11 @@
// xmax
if( mp->nspec2D_xmax_crust_mantle > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_xmax_crust_mantle,
- (mp->nspec2D_xmax_crust_mantle)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmax_crust_mantle,ibelm_xmax_crust_mantle,
- (mp->nspec2D_xmax_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_xmax_crust_mantle,
- NDIM*NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_xmax_crust_mantle,normal_xmax_crust_mantle,
- NDIM*NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmax_crust_mantle,
- NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmax_crust_mantle,jacobian2D_xmax_crust_mantle,
- NGLL2*(mp->nspec2D_xmax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+ copy_todevice_int((void**)&mp->d_ibelm_xmax_crust_mantle,ibelm_xmax_crust_mantle,mp->nspec2D_xmax_crust_mantle);
+ copy_todevice_realw((void**)&mp->d_normal_xmax_crust_mantle,normal_xmax_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_xmax_crust_mantle));
+ copy_todevice_realw((void**)&mp->d_jacobian2D_xmax_crust_mantle,jacobian2D_xmax_crust_mantle,
+ NGLL2*(mp->nspec2D_xmax_crust_mantle));
// boundary buffer
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmax_crust_mantle,
@@ -872,21 +663,11 @@
// ymin
if( mp->nspec2D_ymin_crust_mantle > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_ymin_crust_mantle,
- (mp->nspec2D_ymin_crust_mantle)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymin_crust_mantle,ibelm_ymin_crust_mantle,
- (mp->nspec2D_ymin_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_ymin_crust_mantle,
- NDIM*NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_ymin_crust_mantle,normal_ymin_crust_mantle,
- NDIM*NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymin_crust_mantle,
- NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymin_crust_mantle,jacobian2D_ymin_crust_mantle,
- NGLL2*(mp->nspec2D_ymin_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+ copy_todevice_int((void**)&mp->d_ibelm_ymin_crust_mantle,ibelm_ymin_crust_mantle,mp->nspec2D_ymin_crust_mantle);
+ copy_todevice_realw((void**)&mp->d_normal_ymin_crust_mantle,normal_ymin_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_ymin_crust_mantle));
+ copy_todevice_realw((void**)&mp->d_jacobian2D_ymin_crust_mantle,jacobian2D_ymin_crust_mantle,
+ NGLL2*(mp->nspec2D_ymin_crust_mantle));
// boundary buffer
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymin_crust_mantle,
@@ -896,21 +677,11 @@
// ymax
if( mp->nspec2D_ymax_crust_mantle > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_ymax_crust_mantle,
- (mp->nspec2D_ymax_crust_mantle)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymax_crust_mantle,ibelm_ymax_crust_mantle,
- (mp->nspec2D_ymax_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_normal_ymax_crust_mantle,
- NDIM*NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_ymax_crust_mantle,normal_ymax_crust_mantle,
- NDIM*NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymax_crust_mantle,
- NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymax_crust_mantle,jacobian2D_ymax_crust_mantle,
- NGLL2*(mp->nspec2D_ymax_crust_mantle)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+ copy_todevice_int((void**)&mp->d_ibelm_ymax_crust_mantle,ibelm_ymax_crust_mantle,mp->nspec2D_ymax_crust_mantle);
+ copy_todevice_realw((void**)&mp->d_normal_ymax_crust_mantle,normal_ymax_crust_mantle,
+ NDIM*NGLL2*(mp->nspec2D_ymax_crust_mantle));
+ copy_todevice_realw((void**)&mp->d_jacobian2D_ymax_crust_mantle,jacobian2D_ymax_crust_mantle,
+ NGLL2*(mp->nspec2D_ymax_crust_mantle));
// boundary buffer
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymax_crust_mantle,
@@ -928,54 +699,21 @@
// vp
size = NGLL3*(mp->NSPEC_OUTER_CORE);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_vp_outer_core,
- size*sizeof(realw)),2201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_vp_outer_core,vp_outer_core,
- size*sizeof(realw),cudaMemcpyHostToDevice),2202);
+ copy_todevice_realw((void**)&mp->d_vp_outer_core,vp_outer_core,size);
// ijk index arrays
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nkmin_xi_outer_core,
- 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_xi_outer_core,nkmin_xi_outer_core,
- 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
+ copy_todevice_int((void**)&mp->d_nkmin_xi_outer_core,nkmin_xi_outer_core,2*(*NSPEC2DMAX_XMIN_XMAX_OC));
+ copy_todevice_int((void**)&mp->d_nkmin_eta_outer_core,nkmin_eta_outer_core,2*(*NSPEC2DMAX_YMIN_YMAX_OC));
+ copy_todevice_int((void**)&mp->d_njmin_outer_core,njmin_outer_core,2*(*NSPEC2DMAX_XMIN_XMAX_OC));
+ copy_todevice_int((void**)&mp->d_njmax_outer_core,njmax_outer_core,2*(*NSPEC2DMAX_XMIN_XMAX_OC));
+ copy_todevice_int((void**)&mp->d_nimin_outer_core,nimin_outer_core,2*(*NSPEC2DMAX_YMIN_YMAX_OC));
+ copy_todevice_int((void**)&mp->d_nimax_outer_core,nimax_outer_core,2*(*NSPEC2DMAX_YMIN_YMAX_OC));
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nkmin_eta_outer_core,
- 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nkmin_eta_outer_core,nkmin_eta_outer_core,
- 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmin_outer_core,
- 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_njmin_outer_core,njmin_outer_core,
- 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_njmax_outer_core,
- 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_njmax_outer_core,njmax_outer_core,
- 2*(*NSPEC2DMAX_XMIN_XMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nimin_outer_core,
- 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nimin_outer_core,nimin_outer_core,
- 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nimax_outer_core,
- 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nimax_outer_core,nimax_outer_core,
- 2*(*NSPEC2DMAX_YMIN_YMAX_OC)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
// xmin
if( mp->nspec2D_xmin_outer_core > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_xmin_outer_core,
- (mp->nspec2D_xmin_outer_core)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmin_outer_core,ibelm_xmin_outer_core,
- (mp->nspec2D_xmin_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmin_outer_core,
- NGLL2*(mp->nspec2D_xmin_outer_core)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmin_outer_core,jacobian2D_xmin_outer_core,
- NGLL2*(mp->nspec2D_xmin_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+ copy_todevice_int((void**)&mp->d_ibelm_xmin_outer_core,ibelm_xmin_outer_core,mp->nspec2D_xmin_outer_core);
+ copy_todevice_realw((void**)&mp->d_jacobian2D_xmin_outer_core,jacobian2D_xmin_outer_core,
+ NGLL2*(mp->nspec2D_xmin_outer_core));
// boundary buffer
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmin_outer_core,
@@ -985,16 +723,9 @@
// xmax
if( mp->nspec2D_xmax_outer_core > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_xmax_outer_core,
- (mp->nspec2D_xmax_outer_core)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_xmax_outer_core,ibelm_xmax_outer_core,
- (mp->nspec2D_xmax_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_xmax_outer_core,
- NGLL2*(mp->nspec2D_xmax_outer_core)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_xmax_outer_core,jacobian2D_xmax_outer_core,
- NGLL2*(mp->nspec2D_xmax_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+ copy_todevice_int((void**)&mp->d_ibelm_xmax_outer_core,ibelm_xmax_outer_core,mp->nspec2D_xmax_outer_core);
+ copy_todevice_realw((void**)&mp->d_jacobian2D_xmax_outer_core,jacobian2D_xmax_outer_core,
+ NGLL2*(mp->nspec2D_xmax_outer_core));
// boundary buffer
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_xmax_outer_core,
@@ -1004,16 +735,9 @@
// ymin
if( mp->nspec2D_ymin_outer_core > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_ymin_outer_core,
- (mp->nspec2D_ymin_outer_core)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymin_outer_core,ibelm_ymin_outer_core,
- (mp->nspec2D_ymin_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymin_outer_core,
- NGLL2*(mp->nspec2D_ymin_outer_core)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymin_outer_core,jacobian2D_ymin_outer_core,
- NGLL2*(mp->nspec2D_ymin_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+ copy_todevice_int((void**)&mp->d_ibelm_ymin_outer_core,ibelm_ymin_outer_core,mp->nspec2D_ymin_outer_core);
+ copy_todevice_realw((void**)&mp->d_jacobian2D_ymin_outer_core,jacobian2D_ymin_outer_core,
+ NGLL2*(mp->nspec2D_ymin_outer_core));
// boundary buffer
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymin_outer_core,
@@ -1023,16 +747,9 @@
// ymax
if( mp->nspec2D_ymax_outer_core > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_ymax_outer_core,
- (mp->nspec2D_ymax_outer_core)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_ymax_outer_core,ibelm_ymax_outer_core,
- (mp->nspec2D_ymax_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_ymax_outer_core,
- NGLL2*(mp->nspec2D_ymax_outer_core)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_ymax_outer_core,jacobian2D_ymax_outer_core,
- NGLL2*(mp->nspec2D_ymax_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+ copy_todevice_int((void**)&mp->d_ibelm_ymax_outer_core,ibelm_ymax_outer_core,mp->nspec2D_ymax_outer_core);
+ copy_todevice_realw((void**)&mp->d_jacobian2D_ymax_outer_core,jacobian2D_ymax_outer_core,
+ NGLL2*(mp->nspec2D_ymax_outer_core));
// boundary buffer
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_ymax_outer_core,
@@ -1042,16 +759,9 @@
// zmin
if( mp->nspec2D_zmin_outer_core > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_zmin_outer_core,
- (mp->nspec2D_zmin_outer_core)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_zmin_outer_core,ibelm_bottom_outer_core,
- (mp->nspec2D_zmin_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_jacobian2D_zmin_outer_core,
- NGLL2*(mp->nspec2D_zmin_outer_core)*sizeof(realw)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_zmin_outer_core,jacobian2D_bottom_outer_core,
- NGLL2*(mp->nspec2D_zmin_outer_core)*sizeof(realw),cudaMemcpyHostToDevice),1202);
-
+ copy_todevice_int((void**)&mp->d_ibelm_zmin_outer_core,ibelm_bottom_outer_core,mp->nspec2D_zmin_outer_core);
+ copy_todevice_realw((void**)&mp->d_jacobian2D_zmin_outer_core,jacobian2D_bottom_outer_core,
+ NGLL2*(mp->nspec2D_zmin_outer_core));
// boundary buffer
if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_absorb_zmin_outer_core,
@@ -1081,8 +791,7 @@
int* num_interfaces_outer_core,
int* max_nibool_interfaces_outer_core,
int* nibool_interfaces_outer_core,
- int* ibool_interfaces_outer_core
- ){
+ int* ibool_interfaces_outer_core){
TRACE("prepare_mpi_buffers_device");
@@ -1095,16 +804,11 @@
mp->max_nibool_interfaces_crust_mantle = *max_nibool_interfaces_crust_mantle;
if( mp->num_interfaces_crust_mantle > 0 ){
// number of ibool entries array
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_crust_mantle,
- (mp->num_interfaces_crust_mantle)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_crust_mantle,nibool_interfaces_crust_mantle,
- (mp->num_interfaces_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
+ copy_todevice_int((void**)&mp->d_nibool_interfaces_crust_mantle,nibool_interfaces_crust_mantle,
+ mp->num_interfaces_crust_mantle);
// ibool entries (iglob indices) values on interface
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_crust_mantle,
- (mp->num_interfaces_crust_mantle)*(mp->max_nibool_interfaces_crust_mantle)*sizeof(int)),1203);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,
- (mp->num_interfaces_crust_mantle)*(mp->max_nibool_interfaces_crust_mantle)*sizeof(int),
- cudaMemcpyHostToDevice),1204);
+ copy_todevice_int((void**)&mp->d_ibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,
+ (mp->num_interfaces_crust_mantle)*(mp->max_nibool_interfaces_crust_mantle));
// allocates mpi buffer for exchange with cpu
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer_crust_mantle),
NDIM*(mp->max_nibool_interfaces_crust_mantle)*(mp->num_interfaces_crust_mantle)*sizeof(realw)),4004);
@@ -1115,16 +819,11 @@
mp->max_nibool_interfaces_inner_core = *max_nibool_interfaces_inner_core;
if( mp->num_interfaces_inner_core > 0 ){
// number of ibool entries array
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_inner_core,
- (mp->num_interfaces_inner_core)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_inner_core,nibool_interfaces_inner_core,
- (mp->num_interfaces_inner_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
+ copy_todevice_int((void**)&mp->d_nibool_interfaces_inner_core,nibool_interfaces_inner_core,
+ mp->num_interfaces_inner_core);
// ibool entries (iglob indices) values on interface
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_inner_core,
- (mp->num_interfaces_inner_core)*(mp->max_nibool_interfaces_inner_core)*sizeof(int)),1203);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_inner_core,ibool_interfaces_inner_core,
- (mp->num_interfaces_inner_core)*(mp->max_nibool_interfaces_inner_core)*sizeof(int),
- cudaMemcpyHostToDevice),1204);
+ copy_todevice_int((void**)&mp->d_ibool_interfaces_inner_core,ibool_interfaces_inner_core,
+ (mp->num_interfaces_inner_core)*(mp->max_nibool_interfaces_inner_core));
// allocates mpi buffer for exchange with cpu
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer_inner_core),
NDIM*(mp->max_nibool_interfaces_inner_core)*(mp->num_interfaces_inner_core)*sizeof(realw)),4004);
@@ -1136,22 +835,15 @@
mp->max_nibool_interfaces_outer_core = *max_nibool_interfaces_outer_core;
if( mp->num_interfaces_outer_core > 0 ){
// number of ibool entries array
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_outer_core,
- (mp->num_interfaces_outer_core)*sizeof(int)),1201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_outer_core,nibool_interfaces_outer_core,
- (mp->num_interfaces_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
+ copy_todevice_int((void**)&mp->d_nibool_interfaces_outer_core,nibool_interfaces_outer_core,
+ mp->num_interfaces_outer_core);
// ibool entries (iglob indices) values on interface
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_outer_core,
- (mp->num_interfaces_outer_core)*(mp->max_nibool_interfaces_outer_core)*sizeof(int)),1203);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_outer_core,ibool_interfaces_outer_core,
- (mp->num_interfaces_outer_core)*(mp->max_nibool_interfaces_outer_core)*sizeof(int),
- cudaMemcpyHostToDevice),1204);
+ copy_todevice_int((void**)&mp->d_ibool_interfaces_outer_core,ibool_interfaces_outer_core,
+ (mp->num_interfaces_outer_core)*(mp->max_nibool_interfaces_outer_core));
// allocates mpi buffer for exchange with cpu
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer_outer_core),
(mp->max_nibool_interfaces_outer_core)*(mp->num_interfaces_outer_core)*sizeof(realw)),4004);
}
-
-
}
/* ----------------------------------------------------------------------------------------------- */
@@ -1164,9 +856,9 @@
extern "C"
void FC_FUNC_(prepare_fields_noise_device,
PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
- int* nspec_top,
- int* ibelm_top_crust_mantle,
+ int* NSPEC_TOP,
int* NSTEP,
+ int* h_ibelm_top_crust_mantle,
realw* noise_sourcearray,
realw* normal_x_noise,
realw* normal_y_noise,
@@ -1179,58 +871,34 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
// free surface
- mp->nspec_top = *nspec_top;
- if( mp->nspec_top > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_top_crust_mantle,
- mp->nspec_top*sizeof(int)),7001);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_top_crust_mantle,ibelm_top_crust_mantle,
- mp->nspec_top*sizeof(int),cudaMemcpyHostToDevice),7002);
-
+ mp->nspec2D_top_crust_mantle = *NSPEC_TOP;
+ if( mp->nspec2D_top_crust_mantle > 0 ){
+ // note: d_ibelm_top_crust_mantle will only be needed for noise computations
+ copy_todevice_int((void**)&mp->d_ibelm_top_crust_mantle,h_ibelm_top_crust_mantle,mp->nspec2D_top_crust_mantle);
// alloc storage for the surface buffer to be copied
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie,
- NDIM*NGLL2*(mp->nspec_top)*sizeof(realw)),7005);
+ NDIM*NGLL2*(mp->nspec2D_top_crust_mantle)*sizeof(realw)),7005);
}else{
// for global mesh: each crust/mantle slice should have at top a free surface
- exit_on_cuda_error("prepare_fields_noise_device nspec_top not properly initialized");
+ exit_on_cuda_error("prepare_fields_noise_device NSPEC_TOP not properly initialized");
}
// prepares noise source array
if( mp->noise_tomography == 1 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
- NDIM*NGLL3*(*NSTEP)*sizeof(realw)),7101);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_sourcearray,noise_sourcearray,
- NDIM*NGLL3*(*NSTEP)*sizeof(realw),cudaMemcpyHostToDevice),7102);
+ copy_todevice_realw((void**)&mp->d_noise_sourcearray,noise_sourcearray,NDIM*NGLL3*(*NSTEP));
}
// prepares noise directions
if( mp->noise_tomography > 1 ){
- int nface_size = NGLL2*(mp->nspec_top);
+ int nface_size = NGLL2*(mp->nspec2D_top_crust_mantle);
// allocates memory on GPU
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_x_noise,
- nface_size*sizeof(realw)),7301);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7306);
+ copy_todevice_realw((void**)&mp->d_normal_x_noise,normal_x_noise,nface_size);
+ copy_todevice_realw((void**)&mp->d_normal_y_noise,normal_y_noise,nface_size);
+ copy_todevice_realw((void**)&mp->d_normal_z_noise,normal_z_noise,nface_size);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_y_noise,
- nface_size*sizeof(realw)),7302);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7307);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise,
- nface_size*sizeof(realw)),7303);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7308);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise,
- nface_size*sizeof(realw)),7304);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7309);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_jacobian2D_top_crust_mantle,
- nface_size*sizeof(realw)),7305);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_top_crust_mantle, jacobian2D_top_crust_mantle,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7310);
+ copy_todevice_realw((void**)&mp->d_mask_noise,mask_noise,nface_size);
+ copy_todevice_realw((void**)&mp->d_jacobian2D_top_crust_mantle,jacobian2D_top_crust_mantle,nface_size);
}
// prepares noise strength kernel
@@ -1251,6 +919,46 @@
/* ----------------------------------------------------------------------------------------------- */
+// OCEANS
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_oceans_device,
+ PREPARE_OCEANS_DEVICE)(long* Mesh_pointer_f,
+ int* npoin_oceans,
+ realw* h_rmass_ocean_load_selected,
+ int* h_iglob_ocean_load,
+ realw* h_normal_ocean_load) {
+
+ TRACE("prepare_oceans_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ // arrays with global points on ocean surface
+ mp->npoin_oceans = *npoin_oceans;
+
+ // checks for global partitions, each slice must have a top surface with points on it
+ if( mp->npoin_oceans == 0 ){ exit_on_cuda_error("prepare_oceans_device has zero npoin_oceans"); }
+
+ // mass matrix
+ copy_todevice_realw((void**)&mp->d_rmass_ocean_load,h_rmass_ocean_load_selected,mp->npoin_oceans);
+
+ // global point indices
+ copy_todevice_int((void**)&mp->d_iglob_ocean_load,h_iglob_ocean_load,mp->npoin_oceans);
+
+ // normals
+ copy_todevice_realw((void**)&mp->d_normal_ocean_load,h_normal_ocean_load,NDIM*mp->npoin_oceans);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_oceans_device");
+#endif
+}
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
// Earth regions
// CRUST / MANTLE
@@ -1270,9 +978,6 @@
realw* h_rmassx,
realw* h_rmassy,
realw* h_rmassz,
- realw* h_normal_top_crust_mantle,
- int* h_ibelm_top_crust_mantle,
- int* h_ibelm_bottom_crust_mantle,
int* h_ibool,
realw* h_xstore, realw* h_ystore, realw* h_zstore,
int* h_ispec_is_tiso,
@@ -1287,8 +992,8 @@
int* phase_ispec_inner,
int* nspec_outer,
int* nspec_inner,
- int* NSPEC2D_TOP_CM,
int* NSPEC2D_BOTTOM_CM,
+ int* h_ibelm_bottom_crust_mantle,
int* NCHUNKS_VAL,
int* num_colors_outer,
int* num_colors_inner,
@@ -1336,20 +1041,19 @@
}
// global indexing
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_crust_mantle, size_padded*sizeof(int)),1021);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_crust_mantle, h_ibool,
- NGLL3*(mp->NSPEC_CRUST_MANTLE)*sizeof(int),cudaMemcpyHostToDevice),1022);
+ copy_todevice_int((void**)&mp->d_ibool_crust_mantle,h_ibool,NGLL3*(mp->NSPEC_CRUST_MANTLE));
+// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_crust_mantle, size_padded*sizeof(int)),1021);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_crust_mantle, h_ibool,
+// NGLL3*(mp->NSPEC_CRUST_MANTLE)*sizeof(int),cudaMemcpyHostToDevice),1022);
+
// transverse isotropic elements
// only needed if not anisotropic 3D mantle
if( ! mp->anisotropic_3D_mantle ){
// no anisotropy
// transverse isotropy flag
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_tiso_crust_mantle,
- (mp->NSPEC_CRUST_MANTLE)*sizeof(int)),1025);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_tiso_crust_mantle, h_ispec_is_tiso,
- (mp->NSPEC_CRUST_MANTLE)*sizeof(int),cudaMemcpyHostToDevice),1025);
+ copy_todevice_int((void**)&mp->d_ispec_is_tiso_crust_mantle,h_ispec_is_tiso,mp->NSPEC_CRUST_MANTLE);
// kappavstore, kappahstore
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappavstore_crust_mantle, size_padded*sizeof(realw)),1010);
@@ -1482,49 +1186,27 @@
// mesh locations
// ystore & zstore needed for tiso elements
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ystore_crust_mantle),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ystore_crust_mantle,h_ystore,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_zstore_crust_mantle),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_zstore_crust_mantle,h_zstore,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ copy_todevice_realw((void**)&mp->d_ystore_crust_mantle,h_ystore,size_glob);
+ copy_todevice_realw((void**)&mp->d_zstore_crust_mantle,h_zstore,size_glob);
+
// xstore only needed when gravity is on
if( mp->gravity ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_xstore_crust_mantle),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_xstore_crust_mantle,h_xstore,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ copy_todevice_realw((void**)&mp->d_xstore_crust_mantle,h_xstore,size_glob);
}
// inner/outer elements
mp->num_phase_ispec_crust_mantle = *num_phase_ispec;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_crust_mantle),
- mp->num_phase_ispec_crust_mantle*2*sizeof(int)),2008);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_crust_mantle,phase_ispec_inner,
- mp->num_phase_ispec_crust_mantle*2*sizeof(int),cudaMemcpyHostToDevice),2101);
+ copy_todevice_int((void**)&mp->d_phase_ispec_inner_crust_mantle,phase_ispec_inner,
+ mp->num_phase_ispec_crust_mantle*2);
mp->nspec_outer_crust_mantle = *nspec_outer;
mp->nspec_inner_crust_mantle = *nspec_inner;
- // CMB/ocean coupling
- mp->nspec2D_top_crust_mantle = *NSPEC2D_TOP_CM;
+ // CMB/fluid outer core coupling
mp->nspec2D_bottom_crust_mantle = *NSPEC2D_BOTTOM_CM;
- int size_tcm = NGLL2*(mp->nspec2D_top_crust_mantle);
+ copy_todevice_int((void**)&mp->d_ibelm_bottom_crust_mantle,h_ibelm_bottom_crust_mantle,mp->nspec2D_bottom_crust_mantle);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_normal_top_crust_mantle),
- sizeof(realw)*NDIM*size_tcm),40020);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_top_crust_mantle,h_normal_top_crust_mantle,
- sizeof(realw)*NDIM*size_tcm,cudaMemcpyHostToDevice),40030);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ibelm_top_crust_mantle),
- sizeof(int)*(mp->nspec2D_top_crust_mantle)),40021);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ibelm_bottom_crust_mantle),
- sizeof(int)*(mp->nspec2D_bottom_crust_mantle)),40021);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_top_crust_mantle,h_ibelm_top_crust_mantle,
- sizeof(int)*(mp->nspec2D_top_crust_mantle),cudaMemcpyHostToDevice),40031);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_bottom_crust_mantle,h_ibelm_bottom_crust_mantle,
- sizeof(int)*(mp->nspec2D_bottom_crust_mantle),cudaMemcpyHostToDevice),40031);
-
// wavefield
int size = NDIM * mp->NGLOB_CRUST_MANTLE;
@@ -1555,21 +1237,13 @@
// mass matrices
if( *NCHUNKS_VAL != 6 && mp->absorbing_conditions){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmassx_crust_mantle),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmassx_crust_mantle,h_rmassx,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmassy_crust_mantle),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmassy_crust_mantle,h_rmassy,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ copy_todevice_realw((void**)&mp->d_rmassx_crust_mantle,h_rmassx,size_glob);
+ copy_todevice_realw((void**)&mp->d_rmassy_crust_mantle,h_rmassy,size_glob);
}
+ copy_todevice_realw((void**)&mp->d_rmassz_crust_mantle,h_rmassz,size_glob);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmassz_crust_mantle),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmassz_crust_mantle,h_rmassz,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
-
// kernels
if( mp->simulation_type == 3 ){
-
size = NGLL3*(mp->NSPEC_CRUST_MANTLE);
// density kernel
@@ -1626,12 +1300,6 @@
realw* h_gammax, realw* h_gammay, realw* h_gammaz,
realw* h_rho, realw* h_kappav,
realw* h_rmass,
- realw* h_normal_top_outer_core,
- realw* h_normal_bottom_outer_core,
- realw* h_jacobian2D_top_outer_core,
- realw* h_jacobian2D_bottom_outer_core,
- int* h_ibelm_top_outer_core,
- int* h_ibelm_bottom_outer_core,
int* h_ibool,
realw* h_xstore, realw* h_ystore, realw* h_zstore,
int* num_phase_ispec,
@@ -1640,6 +1308,12 @@
int* nspec_inner,
int* NSPEC2D_TOP_OC,
int* NSPEC2D_BOTTOM_OC,
+ realw* h_normal_top_outer_core,
+ realw* h_normal_bottom_outer_core,
+ realw* h_jacobian2D_top_outer_core,
+ realw* h_jacobian2D_bottom_outer_core,
+ int* h_ibelm_top_outer_core,
+ int* h_ibelm_bottom_outer_core,
int* num_colors_outer,
int* num_colors_inner,
int* num_elem_colors) {
@@ -1700,28 +1374,21 @@
}
// global indexing
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_outer_core, size_padded*sizeof(int)),1021);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_outer_core, h_ibool,
- NGLL3*(mp->NSPEC_OUTER_CORE)*sizeof(int),cudaMemcpyHostToDevice),1022);
+ copy_todevice_int((void**)&mp->d_ibool_outer_core,h_ibool,NGLL3*(mp->NSPEC_OUTER_CORE));
+// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_outer_core, size_padded*sizeof(int)),1021);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_outer_core, h_ibool,
+// NGLL3*(mp->NSPEC_OUTER_CORE)*sizeof(int),cudaMemcpyHostToDevice),1022);
+
// mesh locations
// always needed
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_xstore_outer_core),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_xstore_outer_core,h_xstore,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ystore_outer_core),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ystore_outer_core,h_ystore,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_zstore_outer_core),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_zstore_outer_core,h_zstore,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ copy_todevice_realw((void**)&mp->d_xstore_outer_core,h_xstore,size_glob);
+ copy_todevice_realw((void**)&mp->d_ystore_outer_core,h_ystore,size_glob);
+ copy_todevice_realw((void**)&mp->d_zstore_outer_core,h_zstore,size_glob);
// inner/outer elements
mp->num_phase_ispec_outer_core = *num_phase_ispec;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_outer_core),
- mp->num_phase_ispec_outer_core*2*sizeof(int)),2008);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_outer_core,phase_ispec_inner,
- mp->num_phase_ispec_outer_core*2*sizeof(int),cudaMemcpyHostToDevice),2101);
+ copy_todevice_int((void**)&mp->d_phase_ispec_inner_outer_core,phase_ispec_inner,mp->num_phase_ispec_outer_core*2);
mp->nspec_outer_outer_core = *nspec_outer;
mp->nspec_inner_outer_core = *nspec_inner;
@@ -1732,20 +1399,14 @@
int size_toc = NGLL2*(mp->nspec2D_top_outer_core);
int size_boc = NGLL2*(mp->nspec2D_bottom_outer_core);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_normal_top_outer_core),sizeof(realw)*NDIM*size_toc),40020);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_normal_bottom_outer_core),sizeof(realw)*NDIM*size_boc),40021);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_top_outer_core,h_normal_top_outer_core,sizeof(realw)*NDIM*size_toc,cudaMemcpyHostToDevice),40030);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_bottom_outer_core,h_normal_bottom_outer_core,sizeof(realw)*NDIM*size_boc,cudaMemcpyHostToDevice),40031);
+ copy_todevice_realw((void**)&mp->d_normal_top_outer_core,h_normal_top_outer_core,NDIM*size_toc);
+ copy_todevice_realw((void**)&mp->d_normal_bottom_outer_core,h_normal_bottom_outer_core,NDIM*size_boc);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_jacobian2D_top_outer_core),sizeof(realw)*size_toc),40022);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_jacobian2D_bottom_outer_core),sizeof(realw)*size_boc),40023);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_top_outer_core,h_jacobian2D_top_outer_core,sizeof(realw)*size_toc,cudaMemcpyHostToDevice),40032);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_bottom_outer_core,h_jacobian2D_bottom_outer_core,sizeof(realw)*size_boc,cudaMemcpyHostToDevice),40033);
+ copy_todevice_realw((void**)&mp->d_jacobian2D_top_outer_core,h_jacobian2D_top_outer_core,size_toc);
+ copy_todevice_realw((void**)&mp->d_jacobian2D_bottom_outer_core,h_jacobian2D_bottom_outer_core,size_boc);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ibelm_top_outer_core),sizeof(int)*(mp->nspec2D_top_outer_core)),40024);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ibelm_bottom_outer_core),sizeof(int)*(mp->nspec2D_bottom_outer_core)),40025);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_top_outer_core,h_ibelm_top_outer_core,sizeof(int)*(mp->nspec2D_top_outer_core),cudaMemcpyHostToDevice),40034);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_bottom_outer_core,h_ibelm_bottom_outer_core,sizeof(int)*(mp->nspec2D_bottom_outer_core),cudaMemcpyHostToDevice),40035);
+ copy_todevice_int((void**)&mp->d_ibelm_top_outer_core,h_ibelm_top_outer_core,mp->nspec2D_top_outer_core);
+ copy_todevice_int((void**)&mp->d_ibelm_bottom_outer_core,h_ibelm_bottom_outer_core,mp->nspec2D_bottom_outer_core);
// wavefield
int size = mp->NGLOB_OUTER_CORE;
@@ -1773,15 +1434,11 @@
}
#endif
-
// mass matrix
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_outer_core),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_outer_core,h_rmass,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ copy_todevice_realw((void**)&mp->d_rmass_outer_core,h_rmass,size_glob);
// kernels
if( mp->simulation_type == 3 ){
-
size = NGLL3*(mp->NSPEC_OUTER_CORE);
// density kernel
@@ -1820,7 +1477,6 @@
realw* h_gammax, realw* h_gammay, realw* h_gammaz,
realw* h_rho, realw* h_kappav, realw* h_muv,
realw* h_rmass,
- int* h_ibelm_top_inner_core,
int* h_ibool,
realw* h_xstore, realw* h_ystore, realw* h_zstore,
realw *c11store,realw *c12store,realw *c13store,
@@ -1831,6 +1487,7 @@
int* nspec_outer,
int* nspec_inner,
int* NSPEC2D_TOP_IC,
+ int* h_ibelm_top_inner_core,
int* num_colors_outer,
int* num_colors_inner,
int* num_elem_colors) {
@@ -1929,44 +1586,32 @@
}
// global indexing
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_inner_core, size_padded*sizeof(int)),1021);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_inner_core, h_ibool,
- NGLL3*(mp->NSPEC_INNER_CORE)*sizeof(int),cudaMemcpyHostToDevice),1022);
+ copy_todevice_int((void**)&mp->d_ibool_inner_core,h_ibool,NGLL3*(mp->NSPEC_INNER_CORE));
+// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_inner_core, size_padded*sizeof(int)),1021);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_inner_core, h_ibool,
+// NGLL3*(mp->NSPEC_INNER_CORE)*sizeof(int),cudaMemcpyHostToDevice),1022);
+
// fictious element flags
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_idoubling_inner_core,
- mp->NSPEC_INNER_CORE*sizeof(int)),2010);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_idoubling_inner_core, h_idoubling_inner_core,
- mp->NSPEC_INNER_CORE*sizeof(int),cudaMemcpyHostToDevice),2011);
+ copy_todevice_int((void**)&mp->d_idoubling_inner_core,h_idoubling_inner_core,mp->NSPEC_INNER_CORE);
// mesh locations
// only needed when gravity is on
if( mp->gravity ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_xstore_inner_core),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_xstore_inner_core,h_xstore,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ystore_inner_core),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ystore_inner_core,h_ystore,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_zstore_inner_core),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_zstore_inner_core,h_zstore,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ copy_todevice_realw((void**)&mp->d_xstore_inner_core,h_xstore,size_glob);
+ copy_todevice_realw((void**)&mp->d_ystore_inner_core,h_ystore,size_glob);
+ copy_todevice_realw((void**)&mp->d_zstore_inner_core,h_zstore,size_glob);
}
// inner/outer elements
mp->num_phase_ispec_inner_core = *num_phase_ispec;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_inner_core),
- mp->num_phase_ispec_inner_core*2*sizeof(int)),2008);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_inner_core,phase_ispec_inner,
- mp->num_phase_ispec_inner_core*2*sizeof(int),cudaMemcpyHostToDevice),2101);
+ copy_todevice_int((void**)&mp->d_phase_ispec_inner_inner_core,phase_ispec_inner,mp->num_phase_ispec_inner_core*2);
mp->nspec_outer_inner_core = *nspec_outer;
mp->nspec_inner_inner_core = *nspec_inner;
mp->nspec2D_top_inner_core = *NSPEC2D_TOP_IC;
+ copy_todevice_int((void**)&mp->d_ibelm_top_inner_core,h_ibelm_top_inner_core,mp->nspec2D_top_inner_core);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ibelm_top_inner_core),sizeof(int)*(mp->nspec2D_top_inner_core)),40021);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_top_inner_core,h_ibelm_top_inner_core,sizeof(int)*(mp->nspec2D_top_inner_core),cudaMemcpyHostToDevice),40031);
-
// wavefield
int size = NDIM * mp->NGLOB_INNER_CORE;
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_inner_core),sizeof(realw)*size),4001);
@@ -1993,15 +1638,11 @@
}
#endif
-
// mass matrix
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_inner_core),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_inner_core,h_rmass,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ copy_todevice_realw((void**)&mp->d_rmass_inner_core,h_rmass,size_glob);
// kernels
if( mp->simulation_type == 3 ){
-
size = NGLL3*(mp->NSPEC_INNER_CORE);
// density kernel
@@ -2028,44 +1669,10 @@
#endif
}
-/* ----------------------------------------------------------------------------------------------- */
-// OCEANS
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(prepare_oceans_device,
- PREPARE_OCEANS_DEVICE)(long* Mesh_pointer_f,
- realw* h_rmass_ocean_load) {
-
- TRACE("prepare_oceans_device");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-
- // mass matrix
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load),
- sizeof(realw)*mp->NGLOB_CRUST_MANTLE_OCEANS),4501);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,h_rmass_ocean_load,
- sizeof(realw)*mp->NGLOB_CRUST_MANTLE_OCEANS,cudaMemcpyHostToDevice),4502);
-
- // temporary global array: used to synchronize updates on global accel array
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_updated_dof_ocean_load),
- sizeof(int)*mp->NGLOB_CRUST_MANTLE_OCEANS),4503);
- if( mp->simulation_type == 3 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_updated_dof_ocean_load),
- sizeof(int)*mp->NGLOB_CRUST_MANTLE_OCEANS),4504);
- }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_oceans_device");
-#endif
-}
-
-
-
-/* ----------------------------------------------------------------------------------------------- */
-
// cleanup
/* ----------------------------------------------------------------------------------------------- */
@@ -2380,10 +1987,8 @@
if( mp->gravity ){
cudaFree(mp->d_xstore_crust_mantle);
}
- cudaFree(mp->d_phase_ispec_inner_crust_mantle);
- cudaFree(mp->d_normal_top_crust_mantle);
- cudaFree(mp->d_ibelm_top_crust_mantle);
+ cudaFree(mp->d_phase_ispec_inner_crust_mantle);
cudaFree(mp->d_ibelm_bottom_crust_mantle);
cudaFree(mp->d_displ_crust_mantle);
@@ -2467,18 +2072,9 @@
cudaFree(mp->d_gammay_inner_core);
cudaFree(mp->d_gammaz_inner_core);
-
cudaFree(mp->d_muvstore_inner_core);
cudaFree(mp->d_ibool_inner_core);
- if( mp->oceans ){
- cudaFree(mp->d_rmass_ocean_load);
- cudaFree(mp->d_updated_dof_ocean_load);
- if( mp->simulation_type == 3 ){
- cudaFree(mp->d_b_updated_dof_ocean_load);
- }
- }
-
if( mp->gravity ){
cudaFree(mp->d_xstore_inner_core);
cudaFree(mp->d_ystore_inner_core);
@@ -2522,6 +2118,12 @@
}
cudaFree(mp->d_rmass_inner_core);
+ if( mp->oceans ){
+ cudaFree(mp->d_rmass_ocean_load);
+ cudaFree(mp->d_iglob_ocean_load);
+ cudaFree(mp->d_normal_ocean_load);
+ }
+
// releases previous contexts
cudaThreadExit();
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-08-15 19:55:44 UTC (rev 20570)
@@ -81,36 +81,6 @@
void FC_FUNC_(get_free_device_memory,
get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {}
-void FC_FUNC_(check_max_norm_displ_gpu,
- CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {}
-
-void FC_FUNC_(check_max_norm_vector,
- CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {}
-
-void FC_FUNC_(check_max_norm_displ,
- CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {}
-
-void FC_FUNC_(check_max_norm_b_displ_gpu,
- CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {}
-
-void FC_FUNC_(check_max_norm_b_accel_gpu,
- CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {}
-
-void FC_FUNC_(check_max_norm_b_veloc_gpu,
- CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {}
-
-void FC_FUNC_(check_max_norm_b_displ,
- CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {}
-
-void FC_FUNC_(check_max_norm_b_accel,
- CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {}
-
-void FC_FUNC_(check_error_vectors,
- CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {}
-
-void FC_FUNC_(get_max_accel,
- GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {}
-
void FC_FUNC_(check_norm_acoustic_from_device,
CHECK_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
long* Mesh_pointer_f,
@@ -126,7 +96,37 @@
realw* strain_norm2,
long* Mesh_pointer_f) {}
+ void FC_FUNC_(check_max_norm_displ_gpu,
+ CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {}
+ void FC_FUNC_(check_max_norm_vector,
+ CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {}
+
+ void FC_FUNC_(check_max_norm_displ,
+ CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {}
+
+ void FC_FUNC_(check_max_norm_b_displ_gpu,
+ CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {}
+
+ void FC_FUNC_(check_max_norm_b_accel_gpu,
+ CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {}
+
+ void FC_FUNC_(check_max_norm_b_veloc_gpu,
+ CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {}
+
+ void FC_FUNC_(check_max_norm_b_displ,
+ CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {}
+
+ void FC_FUNC_(check_max_norm_b_accel,
+ CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {}
+
+ void FC_FUNC_(check_error_vectors,
+ CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {}
+
+ void FC_FUNC_(get_max_accel,
+ GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {}
+
+
//
// src/cuda/compute_add_sources_elastic_cuda.cu
//
@@ -299,7 +299,7 @@
int* SIMULATION_TYPE_f,
realw* b_deltatover2_F,
int* OCEANS,
- int* NCHUNKS_VAL) {}
+ int* NCHUNKS_VAL) {}
void FC_FUNC_(kernel_3_b_cuda,
KERNEL_3_B_CUDA)(long* Mesh_pointer,
@@ -363,7 +363,6 @@
int* nrec,int* nrec_local, int* nadj_rec_local,
int* NSPEC_CRUST_MANTLE, int* NGLOB_CRUST_MANTLE,
int* NSPEC_CRUST_MANTLE_STRAIN_ONLY,
- int* NGLOB_CRUST_MANTLE_OCEANS,
int* NSPEC_OUTER_CORE, int* NGLOB_OUTER_CORE,
int* NSPEC_INNER_CORE, int* NGLOB_INNER_CORE,
int* NSPEC_INNER_CORE_STRAIN_ONLY,
@@ -396,8 +395,7 @@
realw* b_deltat,
realw* b_A_array_rotation,
realw* b_B_array_rotation,
- int* NSPEC_OUTER_CORE_ROTATION
- ) {}
+ int* NSPEC_OUTER_CORE_ROTATION) {}
void FC_FUNC_(prepare_fields_gravity_device,
PREPARE_FIELDS_gravity_DEVICE)(long* Mesh_pointer_f,
@@ -407,8 +405,7 @@
realw* minus_deriv_gravity_table,
realw* density_table,
realw* h_wgll_cube,
- int* NRAD_GRAVITY
- ) {}
+ int* NRAD_GRAVITY) {}
void FC_FUNC_(prepare_fields_attenuat_device,
PREPARE_FIELDS_ATTENUAT_DEVICE)(long* Mesh_pointer_f,
@@ -427,8 +424,7 @@
realw* factor_common_inner_core,
realw* one_minus_sum_beta_inner_core,
realw* alphaval,realw* betaval,realw* gammaval,
- realw* b_alphaval,realw* b_betaval,realw* b_gammaval
- ) {}
+ realw* b_alphaval,realw* b_betaval,realw* b_gammaval) {}
void FC_FUNC_(prepare_fields_strain_device,
PREPARE_FIELDS_STRAIN_DEVICE)(long* Mesh_pointer_f,
@@ -455,8 +451,7 @@
realw* b_epsilondev_xz_inner_core,
realw* b_epsilondev_yz_inner_core,
realw* eps_trace_over_3_inner_core,
- realw* b_eps_trace_over_3_inner_core
- ) {}
+ realw* b_eps_trace_over_3_inner_core) {}
void FC_FUNC_(prepare_fields_absorb_device,
PREPARE_FIELDS_ABSORB_DEVICE)(long* Mesh_pointer_f,
@@ -487,8 +482,7 @@
realw* jacobian2D_xmin_outer_core, realw* jacobian2D_xmax_outer_core,
realw* jacobian2D_ymin_outer_core, realw* jacobian2D_ymax_outer_core,
realw* jacobian2D_bottom_outer_core,
- realw* vp_outer_core
- ) {}
+ realw* vp_outer_core) {}
void FC_FUNC_(prepare_mpi_buffers_device,
PREPARE_MPI_BUFFERS_DEVICE)(long* Mesh_pointer_f,
@@ -503,14 +497,13 @@
int* num_interfaces_outer_core,
int* max_nibool_interfaces_outer_core,
int* nibool_interfaces_outer_core,
- int* ibool_interfaces_outer_core
- ){}
+ int* ibool_interfaces_outer_core){}
void FC_FUNC_(prepare_fields_noise_device,
PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
- int* nspec_top,
- int* ibelm_top_crust_mantle,
+ int* NSPEC_TOP,
int* NSTEP,
+ int* h_ibelm_top_crust_mantle,
realw* noise_sourcearray,
realw* normal_x_noise,
realw* normal_y_noise,
@@ -518,6 +511,13 @@
realw* mask_noise,
realw* jacobian2D_top_crust_mantle) {}
+void FC_FUNC_(prepare_oceans_device,
+ PREPARE_OCEANS_DEVICE)(long* Mesh_pointer_f,
+ int* npoin_oceans,
+ realw* h_rmass_ocean_load_selected,
+ int* h_iglob_ocean_load,
+ realw* h_normal_ocean_load) {}
+
void FC_FUNC_(prepare_crust_mantle_device,
PREPARE_CRUST_MANTLE_DEVICE)(long* Mesh_pointer_f,
realw* h_xix, realw* h_xiy, realw* h_xiz,
@@ -530,9 +530,6 @@
realw* h_rmassx,
realw* h_rmassy,
realw* h_rmassz,
- realw* h_normal_top_crust_mantle,
- int* h_ibelm_top_crust_mantle,
- int* h_ibelm_bottom_crust_mantle,
int* h_ibool,
realw* h_xstore, realw* h_ystore, realw* h_zstore,
int* h_ispec_is_tiso,
@@ -547,8 +544,8 @@
int* phase_ispec_inner,
int* nspec_outer,
int* nspec_inner,
- int* NSPEC2D_TOP_CM,
int* NSPEC2D_BOTTOM_CM,
+ int* h_ibelm_bottom_crust_mantle,
int* NCHUNKS_VAL,
int* num_colors_outer,
int* num_colors_inner,
@@ -561,12 +558,6 @@
realw* h_gammax, realw* h_gammay, realw* h_gammaz,
realw* h_rho, realw* h_kappav,
realw* h_rmass,
- realw* h_normal_top_outer_core,
- realw* h_normal_bottom_outer_core,
- realw* h_jacobian2D_top_outer_core,
- realw* h_jacobian2D_bottom_outer_core,
- int* h_ibelm_top_outer_core,
- int* h_ibelm_bottom_outer_core,
int* h_ibool,
realw* h_xstore, realw* h_ystore, realw* h_zstore,
int* num_phase_ispec,
@@ -575,6 +566,12 @@
int* nspec_inner,
int* NSPEC2D_TOP_OC,
int* NSPEC2D_BOTTOM_OC,
+ realw* h_normal_top_outer_core,
+ realw* h_normal_bottom_outer_core,
+ realw* h_jacobian2D_top_outer_core,
+ realw* h_jacobian2D_bottom_outer_core,
+ int* h_ibelm_top_outer_core,
+ int* h_ibelm_bottom_outer_core,
int* num_colors_outer,
int* num_colors_inner,
int* num_elem_colors) {}
@@ -586,7 +583,6 @@
realw* h_gammax, realw* h_gammay, realw* h_gammaz,
realw* h_rho, realw* h_kappav, realw* h_muv,
realw* h_rmass,
- int* h_ibelm_top_inner_core,
int* h_ibool,
realw* h_xstore, realw* h_ystore, realw* h_zstore,
realw *c11store,realw *c12store,realw *c13store,
@@ -597,14 +593,11 @@
int* nspec_outer,
int* nspec_inner,
int* NSPEC2D_TOP_IC,
+ int* h_ibelm_top_inner_core,
int* num_colors_outer,
int* num_colors_inner,
int* num_elem_colors) {}
-void FC_FUNC_(prepare_oceans_device,
- PREPARE_OCEANS_DEVICE)(long* Mesh_pointer_f,
- realw* h_rmass_ocean_load) {}
-
void FC_FUNC_(prepare_cleanup_device,
PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f,
int* NCHUNKS_VAL) {}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_MPI_interfaces.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_MPI_interfaces.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_MPI_interfaces.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -284,7 +284,7 @@
integer :: ier
! for central cube buffers
integer :: nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
- nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
+ nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
integer, dimension(:),allocatable :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
integer, dimension(:),allocatable :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
integer, dimension(:),allocatable :: ibelm_top_inner_core
@@ -418,7 +418,7 @@
ibelm_bottom_inner_core(NSPEC2D_BOTTOM_IC), &
stat=ier)
if( ier /= 0 ) call exit_MPI(myrank,'error allocating central cube index arrays')
-
+
! gets coupling arrays for inner core
nspec2D_xmin_inner_core = nspec2D_xmin
nspec2D_xmax_inner_core = nspec2D_xmax
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -106,7 +106,7 @@
iglob = ibool(i,j,k,ispec)
weight = wxgll(i)*wygll(j)*wzgll(k)
-
+
! definition depends if region is fluid or solid
select case( iregion_code)
case( IREGION_CRUST_MANTLE, IREGION_INNER_CORE )
@@ -161,7 +161,7 @@
! loops over surface points
do j = 1,NGLLY
do i = 1,NGLLX
-
+
! if 3D Earth with topography, compute local height of oceans
if( TOPOGRAPHY ) then
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regions_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regions_mesh.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regions_mesh.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -104,7 +104,7 @@
case default
call exit_MPI(myrank,'error ipass value in create_regions_mesh')
end select
- call flush_IMAIN()
+ call flush_IMAIN()
endif
! create the name for the database of the current slide and region
@@ -115,7 +115,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...allocating arrays '
- call flush_IMAIN()
+ call flush_IMAIN()
endif
call crm_allocate_arrays(iregion_code,nspec,ipass, &
NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX, &
@@ -127,7 +127,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...setting up layers '
- call flush_IMAIN()
+ call flush_IMAIN()
endif
call crm_setup_layers(iregion_code,nspec,ipass,NEX_PER_PROC_ETA)
@@ -136,7 +136,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...creating mesh elements '
- call flush_IMAIN()
+ call flush_IMAIN()
endif
call crm_create_elements(iregion_code,nspec,ipass, &
NEX_PER_PROC_XI,NEX_PER_PROC_ETA, &
@@ -151,7 +151,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...creating global addressing'
- call flush_IMAIN()
+ call flush_IMAIN()
endif
call crm_setup_indexing(nspec,nglob_theor,npointot)
@@ -161,7 +161,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...creating MPI buffers'
- call flush_IMAIN()
+ call flush_IMAIN()
endif
call crm_setup_mpi_buffers(npointot,nspec,iregion_code)
@@ -179,7 +179,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...precomputing jacobian'
- call flush_IMAIN()
+ call flush_IMAIN()
endif
call get_jacobian_boundaries(myrank,iboun,nspec,xstore,ystore,zstore, &
dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
@@ -200,7 +200,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...creating chunk buffers'
- call flush_IMAIN()
+ call flush_IMAIN()
endif
call create_chunk_buffers(iregion_code,nspec,ibool,idoubling, &
xstore,ystore,zstore,nglob_theor, &
@@ -218,7 +218,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...preparing MPI interfaces'
- call flush_IMAIN()
+ call flush_IMAIN()
endif
! creates MPI interface arrays
call create_MPI_interfaces(iregion_code)
@@ -245,7 +245,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...element mesh coloring '
- call flush_IMAIN()
+ call flush_IMAIN()
endif
call setup_color_perm(iregion_code)
@@ -277,7 +277,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...creating mass matrix'
- call flush_IMAIN()
+ call flush_IMAIN()
endif
! allocates mass matrices in this slice (will be fully assembled in the solver)
@@ -331,7 +331,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...saving binary files'
- call flush_IMAIN()
+ call flush_IMAIN()
endif
! saves mesh and model parameters
call save_arrays_solver(myrank,nspec,nglob,idoubling,ibool, &
@@ -357,11 +357,11 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...saving boundary mesh files'
- call flush_IMAIN()
+ call flush_IMAIN()
endif
! saves boundary file
call save_arrays_solver_boundary()
-
+
endif
! compute volume, bottom and top area of that part of the slice
@@ -383,7 +383,7 @@
if( myrank == 0) then
write(IMAIN,*)
write(IMAIN,*) ' ...saving AVS mesh files'
- call flush_IMAIN()
+ call flush_IMAIN()
endif
call crm_save_mesh_files(nspec,npointot,iregion_code)
endif
@@ -915,17 +915,18 @@
! time estimate
tCPU = wtime() - time_start
- ! outputs remaining time (poor estimation)
- tCPU = (1.0-(ilayer_loop-ifirst_region+1.0)/(ilast_region-ifirst_region+1.0)) &
- /(ilayer_loop-ifirst_region+1.0)/(ilast_region-ifirst_region+1.0)*tCPU*10.0
-
- write(IMAIN,*) " ",(ilayer_loop-ifirst_region+1.0)/(ilast_region-ifirst_region+1.0) * 100.0," %", &
- " time remaining:", tCPU,"s"
-
! outputs current time on system
call date_and_time(VALUES=tval)
- write(IMAIN,*) " ",tval(5),"h",tval(6),"min",tval(7),".",tval(8),"sec"
+ ! debug: outputs remaining time (poor estimation)
+ !tCPU = (1.0-(ilayer_loop-ifirst_region+1.0)/(ilast_region-ifirst_region+1.0)) &
+ ! /(ilayer_loop-ifirst_region+1.0)/(ilast_region-ifirst_region+1.0)*tCPU*10.0
+
+ ! user output
+ write(IMAIN,'(a,f5.1,a,a,i2.2,a,i2.2,a,i2.2,a)') &
+ " ",(ilayer_loop-ifirst_region+1.0)/(ilast_region-ifirst_region+1.0) * 100.0,"%", &
+ " time: ",tval(5),"h ",tval(6),"min ",tval(7),"sec"
+
! flushes I/O buffer
call flush_IMAIN()
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/get_perm_color.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/get_perm_color.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/get_perm_color.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -75,13 +75,13 @@
!user output
if(myrank == 0) then
write(IMAIN,*) ' colors:'
- write(IMAIN,*) ' number of colors for inner elements = ',nb_colors_inner_elements
- write(IMAIN,*) ' number of colors for outer elements = ',nb_colors_outer_elements
- write(IMAIN,*) ' total number of colors (sum of both) = ', nb_colors_inner_elements + nb_colors_outer_elements
+ write(IMAIN,*) ' number of colors for inner elements = ',nb_colors_inner_elements
+ write(IMAIN,*) ' number of colors for outer elements = ',nb_colors_outer_elements
+ write(IMAIN,*) ' total number of colors (sum of both) = ', nb_colors_inner_elements + nb_colors_outer_elements
write(IMAIN,*) ' elements:'
- write(IMAIN,*) ' number of elements for outer elements = ',nspec_outer
- write(IMAIN,*) ' number of elements for inner elements = ',nspec_inner
- write(IMAIN,*) ' total number of elements for domain elements = ',nspec_domain
+ write(IMAIN,*) ' number of elements for outer elements = ',nspec_outer
+ write(IMAIN,*) ' number of elements for inner elements = ',nspec_inner
+ write(IMAIN,*) ' total number of elements for domain elements = ',nspec_domain
endif
! total number of colors used
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_atten3D_QRFSI12.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_atten3D_QRFSI12.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_atten3D_QRFSI12.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -52,12 +52,12 @@
! QRFSI12 constants
integer,parameter :: NKQ=8,MAXL_Q=12
integer,parameter :: NSQ=(MAXL_Q+1)**2,NDEPTHS_REFQ=913
-
+
! model_atten3D_QRFSI12_variables
double precision,dimension(:,:),allocatable :: QRFSI12_Q_dqmu
double precision,dimension(:),allocatable :: QRFSI12_Q_spknt
double precision,dimension(:),allocatable :: QRFSI12_Q_refdepth,QRFSI12_Q_refqmu
-
+
end module model_atten3D_QRFSI12_par
!
@@ -69,7 +69,7 @@
! standard routine to setup model
use model_atten3D_QRFSI12_par
-
+
implicit none
include "constants.h"
@@ -77,7 +77,7 @@
include 'mpif.h'
integer :: myrank
-
+
! local parameters
integer :: ier
@@ -129,7 +129,7 @@
! get the dq model coefficients
open(unit=10,file=QRFSI12,status='old',action='read',iostat=ier)
if( ier /= 0 ) call exit_MPI(0,'error opening model file QRFSI12.dat')
-
+
do k=1,NKQ
read(10,*)index
j=0
@@ -164,7 +164,7 @@
! get the depths and 1/Q values of the reference model
open(11,file=QRFSI12_ref,status='old',action='read',iostat=ier)
if( ier /= 0 ) call exit_MPI(0,'error opening model file ref_QRFSI12')
-
+
do j=1,NDEPTHS_REFQ
read(11,*)QRFSI12_Q_refdepth(j),QRFSI12_Q_refqmu(j)
enddo
@@ -201,14 +201,14 @@
! in Colleen's original code theta refers to the latitude. Here we have redefined theta to be colatitude
! to agree with the rest of specfem
-
+
! debug
! print *,'entering QRFSI12 subroutine'
ylat=90.0d0-theta
xlon=phi
- ! only checks radius for crust, idoubling is missleading for oceanic crust
+ ! only checks radius for crust, idoubling is missleading for oceanic crust
! when we want to expand mantle up to surface...
! !if(idoubling == IFLAG_CRUST .or. radius >= rmoho) then
@@ -222,27 +222,27 @@
!debug
! print *,'QRFSI12: we are in the inner core'
-
+
Qmu = 84.6d0
-
+
else if(idoubling == IFLAG_OUTER_CORE_NORMAL) then
-
+
! we are in the outer core
-
+
!debug
! print *,'QRFSI12: we are in the outer core'
-
+
Qmu = 0.0d0
-
- else
-
+
+ else
+
! we are in the mantle
-
+
depth = 6371.-radius
-
+
!debug
! print *,'QRFSI12: we are in the mantle at depth',depth
-
+
ifnd=0
do i=2,NDEPTHS_REFQ
if(depth >= QRFSI12_Q_refdepth(i-1) .and. depth < QRFSI12_Q_refdepth(i))then
@@ -282,7 +282,7 @@
Qmu = 1/smallq
- ! if Qmu is larger than MAX_ATTENUATION_VALUE, set it to ATTENUATION_COMP_MAXIMUM.
+ ! if Qmu is larger than MAX_ATTENUATION_VALUE, set it to ATTENUATION_COMP_MAXIMUM.
! This assumes that this value is high enough that at this point there is almost no attenuation at all.
if(Qmu >= ATTENUATION_COMP_MAXIMUM) Qmu = 0.99d0*ATTENUATION_COMP_MAXIMUM
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_attenuation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_attenuation.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_attenuation.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -230,7 +230,7 @@
integer :: myrank,REFERENCE_1D_MODEL
double precision :: RICB, RCMB, R670, R220, R80
logical :: CRUSTAL
-
+
! local parameters
double precision :: tau_e(N_SLS)
double precision :: Qb
@@ -248,21 +248,21 @@
else if(REFERENCE_1D_MODEL == REFERENCE_MODEL_IASP91) then
AM_V%Qn = 12
else if(REFERENCE_1D_MODEL == REFERENCE_MODEL_AK135) then
- ! redefines "pure" 1D model without crustal modification
+ ! redefines "pure" 1D model without crustal modification
call define_model_ak135(.FALSE.)
AM_V%Qn = NR_AK135
else if(REFERENCE_1D_MODEL == REFERENCE_MODEL_1066A) then
- ! redefines "pure" 1D model without crustal modification
+ ! redefines "pure" 1D model without crustal modification
call define_model_1066a(.FALSE.)
AM_V%Qn = NR_1066A
else if(REFERENCE_1D_MODEL == REFERENCE_MODEL_1DREF) then
- ! redefines "pure" 1D model without crustal modification
+ ! redefines "pure" 1D model without crustal modification
call define_model_1dref(.FALSE.)
AM_V%Qn = NR_REF
else if(REFERENCE_1D_MODEL == REFERENCE_MODEL_JP1D) then
AM_V%Qn = 12
else if(REFERENCE_1D_MODEL == REFERENCE_MODEL_SEA1D) then
- ! redefines "pure" 1D model without crustal modification
+ ! redefines "pure" 1D model without crustal modification
call define_model_sea1d(.FALSE.)
AM_V%Qn = NR_SEA1D
else
@@ -308,20 +308,20 @@
! re-defines 1D models with crustal modification if necessary
if( CRUSTAL ) then
if(REFERENCE_1D_MODEL == REFERENCE_MODEL_AK135) then
- ! redefines 1D model with crustal modification
+ ! redefines 1D model with crustal modification
call define_model_ak135(CRUSTAL)
else if(REFERENCE_1D_MODEL == REFERENCE_MODEL_1066A) then
- ! redefines 1D model with crustal modification
+ ! redefines 1D model with crustal modification
call define_model_1066a(CRUSTAL)
else if(REFERENCE_1D_MODEL == REFERENCE_MODEL_1DREF) then
- ! redefines 1D model with crustal modification
+ ! redefines 1D model with crustal modification
call define_model_1dref(CRUSTAL)
else if(REFERENCE_1D_MODEL == REFERENCE_MODEL_SEA1D) then
- ! redefines 1D model with crustal modification
+ ! redefines 1D model with crustal modification
call define_model_sea1d(CRUSTAL)
endif
endif
-
+
end subroutine model_attenuation_setup
!
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_s362ani.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_s362ani.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_s362ani.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -58,7 +58,7 @@
real(kind=4),dimension(:),allocatable :: wk1,wk2,wk3
integer, dimension(maxhpa) :: lmxhpa,itypehpa,numcoe,nconpt
-
+
integer,dimension(:,:),allocatable :: itpspl,iconpt
integer,dimension(:),allocatable :: ihpakern,ivarkern
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/save_arrays_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/save_arrays_solver.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/save_arrays_solver.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -79,7 +79,7 @@
open(unit=27,file=prname(1:len_trim(prname))//'array_dims.txt', &
status='unknown',action='write',iostat=ier)
if( ier /= 0 ) call exit_mpi(myrank,'error opening array_dims file')
-
+
write(27,*) nspec
write(27,*) nglob
close(27)
@@ -320,7 +320,7 @@
! uncomment for vp & vs model storage
if( SAVE_MESH_FILES ) then
! outputs model files in binary format
- call save_arrays_solver_meshfiles(myrank,nspec)
+ call save_arrays_solver_meshfiles(myrank,nspec)
endif
end subroutine save_arrays_solver
@@ -340,7 +340,7 @@
use create_regions_mesh_par2,only: &
rhostore,kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
- Qmu_store, &
+ Qmu_store, &
prname
implicit none
@@ -352,7 +352,7 @@
integer :: i,j,k,ispec,ier
real(kind=CUSTOM_REAL) :: scaleval1,scaleval2
real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: temp_store
-
+
! scaling factors to re-dimensionalize units
scaleval1 = sngl( sqrt(PI*GRAV*RHOAV)*(R_EARTH/1000.0d0) )
scaleval2 = sngl( RHOAV/1000.0d0 )
@@ -425,12 +425,12 @@
write(27) eta_anisostore
close(27)
endif ! TRANSVERSE_ISOTROPY
-
+
! shear attenuation
if( ATTENUATION ) then
! saves Qmu_store to full custom_real array
! uses temporary array
- allocate(temp_store(NGLLX,NGLLY,NGLLZ,nspec))
+ allocate(temp_store(NGLLX,NGLLY,NGLLZ,nspec))
if (ATTENUATION_3D) then
! attenuation arrays are fully 3D
if(CUSTOM_REAL == SIZE_REAL) then
@@ -453,7 +453,7 @@
enddo
enddo
enddo
- enddo
+ enddo
endif
! Qmu
@@ -465,8 +465,8 @@
close(27)
! frees temporary memory
- deallocate(temp_store)
- endif ! ATTENUATION
+ deallocate(temp_store)
+ endif ! ATTENUATION
end subroutine save_arrays_solver_meshfiles
!
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_MPI_interfaces.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_MPI_interfaces.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_MPI_interfaces.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -32,11 +32,11 @@
INCLUDE_CENTRAL_CUBE,myrank,NUMFACES_SHARED
use create_MPI_interfaces_par
-
+
use MPI_crust_mantle_par
use MPI_outer_core_par
- use MPI_inner_core_par
-
+ use MPI_inner_core_par
+
implicit none
integer,intent(in):: iregion_code
@@ -89,7 +89,7 @@
! frees arrays not needed any further
deallocate(iprocfrom_faces,iprocto_faces,imsg_type)
- deallocate(iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners)
+ deallocate(iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners)
deallocate(buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar)
deallocate(buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector)
select case( iregion_code )
@@ -112,7 +112,7 @@
deallocate(iboolleft_eta_inner_core,iboolright_eta_inner_core)
deallocate(iboolfaces_inner_core)
end select
-
+
! synchronizes MPI processes
call sync_all()
@@ -393,7 +393,7 @@
use create_MPI_interfaces_par
use MPI_inner_core_par
-
+
implicit none
integer :: MAX_NEIGHBOURS,max_nibool
@@ -478,7 +478,7 @@
NGLOB_INNER_CORE, &
test_flag,ndim_assemble, &
iproc_eta,addressing,NCHUNKS,NPROC_XI,NPROC_ETA)
-
+
! frees array not needed anymore
deallocate(ibelm_bottom_inner_core)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_color_perm.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_color_perm.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_color_perm.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -51,7 +51,7 @@
! user output
if(myrank == 0) then
- write(IMAIN,*) 'mesh coloring: ',USE_MESH_COLORING_GPU
+ write(IMAIN,*) ' mesh coloring: ',USE_MESH_COLORING_GPU
endif
select case( iregion_code )
@@ -272,8 +272,12 @@
integer :: nspec_outer,nspec_inner,nspec_domain
integer :: nspec_outer_min_global,nspec_outer_max_global
- integer :: nb_colors,nb_colors_min,nb_colors_max
+ integer :: nspec_inner_min_global,nspec_inner_max_global
+ integer :: min_elem_global,max_elem_global
+ integer :: nb_colors
+ integer :: nb_colors_min,nb_colors_max
+
integer :: icolor,ispec,ispec_counter
integer :: ispec_inner,ispec_outer
integer :: ier
@@ -379,8 +383,6 @@
call exit_MPI(myrank, 'incorrect total number of elements in all the colors of the mesh for outer elements')
endif
-
-
! debug: no mesh coloring, only creates dummy coloring arrays
if( DEBUG_COLOR ) then
nb_colors_outer_elements = 0
@@ -455,6 +457,20 @@
close(99)
endif
+ ! checks non-zero elements in colors
+ do icolor = 1,nb_colors_outer_elements + nb_colors_inner_elements
+ ! checks
+ if( num_of_elems_in_this_color(icolor) == 0 ) then
+ print *,'rank: ',myrank,'domain:',idomain,' nspec = ',nspec_domain
+ print *,'error zero elements in this color:',icolor
+ print *,'total number of elements in all the colors of the mesh = ', &
+ sum(num_of_elems_in_this_color)
+ call exit_MPI(myrank, 'zero elements in a color of the mesh')
+ endif
+ enddo
+
+
+
! sets up domain coloring arrays
select case(idomain)
case( IREGION_CRUST_MANTLE )
@@ -536,16 +552,23 @@
call min_all_i(nb_colors,nb_colors_min)
call max_all_i(nb_colors,nb_colors_max)
- ! user output
+ ! min/max of elements per color
+ call min_all_i(minval(num_of_elems_in_this_color(:)),min_elem_global)
+ call max_all_i(maxval(num_of_elems_in_this_color(:)),max_elem_global)
+
+ ! min/max of inner/outer elements
+ call min_all_i(nspec_inner,nspec_inner_min_global)
+ call max_all_i(nspec_inner,nspec_inner_max_global)
call min_all_i(nspec_outer,nspec_outer_min_global)
call max_all_i(nspec_outer,nspec_outer_max_global)
- call min_all_i(nspec_outer,nspec_outer_min_global)
- call max_all_i(nspec_outer,nspec_outer_max_global)
+
+ ! user output
if(myrank == 0) then
- write(IMAIN,*) ' colors min = ',nb_colors_min
- write(IMAIN,*) ' colors max = ',nb_colors_max
- write(IMAIN,*) ' outer elements: min = ',nspec_outer_min_global
- write(IMAIN,*) ' outer elements: max = ',nspec_outer_max_global
+ write(IMAIN,*) ' total colors:'
+ write(IMAIN,*) ' total colors min/max = ',nb_colors_min,nb_colors_max
+ write(IMAIN,*) ' elements per color min/max = ',min_elem_global,max_elem_global
+ write(IMAIN,*) ' inner elements min/max = ',nspec_inner_min_global,nspec_inner_max_global
+ write(IMAIN,*) ' outer elements min/max = ',nspec_outer_min_global,nspec_outer_max_global
endif
! debug: outputs permutation array as vtk file
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_model.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_model.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_model.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -185,5 +185,5 @@
! flushes I/O buffer
call flush_IMAIN()
-
+
end subroutine sm_output_info
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -116,19 +116,19 @@
implicit none
include "constants.h"
-
+
! only master process writes out to main output file
! file I/O in fortran is buffered by default
!
- ! note: Fortran2003 includes a FLUSH statement
+ ! note: Fortran2003 includes a FLUSH statement
! which is implemented by most compilers by now
!
! otherwise:
- ! a) comment out the line below
+ ! a) comment out the line below
! b) try to use instead: call flush(IMAIN)
-
+
flush(IMAIN)
-
+
end subroutine flush_IMAIN
!-------------------------------------------------------------------------------------------------
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/get_model_parameters.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/get_model_parameters.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/get_model_parameters.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -158,19 +158,35 @@
!
!---
+ ! default values
! uses PREM as the 1D reference model by default
- ! uses no mantle heterogeneities by default
- ! uses no 3D model by default
+ REFERENCE_1D_MODEL = REFERENCE_MODEL_PREM
+
+ ! uses no anisotropic 3D model by default
ANISOTROPIC_3D_MANTLE = .false.
- ATTENUATION_3D = .false.
+
+ ! attenuation
+ ! note: to save memory, one can set ATTENUATION_3D to .false. which
+ ! will create attenuation arrays storing only 1 point per element
+ ! (otherwise, 3D attenuation arrays will be defined for all GLL points)
+ if( USE_3D_ATTENUATION ) then
+ ATTENUATION_3D = .true.
+ else
+ ATTENUATION_3D = .false.
+ endif
+
+ ! no crustal mesh stretching and 3D crust models by default
CASE_3D = .false.
CRUSTAL = .false.
+ ONE_CRUST = .false.
+
+ ! uses no 3D heterogeneity mantle by default
HETEROGEN_3D_MANTLE = .false.
+ ISOTROPIC_3D_MANTLE = .false.
HONOR_1D_SPHERICAL_MOHO = .false.
- ISOTROPIC_3D_MANTLE = .false.
- ONE_CRUST = .false.
- REFERENCE_1D_MODEL = REFERENCE_MODEL_PREM
+
+ ! no 3D model by default
THREE_D_MODEL = 0
TRANSVERSE_ISOTROPY = .false.
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_compute_parameters.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_compute_parameters.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_compute_parameters.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -396,9 +396,6 @@
if(ABSORBING_CONDITIONS .and. NCHUNKS == 3) &
stop 'absorbing conditions not supported for three chunks yet'
- if(ATTENUATION_3D .and. .not. ATTENUATION) &
- stop 'need ATTENUATION to use ATTENUATION_3D'
-
if(ATTENUATION_NEW .and. .not. ATTENUATION) &
stop 'need ATTENUATION to use ATTENUATION_NEW'
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -294,10 +294,10 @@
endif
write(IMAIN,*)
-
+
! flushes file buffer for main output file (IMAIN)
call flush_IMAIN()
-
+
! write time stamp file to give information about progression of simulation
write(outputname,"('/timestamp',i6.6)") it
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -30,7 +30,7 @@
accel_outer_core,b_accel_outer_core, &
normal_top_outer_core,jacobian2D_top_outer_core, &
wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
- SIMULATION_TYPE,nspec_top)
+ SIMULATION_TYPE,nspec2D_top)
implicit none
@@ -56,7 +56,7 @@
integer, dimension(NSPEC2D_TOP_OC) :: ibelm_top_outer_core
integer SIMULATION_TYPE
- integer nspec_top
+ integer nspec2D_top
! local parameters
real(kind=CUSTOM_REAL) :: displ_x,displ_y,displ_z,displ_n,nx,ny,nz,weight
@@ -64,16 +64,16 @@
! for surface elements exactly on the CMB
- do ispec2D = 1,nspec_top !NSPEC2D_TOP(IREGION_OUTER_CORE)
-
+ do ispec2D = 1,nspec2D_top !NSPEC2D_TOP(IREGION_OUTER_CORE)
+
ispec = ibelm_top_outer_core(ispec2D)
ispec_selected = ibelm_bottom_crust_mantle(ispec2D)
! only for DOFs exactly on the CMB (top of these elements)
- k = NGLLZ
+ k = NGLLZ
! get displacement on the solid side using pointwise matching
k_corresp = 1
-
+
do j = 1,NGLLY
do i = 1,NGLLX
! corresponding points are located at the bottom of the mantle
@@ -339,7 +339,7 @@
normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
RHO_BOTTOM_OC,minus_g_icb, &
- SIMULATION_TYPE,nspec_top)
+ SIMULATION_TYPE,nspec2D_top)
implicit none
@@ -368,14 +368,14 @@
real(kind=CUSTOM_REAL) minus_g_icb
integer SIMULATION_TYPE
- integer nspec_top
+ integer nspec2D_top
! local parameters
real(kind=CUSTOM_REAL) :: pressure,nx,ny,nz,weight
integer :: i,j,k,k_corresp,ispec,ispec2D,iglob,iglob_inner_core,ispec_selected
! for surface elements exactly on the ICB
- do ispec2D = 1,nspec_top ! NSPEC2D_TOP(IREGION_INNER_CORE)
+ do ispec2D = 1,nspec2D_top ! NSPEC2D_TOP(IREGION_INNER_CORE)
ispec = ibelm_top_inner_core(ispec2D)
ispec_selected = ibelm_bottom_outer_core(ispec2D)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -225,7 +225,7 @@
endif
! ymax
-
+
if (SIMULATION_TYPE == 3 .and. nspec2D_ymax_outer_core > 0) then
call read_abs(7,absorb_ymax_outer_core,reclen_ymax_outer_core,NSTEP-it+1)
endif
@@ -273,7 +273,7 @@
endif
! zmin
-
+
! for surface elements exactly on the ICB
if (SIMULATION_TYPE == 3 .and. nspec2D_zmin_outer_core > 0) then
call read_abs(8,absorb_zmin_outer_core,reclen_zmin,NSTEP-it+1)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/get_attenuation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/get_attenuation.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/get_attenuation.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -26,9 +26,11 @@
!=====================================================================
- subroutine get_attenuation_model_3D_or_1D(myrank, prname, one_minus_sum_beta, &
- factor_common, scale_factor, tau_s, &
- vx, vy, vz, vnspec)
+ subroutine get_attenuation_model_3D_or_1D(myrank, prname, &
+ one_minus_sum_beta, &
+ factor_common, &
+ scale_factor, tau_s, &
+ vx, vy, vz, vnspec)
use specfem_par,only: ATTENUATION_VAL, ATTENUATION_3D_VAL
@@ -36,12 +38,15 @@
include 'constants.h'
- integer :: myrank,vx,vy,vz,vnspec
- character(len=150) :: prname
+ integer :: myrank
+
+ integer :: vx,vy,vz,vnspec
double precision, dimension(vx,vy,vz,vnspec) :: one_minus_sum_beta, scale_factor
double precision, dimension(N_SLS,vx,vy,vz,vnspec) :: factor_common
double precision, dimension(N_SLS) :: tau_s
+ character(len=150) :: prname
+
! local parameters
integer :: i,j,k,ispec,ier
double precision, dimension(N_SLS) :: tau_e, fc
@@ -57,8 +62,8 @@
if( ier /= 0 ) call exit_MPI(myrank,'error opening file attenuation.bin')
read(27) tau_s
- read(27) factor_common
- read(27) scale_factor
+ read(27) factor_common ! tau_e_store
+ read(27) scale_factor ! Qmu_store
read(27) T_c_source
close(27)
@@ -118,10 +123,11 @@
include 'constants.h'
- double precision, dimension(N_SLS) :: tau_s, tau_e, beta, factor_common
- double precision one_minus_sum_beta
+ double precision, dimension(N_SLS),intent(in) :: tau_s, tau_e
+ double precision, dimension(N_SLS),intent(out) :: factor_common
+ double precision,intent(out) :: one_minus_sum_beta
- double precision, dimension(N_SLS) :: tauinv
+ double precision, dimension(N_SLS) :: tauinv,beta
integer i
tauinv(:) = -1.0d0 / tau_s(:)
@@ -215,8 +221,9 @@
include 'constants.h'
- double precision, dimension(N_SLS) :: tau_s, alphaval, betaval,gammaval
- real(kind=CUSTOM_REAL) deltat
+ double precision, dimension(N_SLS), intent(in) :: tau_s
+ double precision, dimension(N_SLS), intent(out) :: alphaval, betaval,gammaval
+ real(kind=CUSTOM_REAL), intent(in) :: deltat
double precision, dimension(N_SLS) :: tauinv
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -705,14 +705,12 @@
allocate(omsb_crust_mantle_dble(ATT1,ATT2,ATT3,ATT4), &
factor_scale_crust_mantle_dble(ATT1,ATT2,ATT3,ATT4),stat=ier)
if( ier /= 0 ) call exit_MPI(myrank,'error allocating omsb crust_mantle arrays')
+ allocate(factor_common_crust_mantle_dble(N_SLS,ATT1,ATT2,ATT3,ATT4),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating factor_common crust_mantle array')
allocate(omsb_inner_core_dble(ATT1,ATT2,ATT3,ATT5), &
factor_scale_inner_core_dble(ATT1,ATT2,ATT3,ATT5),stat=ier)
if( ier /= 0 ) call exit_MPI(myrank,'error allocating omsb inner_core arrays')
-
- allocate(factor_common_crust_mantle_dble(N_SLS,ATT1,ATT2,ATT3,ATT4),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating factor_common crust_mantle array')
-
allocate(factor_common_inner_core_dble(N_SLS,ATT1,ATT2,ATT3,ATT5),stat=ier)
if( ier /= 0 ) call exit_MPI(myrank,'error allocating factor_common inner_core array')
@@ -725,9 +723,11 @@
! reads in attenuation values
call create_name_database(prnamel, myrank, IREGION_CRUST_MANTLE, LOCAL_PATH)
- call get_attenuation_model_3D_or_1D(myrank, prnamel, omsb_crust_mantle_dble, &
- factor_common_crust_mantle_dble,factor_scale_crust_mantle_dble,tau_sigma_dble, &
- ATT1,ATT2,ATT3,ATT4)
+ call get_attenuation_model_3D_or_1D(myrank, prnamel, &
+ omsb_crust_mantle_dble, &
+ factor_common_crust_mantle_dble, &
+ factor_scale_crust_mantle_dble,tau_sigma_dble, &
+ ATT1,ATT2,ATT3,ATT4)
! INNER_CORE ATTENUATION
! initializes
@@ -738,19 +738,21 @@
! reads in attenuation values
call create_name_database(prnamel, myrank, IREGION_INNER_CORE, LOCAL_PATH)
- call get_attenuation_model_3D_or_1D(myrank, prnamel, omsb_inner_core_dble, &
- factor_common_inner_core_dble,factor_scale_inner_core_dble,tau_sigma_dble, &
- ATT1,ATT2,ATT3,ATT5)
+ call get_attenuation_model_3D_or_1D(myrank, prnamel, &
+ omsb_inner_core_dble, &
+ factor_common_inner_core_dble, &
+ factor_scale_inner_core_dble,tau_sigma_dble, &
+ ATT1,ATT2,ATT3,ATT5)
! converts to custom real arrays
if(CUSTOM_REAL == SIZE_REAL) then
- factor_scale_crust_mantle = sngl(factor_scale_crust_mantle_dble)
+ factor_scale_crust_mantle = sngl(factor_scale_crust_mantle_dble)
one_minus_sum_beta_crust_mantle = sngl(omsb_crust_mantle_dble)
- factor_common_crust_mantle = sngl(factor_common_crust_mantle_dble)
+ factor_common_crust_mantle = sngl(factor_common_crust_mantle_dble)
- factor_scale_inner_core = sngl(factor_scale_inner_core_dble)
- one_minus_sum_beta_inner_core = sngl(omsb_inner_core_dble)
- factor_common_inner_core = sngl(factor_common_inner_core_dble)
+ factor_scale_inner_core = sngl(factor_scale_inner_core_dble)
+ one_minus_sum_beta_inner_core = sngl(omsb_inner_core_dble)
+ factor_common_inner_core = sngl(factor_common_inner_core_dble)
else
factor_scale_crust_mantle = factor_scale_crust_mantle_dble
one_minus_sum_beta_crust_mantle = omsb_crust_mantle_dble
@@ -788,6 +790,7 @@
if(ANISOTROPIC_3D_MANTLE_VAL) then
scale_factor_minus_one = scale_factor - 1.d0
+
mul = c44store_crust_mantle(i,j,k,ispec)
c11store_crust_mantle(i,j,k,ispec) = c11store_crust_mantle(i,j,k,ispec) &
+ FOUR_THIRDS * scale_factor_minus_one * mul
@@ -812,6 +815,7 @@
! store the original value of \mu to comput \mu*\eps
muvstore_crust_mantle_3dmovie(i,j,k,ispec)=muvstore_crust_mantle(i,j,k,ispec)
endif
+
muvstore_crust_mantle(i,j,k,ispec) = muvstore_crust_mantle(i,j,k,ispec) * scale_factor
! scales transverse isotropic values for mu_h
@@ -830,12 +834,14 @@
do j=1,NGLLY
do i=1,NGLLX
if( ATTENUATION_3D_VAL ) then
- scale_factor_minus_one = factor_scale_inner_core(i,j,k,ispec) - 1.d0
+ scale_factor = factor_scale_inner_core(i,j,k,ispec)
else
- scale_factor_minus_one = factor_scale_inner_core(1,1,1,ispec) - 1.d0
+ scale_factor = factor_scale_inner_core(1,1,1,ispec)
endif
if(ANISOTROPIC_INNER_CORE_VAL) then
+ scale_factor_minus_one = scale_factor - 1.d0
+
mul = muvstore_inner_core(i,j,k,ispec)
c11store_inner_core(i,j,k,ispec) = c11store_inner_core(i,j,k,ispec) &
+ FOUR_THIRDS * scale_factor_minus_one * mul
@@ -849,11 +855,8 @@
+ scale_factor_minus_one * mul
endif
- if( ATTENUATION_3D_VAL ) then
- muvstore_inner_core(i,j,k,ispec) = muvstore_inner_core(i,j,k,ispec) * factor_scale_inner_core(i,j,k,ispec)
- else
- muvstore_inner_core(i,j,k,ispec) = muvstore_inner_core(i,j,k,ispec) * factor_scale_inner_core(1,1,1,ispec)
- endif
+ muvstore_inner_core(i,j,k,ispec) = muvstore_inner_core(i,j,k,ispec) * scale_factor
+
enddo
enddo
enddo
@@ -1129,6 +1132,7 @@
! local parameters
integer :: ier
+ integer :: i,j,k,ispec,ispec2D,ipoin,iglob
real :: free_mb,used_mb,total_mb
! dummy custom_real variables to convert from double precision
real(kind=CUSTOM_REAL),dimension(:,:,:),allocatable:: cr_wgll_cube
@@ -1153,7 +1157,7 @@
number_receiver_global,islice_selected_rec,ispec_selected_rec, &
nrec, nrec_local, nadj_rec_local, &
NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
- NSPEC_CRUST_MANTLE_STRAIN_ONLY,NGLOB_CRUST_MANTLE_OCEANS, &
+ NSPEC_CRUST_MANTLE_STRAIN_ONLY, &
NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
NSPEC_INNER_CORE_STRAIN_ONLY, &
@@ -1306,8 +1310,10 @@
if ( NOISE_TOMOGRAPHY > 0 ) then
if(myrank == 0 ) write(IMAIN,*) " loading noise arrays"
- call prepare_fields_noise_device(Mesh_pointer,nspec_top,ibelm_top_crust_mantle, &
- NSTEP,noise_sourcearray, &
+ call prepare_fields_noise_device(Mesh_pointer,NSPEC_TOP, &
+ NSTEP, &
+ ibelm_top_crust_mantle, &
+ noise_sourcearray, &
normal_x_noise,normal_y_noise,normal_z_noise, &
mask_noise,jacobian2D_top_crust_mantle)
@@ -1315,10 +1321,71 @@
! prepares oceans arrays
if ( OCEANS_VAL ) then
- if(myrank == 0 ) write(IMAIN,*) " loading oceans arrays"
+ if(myrank == 0 ) write(IMAIN,*) " loading oceans arrays"
- call prepare_oceans_device(Mesh_pointer,rmass_ocean_load)
+ ! prepares gpu arrays for coupling with oceans
+ !
+ ! note: handling of coupling on gpu is slightly different than in CPU routine to avoid using a mutex
+ ! to update acceleration; tests so far have shown, that with a simple mutex implementation
+ ! the results differ between successive runs (probably still due to some race conditions?)
+ ! here we now totally avoid mutex usage and still update each global point only once
+ ! counts global points on surface to oceans
+ updated_dof_ocean_load(:) = .false.
+ ipoin = 0
+ do ispec2D = 1,nspec_top
+ ispec = ibelm_top_crust_mantle(ispec2D)
+ k = NGLLZ
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ ! get global point number
+ iglob = ibool_crust_mantle(i,j,k,ispec)
+ if(.not. updated_dof_ocean_load(iglob)) then
+ ipoin = ipoin + 1
+ updated_dof_ocean_load(iglob) = .true.
+ endif
+ enddo
+ enddo
+ enddo
+
+ ! allocates arrays with all global points on ocean surface
+ npoin_oceans = ipoin
+ allocate(iglob_ocean_load(npoin_oceans), &
+ normal_ocean_load(NDIM,npoin_oceans), &
+ rmass_ocean_load_selected(npoin_oceans), &
+ stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating oceans arrays')
+
+ ! fills arrays for coupling surface at oceans
+ updated_dof_ocean_load(:) = .false.
+ ipoin = 0
+ do ispec2D = 1,nspec_top
+ ispec = ibelm_top_crust_mantle(ispec2D)
+ k = NGLLZ
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ ! get global point number
+ iglob = ibool_crust_mantle(i,j,k,ispec)
+ if(.not. updated_dof_ocean_load(iglob)) then
+ ipoin = ipoin + 1
+ ! fills arrays
+ iglob_ocean_load(ipoin) = iglob
+ rmass_ocean_load_selected(ipoin) = rmass_ocean_load(iglob)
+ normal_ocean_load(:,ipoin) = normal_top_crust_mantle(:,i,j,ispec2D)
+ ! masks this global point
+ updated_dof_ocean_load(iglob) = .true.
+ endif
+ enddo
+ enddo
+ enddo
+
+ ! prepares arrays on GPU
+ call prepare_oceans_device(Mesh_pointer,npoin_oceans, &
+ rmass_ocean_load_selected,iglob_ocean_load,normal_ocean_load)
+
+ ! frees memory
+ deallocate(iglob_ocean_load,rmass_ocean_load_selected,normal_ocean_load)
+
endif
! crust/mantle region
@@ -1335,9 +1402,6 @@
rmassx_crust_mantle, &
rmassy_crust_mantle, &
rmassz_crust_mantle, &
- normal_top_crust_mantle, &
- ibelm_top_crust_mantle, &
- ibelm_bottom_crust_mantle, &
ibool_crust_mantle, &
xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
ispec_is_tiso_crust_mantle, &
@@ -1350,8 +1414,8 @@
c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
num_phase_ispec_crust_mantle,phase_ispec_inner_crust_mantle, &
nspec_outer_crust_mantle,nspec_inner_crust_mantle, &
- NSPEC2D_TOP(IREGION_CRUST_MANTLE), &
NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE), &
+ ibelm_bottom_crust_mantle, &
NCHUNKS_VAL, &
num_colors_outer_crust_mantle,num_colors_inner_crust_mantle, &
num_elem_colors_crust_mantle)
@@ -1365,18 +1429,18 @@
gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
rhostore_outer_core,kappavstore_outer_core, &
rmass_outer_core, &
+ ibool_outer_core, &
+ xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+ num_phase_ispec_outer_core,phase_ispec_inner_outer_core, &
+ nspec_outer_outer_core,nspec_inner_outer_core, &
+ NSPEC2D_TOP(IREGION_OUTER_CORE), &
+ NSPEC2D_BOTTOM(IREGION_OUTER_CORE), &
normal_top_outer_core, &
normal_bottom_outer_core, &
jacobian2D_top_outer_core, &
jacobian2D_bottom_outer_core, &
ibelm_top_outer_core, &
ibelm_bottom_outer_core, &
- ibool_outer_core, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- num_phase_ispec_outer_core,phase_ispec_inner_outer_core, &
- nspec_outer_outer_core,nspec_inner_outer_core, &
- NSPEC2D_TOP(IREGION_OUTER_CORE), &
- NSPEC2D_BOTTOM(IREGION_OUTER_CORE), &
num_colors_outer_outer_core,num_colors_inner_outer_core, &
num_elem_colors_outer_core)
@@ -1389,7 +1453,6 @@
gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
rmass_inner_core, &
- ibelm_top_inner_core, &
ibool_inner_core, &
xstore_inner_core,ystore_inner_core,zstore_inner_core, &
c11store_inner_core,c12store_inner_core,c13store_inner_core, &
@@ -1398,6 +1461,7 @@
num_phase_ispec_inner_core,phase_ispec_inner_inner_core, &
nspec_outer_inner_core,nspec_inner_inner_core, &
NSPEC2D_TOP(IREGION_INNER_CORE), &
+ ibelm_top_inner_core, &
num_colors_outer_inner_core,num_colors_inner_inner_core, &
num_elem_colors_inner_core)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2012-08-14 23:15:21 UTC (rev 20569)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2012-08-15 19:55:44 UTC (rev 20570)
@@ -91,6 +91,11 @@
! flag to mask ocean-bottom degrees of freedom for ocean load
logical, dimension(NGLOB_CRUST_MANTLE_OCEANS) :: updated_dof_ocean_load
+ integer :: npoin_oceans
+ integer, dimension(:),allocatable :: iglob_ocean_load
+ real(kind=CUSTOM_REAL), dimension(:,:),allocatable :: normal_ocean_load
+ real(kind=CUSTOM_REAL), dimension(:),allocatable :: rmass_ocean_load_selected
+
!-----------------------------------------------------------------
! ellipticity
!-----------------------------------------------------------------
More information about the CIG-COMMITS
mailing list