[cig-commits] r21318 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER: . src/auxiliaries src/cuda src/specfem3D
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Fri Feb 1 09:03:16 PST 2013
Author: danielpeter
Date: 2013-02-01 09:03:15 -0800 (Fri, 01 Feb 2013)
New Revision: 21318
Modified:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure.ac
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/auxiliaries/combine_vol_data_vtk.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/visual_vtk.cpp
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/visual_vtk_stubs.c
Log:
bug fix for anisotropic/transverse isotropic kernels in cuda routines; update in combine_vol_data_vtk for file output creation
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure.ac
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure.ac 2013-02-01 13:47:37 UTC (rev 21317)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure.ac 2013-02-01 17:03:15 UTC (rev 21318)
@@ -89,9 +89,11 @@
AC_ARG_VAR(MPILIBS, [extra libraries for linking MPI programs])
AC_ARG_VAR(FLAGS_CHECK, [Fortran compiler flags for non-critical subroutines])
AC_ARG_VAR(FLAGS_NO_CHECK, [Fortran compiler flags for creating fast, production-run code for critical subroutines])
+
AC_ARG_VAR(CUDA_LIB,[Location of CUDA library libcudart])
AC_ARG_VAR(CUDA_INC,[Location of CUDA include files])
AC_ARG_VAR(MPI_INC,[Location of MPI include mpi.h, which is needed by nvcc when compiling cuda files])
+
AC_ARG_VAR(AR, [ar library creation])
AC_ARG_VAR(ARFLAGS, [ar flag library creation])
AC_ARG_VAR(RANLIB, [ranlib library creation])
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/auxiliaries/combine_vol_data_vtk.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/auxiliaries/combine_vol_data_vtk.f90 2013-02-01 13:47:37 UTC (rev 21317)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/auxiliaries/combine_vol_data_vtk.f90 2013-02-01 17:03:15 UTC (rev 21318)
@@ -236,17 +236,6 @@
if( ier /= 0 ) stop 'error allocating total_dat_con array'
total_dat_con(:,:) = 0
- ! VTK
- ! opens unstructured grid file
- write(mesh_file,'(a,i1,a)') trim(outdir)//'/' // 'reg_',ir,'_'//trim(filename)//'.vtk'
- open(IOVTK,file=mesh_file(1:len_trim(mesh_file)),status='unknown',iostat=ios)
- if( ios /= 0 ) stop 'error opening vtk output file'
- write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
- write(IOVTK,'(a)') 'material model VTK file'
- write(IOVTK,'(a)') 'ASCII'
- write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
-
-
np = 0
ne = 0
@@ -262,8 +251,6 @@
write(prname_topo,'(a,i6.6,a,i1,a)') trim(in_topo_dir)//'/proc',iproc,'_reg',ir,'_'
write(prname_file,'(a,i6.6,a,i1,a)') trim(in_file_dir)//'/proc',iproc,'_reg',ir,'_'
-
-
! filename.bin
data_file = trim(prname_file) // trim(filename) // '.bin'
open(unit = 27,file = trim(data_file),status='old',action='read', iostat = ios,form ='unformatted')
@@ -528,6 +515,15 @@
print *
! VTK
+ ! opens unstructured grid file
+ write(mesh_file,'(a,i1,a)') trim(outdir)//'/' // 'reg_',ir,'_'//trim(filename)//'.vtk'
+ open(IOVTK,file=mesh_file(1:len_trim(mesh_file)),status='unknown',iostat=ios)
+ if( ios /= 0 ) stop 'error opening vtk output file'
+ write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+ write(IOVTK,'(a)') 'material model VTK file'
+ write(IOVTK,'(a)') 'ASCII'
+ write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+
! points
write(IOVTK, '(a,i16,a)') 'POINTS ', np, ' float'
do i = 1,np
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2013-02-01 13:47:37 UTC (rev 21317)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2013-02-01 17:03:15 UTC (rev 21318)
@@ -45,68 +45,76 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_kernels_cudakernel(int* ibool,
- realw* accel,
- realw* b_displ,
- realw* epsilondev_xx,
- realw* epsilondev_yy,
- realw* epsilondev_xy,
- realw* epsilondev_xz,
- realw* epsilondev_yz,
- realw* epsilon_trace_over_3,
- realw* b_epsilondev_xx,
- realw* b_epsilondev_yy,
- realw* b_epsilondev_xy,
- realw* b_epsilondev_xz,
- realw* b_epsilondev_yz,
- realw* b_epsilon_trace_over_3,
- realw* rho_kl,
- realw* mu_kl,
- realw* kappa_kl,
- int NSPEC,
- realw deltat,
- int ANISOTROPIC_KL) {
+__global__ void compute_kernels_rho_cudakernel(int* ibool,
+ realw* accel,
+ realw* b_displ,
+ realw* rho_kl,
+ int NSPEC,
+ realw deltat) {
int ispec = blockIdx.x + blockIdx.y*gridDim.x;
// handles case when there is 1 extra block (due to rectangular grid)
if(ispec < NSPEC) {
- int ijk = threadIdx.x;
- int ijk_ispec = ijk + NGLL3*ispec;
+ int ijk_ispec = threadIdx.x + NGLL3*ispec;
int iglob = ibool[ijk_ispec] - 1 ;
- // isotropic kernels:
// density kernel
rho_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_displ[3*iglob]+
accel[3*iglob+1]*b_displ[3*iglob+1]+
accel[3*iglob+2]*b_displ[3*iglob+2]);
+ }
+}
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_kernels_iso_cudakernel(realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz,
+ realw* epsilon_trace_over_3,
+ realw* b_epsilondev_xx,
+ realw* b_epsilondev_yy,
+ realw* b_epsilondev_xy,
+ realw* b_epsilondev_xz,
+ realw* b_epsilondev_yz,
+ realw* b_epsilon_trace_over_3,
+ realw* mu_kl,
+ realw* kappa_kl,
+ int NSPEC,
+ realw deltat) {
+
+ int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+
+ // handles case when there is 1 extra block (due to rectangular grid)
+ if(ispec < NSPEC) {
+
+ int ijk_ispec = threadIdx.x + NGLL3*ispec;
+
// isotropic kernel contributions
- if( ! ANISOTROPIC_KL ){
- // shear modulus kernel
- mu_kl[ijk_ispec] += deltat * (epsilondev_xx[ijk_ispec]*b_epsilondev_xx[ijk_ispec]+
- epsilondev_yy[ijk_ispec]*b_epsilondev_yy[ijk_ispec]+
- (epsilondev_xx[ijk_ispec]+epsilondev_yy[ijk_ispec])*
- (b_epsilondev_xx[ijk_ispec]+b_epsilondev_yy[ijk_ispec])+
- 2*(epsilondev_xy[ijk_ispec]*b_epsilondev_xy[ijk_ispec]+
- epsilondev_xz[ijk_ispec]*b_epsilondev_xz[ijk_ispec]+
- epsilondev_yz[ijk_ispec]*b_epsilondev_yz[ijk_ispec]));
+ // shear modulus kernel
+ mu_kl[ijk_ispec] += deltat * (epsilondev_xx[ijk_ispec]*b_epsilondev_xx[ijk_ispec]+
+ epsilondev_yy[ijk_ispec]*b_epsilondev_yy[ijk_ispec]+
+ (epsilondev_xx[ijk_ispec]+epsilondev_yy[ijk_ispec])*
+ (b_epsilondev_xx[ijk_ispec]+b_epsilondev_yy[ijk_ispec])+
+ 2*(epsilondev_xy[ijk_ispec]*b_epsilondev_xy[ijk_ispec]+
+ epsilondev_xz[ijk_ispec]*b_epsilondev_xz[ijk_ispec]+
+ epsilondev_yz[ijk_ispec]*b_epsilondev_yz[ijk_ispec]));
- // bulk modulus kernel
- kappa_kl[ijk_ispec] += deltat*(9*epsilon_trace_over_3[ijk_ispec]*
- b_epsilon_trace_over_3[ijk_ispec]);
- }
+ // bulk modulus kernel
+ kappa_kl[ijk_ispec] += deltat * ( 9 * epsilon_trace_over_3[ijk_ispec] * b_epsilon_trace_over_3[ijk_ispec]);
}
}
/* ----------------------------------------------------------------------------------------------- */
-__device__ void compute_strain_product(realw* prod,
- realw eps_trace_over_3,
- realw* epsdev,
- realw b_eps_trace_over_3,
- realw* b_epsdev){
+__device__ void compute_strain_product_cuda(realw* prod,
+ realw eps_trace_over_3,
+ realw* epsdev,
+ realw b_eps_trace_over_3,
+ realw* b_epsdev){
realw eps[6],b_eps[6];
@@ -139,14 +147,11 @@
for(int i=0; i<6; i++){
for(int j=i; j<6; j++){
prod[p]=eps[i]*b_eps[j];
-
if(j>i){
prod[p]=prod[p]+eps[j]*b_eps[i];
if(j>2 && i<3){ prod[p] = prod[p]*2.0f;}
}
-
if(i>2){ prod[p]=prod[p]*4.0f;}
-
p=p+1;
}
}
@@ -154,8 +159,7 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_kernels_ani_cudakernel(int* ibool,
- realw* epsilondev_xx,
+__global__ void compute_kernels_ani_cudakernel(realw* epsilondev_xx,
realw* epsilondev_yy,
realw* epsilondev_xy,
realw* epsilondev_xz,
@@ -176,11 +180,10 @@
// handles case when there is 1 extra block (due to rectangular grid)
if(ispec < NSPEC) {
- int ijk = threadIdx.x;
- int ijk_ispec = ijk + NGLL3*ispec;
+ int ijk_ispec = threadIdx.x + NGLL3*ispec;
// fully anisotropic kernel contributions
-
+ realw eps_trace_over_3,b_eps_trace_over_3;
realw prod[21];
realw epsdev[5];
realw b_epsdev[5];
@@ -197,10 +200,13 @@
b_epsdev[3] = b_epsilondev_xz[ijk_ispec];
b_epsdev[4] = b_epsilondev_yz[ijk_ispec];
+ eps_trace_over_3 = epsilon_trace_over_3[ijk_ispec];
+ b_eps_trace_over_3 = b_epsilon_trace_over_3[ijk_ispec];
+
// fully anisotropic kernel contributions
- compute_strain_product(prod,epsilon_trace_over_3[ijk_ispec],epsdev,
- b_epsilon_trace_over_3[ijk_ispec],b_epsdev);
+ compute_strain_product_cuda(prod,eps_trace_over_3,epsdev,b_eps_trace_over_3,b_epsdev);
+ // updates full anisotropic kernel
for(int i=0;i<21;i++){
cijkl_kl[i + 21*ijk_ispec] += deltat * prod[i];
}
@@ -233,50 +239,53 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- compute_kernels_cudakernel<<<grid,threads>>>(mp->d_ibool_crust_mantle,
- mp->d_accel_crust_mantle,
- mp->d_b_displ_crust_mantle,
- mp->d_epsilondev_xx_crust_mantle,
- mp->d_epsilondev_yy_crust_mantle,
- mp->d_epsilondev_xy_crust_mantle,
- mp->d_epsilondev_xz_crust_mantle,
- mp->d_epsilondev_yz_crust_mantle,
- mp->d_eps_trace_over_3_crust_mantle,
- mp->d_b_epsilondev_xx_crust_mantle,
- mp->d_b_epsilondev_yy_crust_mantle,
- mp->d_b_epsilondev_xy_crust_mantle,
- mp->d_b_epsilondev_xz_crust_mantle,
- mp->d_b_epsilondev_yz_crust_mantle,
- mp->d_b_eps_trace_over_3_crust_mantle,
- mp->d_rho_kl_crust_mantle,
- mp->d_beta_kl_crust_mantle,
- mp->d_alpha_kl_crust_mantle,
- mp->NSPEC_CRUST_MANTLE,
- deltat,
- mp->anisotropic_kl);
+ // density kernel
+ compute_kernels_rho_cudakernel<<<grid,threads>>>(mp->d_ibool_crust_mantle,
+ mp->d_accel_crust_mantle,
+ mp->d_b_displ_crust_mantle,
+ mp->d_rho_kl_crust_mantle,
+ mp->NSPEC_CRUST_MANTLE,
+ deltat);
- if(mp->anisotropic_kl){
- compute_kernels_ani_cudakernel<<<grid,threads>>>(mp->d_ibool_crust_mantle,
- mp->d_epsilondev_xx_crust_mantle,
- mp->d_epsilondev_yy_crust_mantle,
- mp->d_epsilondev_xy_crust_mantle,
- mp->d_epsilondev_xz_crust_mantle,
- mp->d_epsilondev_yz_crust_mantle,
- mp->d_eps_trace_over_3_crust_mantle,
- mp->d_b_epsilondev_xx_crust_mantle,
- mp->d_b_epsilondev_yy_crust_mantle,
- mp->d_b_epsilondev_xy_crust_mantle,
- mp->d_b_epsilondev_xz_crust_mantle,
- mp->d_b_epsilondev_yz_crust_mantle,
- mp->d_b_eps_trace_over_3_crust_mantle,
- mp->d_cijkl_kl_crust_mantle,
- mp->NSPEC_CRUST_MANTLE,
- deltat);
-
+ if(! mp->anisotropic_kl){
+ // isotropic kernels
+ compute_kernels_iso_cudakernel<<<grid,threads>>>(mp->d_epsilondev_xx_crust_mantle,
+ mp->d_epsilondev_yy_crust_mantle,
+ mp->d_epsilondev_xy_crust_mantle,
+ mp->d_epsilondev_xz_crust_mantle,
+ mp->d_epsilondev_yz_crust_mantle,
+ mp->d_eps_trace_over_3_crust_mantle,
+ mp->d_b_epsilondev_xx_crust_mantle,
+ mp->d_b_epsilondev_yy_crust_mantle,
+ mp->d_b_epsilondev_xy_crust_mantle,
+ mp->d_b_epsilondev_xz_crust_mantle,
+ mp->d_b_epsilondev_yz_crust_mantle,
+ mp->d_b_eps_trace_over_3_crust_mantle,
+ mp->d_beta_kl_crust_mantle,
+ mp->d_alpha_kl_crust_mantle,
+ mp->NSPEC_CRUST_MANTLE,
+ deltat);
+ }else{
+ // anisotropic kernels
+ compute_kernels_ani_cudakernel<<<grid,threads>>>(mp->d_epsilondev_xx_crust_mantle,
+ mp->d_epsilondev_yy_crust_mantle,
+ mp->d_epsilondev_xy_crust_mantle,
+ mp->d_epsilondev_xz_crust_mantle,
+ mp->d_epsilondev_yz_crust_mantle,
+ mp->d_eps_trace_over_3_crust_mantle,
+ mp->d_b_epsilondev_xx_crust_mantle,
+ mp->d_b_epsilondev_yy_crust_mantle,
+ mp->d_b_epsilondev_xy_crust_mantle,
+ mp->d_b_epsilondev_xz_crust_mantle,
+ mp->d_b_epsilondev_yz_crust_mantle,
+ mp->d_b_eps_trace_over_3_crust_mantle,
+ mp->d_cijkl_kl_crust_mantle,
+ mp->NSPEC_CRUST_MANTLE,
+ deltat);
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_kernels_elastic_cuda");
+ exit_on_cuda_error("compute_kernels_cm_cuda");
#endif
}
@@ -290,7 +299,7 @@
void FC_FUNC_(compute_kernels_ic_cuda,
COMPUTE_KERNELS_IC_CUDA)(long* Mesh_pointer,realw* deltat_f) {
- TRACE("compute_kernels_cm_cuda");
+ TRACE("compute_kernels_ic_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
@@ -307,33 +316,34 @@
dim3 threads(blocksize,1,1);
// only isotropic kernels in inner core so far implemented
- int aniso_flag = 0;
+ // density kernel
+ compute_kernels_rho_cudakernel<<<grid,threads>>>(mp->d_ibool_inner_core,
+ mp->d_accel_inner_core,
+ mp->d_b_displ_inner_core,
+ mp->d_rho_kl_inner_core,
+ mp->NSPEC_INNER_CORE,
+ deltat);
+ // isotropic kernels (shear, bulk)
+ compute_kernels_iso_cudakernel<<<grid,threads>>>(mp->d_epsilondev_xx_inner_core,
+ mp->d_epsilondev_yy_inner_core,
+ mp->d_epsilondev_xy_inner_core,
+ mp->d_epsilondev_xz_inner_core,
+ mp->d_epsilondev_yz_inner_core,
+ mp->d_eps_trace_over_3_inner_core,
+ mp->d_b_epsilondev_xx_inner_core,
+ mp->d_b_epsilondev_yy_inner_core,
+ mp->d_b_epsilondev_xy_inner_core,
+ mp->d_b_epsilondev_xz_inner_core,
+ mp->d_b_epsilondev_yz_inner_core,
+ mp->d_b_eps_trace_over_3_inner_core,
+ mp->d_beta_kl_inner_core,
+ mp->d_alpha_kl_inner_core,
+ mp->NSPEC_INNER_CORE,
+ deltat);
- compute_kernels_cudakernel<<<grid,threads>>>(mp->d_ibool_inner_core,
- mp->d_accel_inner_core,
- mp->d_b_displ_inner_core,
- mp->d_epsilondev_xx_inner_core,
- mp->d_epsilondev_yy_inner_core,
- mp->d_epsilondev_xy_inner_core,
- mp->d_epsilondev_xz_inner_core,
- mp->d_epsilondev_yz_inner_core,
- mp->d_eps_trace_over_3_inner_core,
- mp->d_b_epsilondev_xx_inner_core,
- mp->d_b_epsilondev_yy_inner_core,
- mp->d_b_epsilondev_xy_inner_core,
- mp->d_b_epsilondev_xz_inner_core,
- mp->d_b_epsilondev_yz_inner_core,
- mp->d_b_eps_trace_over_3_inner_core,
- mp->d_rho_kl_inner_core,
- mp->d_beta_kl_inner_core,
- mp->d_alpha_kl_inner_core,
- mp->NSPEC_INNER_CORE,
- deltat,
- aniso_flag);
-
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_kernels_elastic_cuda");
+ exit_on_cuda_error("compute_kernels_ic_cuda");
#endif
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2013-02-01 13:47:37 UTC (rev 21317)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2013-02-01 17:03:15 UTC (rev 21318)
@@ -1367,7 +1367,7 @@
// anisotropic kernels
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_cijkl_kl_crust_mantle),
21*size*sizeof(realw)),5206);
- print_CUDA_error_if_any(cudaMemset(mp->d_cijkl_kl_crust_mantle,0,size*sizeof(realw)),5209);
+ print_CUDA_error_if_any(cudaMemset(mp->d_cijkl_kl_crust_mantle,0,21*size*sizeof(realw)),5209);
}
// preconditioner
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2013-02-01 13:47:37 UTC (rev 21317)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2013-02-01 17:03:15 UTC (rev 21318)
@@ -763,10 +763,10 @@
void FC_FUNC_(transfer_kernels_ic_to_host,
TRANSFER_KERNELS_IC_TO_HOST)(long* Mesh_pointer,
- realw* h_rho_kl,
- realw* h_alpha_kl,
- realw* h_beta_kl,
- int* NSPEC) {}
+ realw* h_rho_kl,
+ realw* h_alpha_kl,
+ realw* h_beta_kl,
+ int* NSPEC) {}
void FC_FUNC_(transfer_kernels_oc_to_host,
TRANSFER_KERNELS_OC_TO_HOST)(long* Mesh_pointer,
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2013-02-01 13:47:37 UTC (rev 21317)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2013-02-01 17:03:15 UTC (rev 21318)
@@ -672,12 +672,10 @@
int size = NGLL3*mp->NSPEC_OUTER_CORE;
- cudaMemcpy(A_array_rotation,mp->d_A_array_rotation,size*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(B_array_rotation,mp->d_B_array_rotation,size*sizeof(realw),cudaMemcpyDeviceToHost);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_rotation_from_device");
-#endif
+ print_CUDA_error_if_any(cudaMemcpy(A_array_rotation,mp->d_A_array_rotation,
+ size*sizeof(realw),cudaMemcpyDeviceToHost),380001);
+ print_CUDA_error_if_any(cudaMemcpy(B_array_rotation,mp->d_B_array_rotation,
+ size*sizeof(realw),cudaMemcpyDeviceToHost),380002);
}
/* ----------------------------------------------------------------------------------------------- */
@@ -695,12 +693,10 @@
int size = NGLL3*mp->NSPEC_OUTER_CORE;
- cudaMemcpy(mp->d_b_A_array_rotation,A_array_rotation,size*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_B_array_rotation,B_array_rotation,size*sizeof(realw),cudaMemcpyHostToDevice);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_b_rotation_to_device");
-#endif
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_A_array_rotation,A_array_rotation,
+ size*sizeof(realw),cudaMemcpyHostToDevice),390001);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_B_array_rotation,B_array_rotation,
+ size*sizeof(realw),cudaMemcpyHostToDevice),390002);
}
@@ -727,15 +723,18 @@
int size = (*NSPEC)*NGLL3;
+ // density kernel
print_CUDA_error_if_any(cudaMemcpy(h_rho_kl,mp->d_rho_kl_crust_mantle,
size*sizeof(realw),cudaMemcpyDeviceToHost),40101);
if( ! mp->anisotropic_kl){
+ // isotropic kernels
print_CUDA_error_if_any(cudaMemcpy(h_alpha_kl,mp->d_alpha_kl_crust_mantle,
size*sizeof(realw),cudaMemcpyDeviceToHost),40102);
print_CUDA_error_if_any(cudaMemcpy(h_beta_kl,mp->d_beta_kl_crust_mantle,
- size*sizeof(realw),cudaMemcpyDeviceToHost),40103);
+ size*sizeof(realw),cudaMemcpyDeviceToHost),40103);
}else{
+ // anisotropic kernels
print_CUDA_error_if_any(cudaMemcpy(h_cijkl_kl,mp->d_cijkl_kl_crust_mantle,
21*size*sizeof(realw),cudaMemcpyDeviceToHost),40102);
}
@@ -748,10 +747,10 @@
extern "C"
void FC_FUNC_(transfer_kernels_ic_to_host,
TRANSFER_KERNELS_IC_TO_HOST)(long* Mesh_pointer,
- realw* h_rho_kl,
- realw* h_alpha_kl,
- realw* h_beta_kl,
- int* NSPEC) {
+ realw* h_rho_kl,
+ realw* h_alpha_kl,
+ realw* h_beta_kl,
+ int* NSPEC) {
TRACE("transfer_kernels_ic_to_host");
//get mesh pointer out of fortran integer container
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90 2013-02-01 13:47:37 UTC (rev 21317)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90 2013-02-01 17:03:15 UTC (rev 21318)
@@ -561,9 +561,9 @@
prod(p)=eps(i)*b_eps(j)
if(j>i) then
prod(p)=prod(p)+eps(j)*b_eps(i)
- if(j>3 .and. i<4) prod(p)=prod(p)*2
+ if(j>3 .and. i<4) prod(p) = prod(p) * 2.0_CUSTOM_REAL
endif
- if(i>3) prod(p)=prod(p)*4
+ if(i>3) prod(p) = prod(p) * 4.0_CUSTOM_REAL
p=p+1
enddo
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90 2013-02-01 13:47:37 UTC (rev 21317)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90 2013-02-01 17:03:15 UTC (rev 21318)
@@ -556,17 +556,17 @@
else if (SIMULATION_TYPE == 3) then
! to store kernels
call transfer_kernels_oc_to_host(Mesh_pointer, &
- rho_kl_outer_core,&
- alpha_kl_outer_core,NSPEC_OUTER_CORE)
+ rho_kl_outer_core,&
+ alpha_kl_outer_core,NSPEC_OUTER_CORE)
call transfer_kernels_cm_to_host(Mesh_pointer, &
- rho_kl_crust_mantle, &
- alpha_kl_crust_mantle, &
- beta_kl_crust_mantle, &
- cijkl_kl_crust_mantle,NSPEC_CRUST_MANTLE)
+ rho_kl_crust_mantle, &
+ alpha_kl_crust_mantle, &
+ beta_kl_crust_mantle, &
+ cijkl_kl_crust_mantle,NSPEC_CRUST_MANTLE)
call transfer_kernels_ic_to_host(Mesh_pointer, &
- rho_kl_inner_core, &
- alpha_kl_inner_core, &
- beta_kl_inner_core,NSPEC_INNER_CORE)
+ rho_kl_inner_core, &
+ alpha_kl_inner_core, &
+ beta_kl_inner_core,NSPEC_INNER_CORE)
! specific noise strength kernel
if( NOISE_TOMOGRAPHY == 3 ) then
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/visual_vtk.cpp
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/visual_vtk.cpp 2013-02-01 13:47:37 UTC (rev 21317)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/visual_vtk.cpp 2013-02-01 17:03:15 UTC (rev 21318)
@@ -1057,12 +1057,12 @@
double bounds[2];
double min,max;
- static int DEBUG = 0;
+ static int VERBOSE = 1;
int it = *it_h;
float time = *time_h;
- if( DEBUG ) printf(" visual: it = %d time = %f \n",it,time);
+ if( VERBOSE) printf(" visual: it = %d time = %f \n",it,time);
// time for calculating new wavefield
fs.timer->StopTimer();
@@ -1113,7 +1113,7 @@
double time_renderer = fs.timer->GetElapsedTime();
fs.timer->StartTimer();
- if( DEBUG ){
+ if( VERBOSE ){
printf(" visual: %s \n",inputString);
printf(" timer : time for rendering = %f (s) \n", time_renderer);
printf(" data : min = %e max = %e \n\n",min,max);
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/visual_vtk_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/visual_vtk_stubs.c 2013-02-01 13:47:37 UTC (rev 21317)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/visual_vtk_stubs.c 2013-02-01 17:03:15 UTC (rev 21318)
@@ -1,4 +1,4 @@
-/*
+/*
!=====================================================================
!
! S p e c f e m 3 D G l o b e V e r s i o n 5 . 1
More information about the CIG-COMMITS
mailing list