[cig-commits] r20529 - in seismo/3D/SPECFEM3D/trunk: src/cuda src/generate_databases src/specfem3D utils
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Thu Jul 19 14:55:39 PDT 2012
Author: danielpeter
Date: 2012-07-19 14:55:39 -0700 (Thu, 19 Jul 2012)
New Revision: 20529
Added:
seismo/3D/SPECFEM3D/trunk/src/cuda/initialize_cuda.cu
Modified:
seismo/3D/SPECFEM3D/trunk/src/cuda/check_fields_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_acoustic_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_elastic_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/it_update_displacement_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D/trunk/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c.bak
seismo/3D/SPECFEM3D/trunk/src/cuda/write_seismograms_cuda.cu
seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_regions_mesh.f90
seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases.f90
seismo/3D/SPECFEM3D/trunk/src/generate_databases/save_arrays_solver.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in
seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_kernels.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/initialize_simulation.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/save_adjoint_kernels.f90
seismo/3D/SPECFEM3D/trunk/src/specfem3D/setup_sources_receivers.f90
seismo/3D/SPECFEM3D/trunk/utils/create_specfem3D_gpu_cuda_method_stubs.pl
seismo/3D/SPECFEM3D/trunk/utils/update_headers_change_word_f90.pl
Log:
adds cuda initialization file initialize_cuda.cu
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/check_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/check_fields_cuda.cu 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/check_fields_cuda.cu 2012-07-19 21:55:39 UTC (rev 20529)
@@ -41,9 +41,455 @@
#include "mesh_constants_cuda.h"
#include "prepare_constants_cuda.h"
+/* ----------------------------------------------------------------------------------------------- */
+// Helper functions
+
/* ----------------------------------------------------------------------------------------------- */
+double get_time()
+{
+ struct timeval t;
+ struct timezone tzp;
+ gettimeofday(&t, &tzp);
+ return t.tv_sec + t.tv_usec*1e-6;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)() {
+ TRACE("pause_for_debug");
+
+ pause_for_debugger(1);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void pause_for_debugger(int pause) {
+ if(pause) {
+ int myrank;
+#ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+#else
+ myrank = 0;
+#endif
+ printf("I'm rank %d\n",myrank);
+ int i = 0;
+ char hostname[256];
+ gethostname(hostname, sizeof(hostname));
+ printf("PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
+ FILE *file = fopen("./attach_gdb.txt","w+");
+ if (file != NULL){
+ fprintf(file,"PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
+ fclose(file);
+ }
+ fflush(stdout);
+ while (0 == i)
+ sleep(5);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void exit_on_cuda_error(char* kernel_name) {
+ // sync and check to catch errors from previous async operations
+ cudaThreadSynchronize();
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess){
+ fprintf(stderr,"Error after %s: %s\n", kernel_name, cudaGetErrorString(err));
+
+ //debugging
+ //pause_for_debugger(0);
+
+ // outputs error file
+ FILE* fp;
+ int myrank;
+ char filename[BUFSIZ];
+#ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+#else
+ myrank = 0;
+#endif
+ sprintf(filename,"../in_out_files/OUTPUT_FILES/error_message_%06d.txt",myrank);
+ fp = fopen(filename,"a+");
+ if (fp != NULL){
+ fprintf(fp,"Error after %s: %s\n", kernel_name, cudaGetErrorString(err));
+ fclose(fp);
+ }
+
+ // stops program
+ //free(kernel_name);
+#ifdef WITH_MPI
+ MPI_Abort(MPI_COMM_WORLD,1);
+#endif
+ exit(EXIT_FAILURE);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void exit_on_error(char* info) {
+ printf("\nERROR: %s\n",info);
+ fflush(stdout);
+
+ // outputs error file
+ FILE* fp;
+ int myrank;
+ char filename[BUFSIZ];
+#ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+#else
+ myrank = 0;
+#endif
+ sprintf(filename,"../in_out_files/OUTPUT_FILES/error_message_%06d.txt",myrank);
+ fp = fopen(filename,"a+");
+ if (fp != NULL){
+ fprintf(fp,"ERROR: %s\n",info);
+ fclose(fp);
+ }
+
+ // stops program
+#ifdef WITH_MPI
+ MPI_Abort(MPI_COMM_WORLD,1);
+#endif
+ //free(info);
+ exit(EXIT_FAILURE);
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void print_CUDA_error_if_any(cudaError_t err, int num) {
+ if (cudaSuccess != err)
+ {
+ printf("\nCUDA error !!!!! <%s> !!!!! \nat CUDA call error code: # %d\n",cudaGetErrorString(err),num);
+ fflush(stdout);
+
+ // outputs error file
+ FILE* fp;
+ int myrank;
+ char filename[BUFSIZ];
+#ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+#else
+ myrank = 0;
+#endif
+ sprintf(filename,"../in_out_files/OUTPUT_FILES/error_message_%06d.txt",myrank);
+ fp = fopen(filename,"a+");
+ if (fp != NULL){
+ fprintf(fp,"\nCUDA error !!!!! <%s> !!!!! \nat CUDA call error code: # %d\n",cudaGetErrorString(err),num);
+ fclose(fp);
+ }
+
+ // stops program
+#ifdef WITH_MPI
+ MPI_Abort(MPI_COMM_WORLD,1);
+#endif
+ exit(EXIT_FAILURE);
+ }
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void get_free_memory(double* free_db, double* used_db, double* total_db) {
+
+ // gets memory usage in byte
+ size_t free_byte ;
+ size_t total_byte ;
+ cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
+ if ( cudaSuccess != cuda_status ){
+ printf("Error: cudaMemGetInfo fails, %s \n", cudaGetErrorString(cuda_status) );
+ exit(EXIT_FAILURE);
+ }
+
+ *free_db = (double)free_byte ;
+ *total_db = (double)total_byte ;
+ *used_db = *total_db - *free_db ;
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Saves GPU memory usage to file
+void output_free_memory(int myrank,char* info_str) {
+
+ FILE* fp;
+ char filename[BUFSIZ];
+ double free_db,used_db,total_db;
+
+ get_free_memory(&free_db,&used_db,&total_db);
+
+ sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_device_mem_usage_proc_%06d.txt",myrank);
+ fp = fopen(filename,"a+");
+ if (fp != NULL){
+ fprintf(fp,"%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
+ used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+ fclose(fp);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Fortran-callable version of above method
+extern "C"
+void FC_FUNC_(output_free_device_memory,
+ OUTPUT_FREE_DEVICE_MEMORY)(int* myrank) {
+ TRACE("output_free_device_memory");
+
+ char info[6];
+ sprintf(info,"f %d:",*myrank);
+ output_free_memory(*myrank,info);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/*
+ void show_free_memory(char* info_str) {
+
+ // show memory usage of GPU
+ int myrank;
+ #ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
+ #else
+ myrank = 0;
+ #endif
+ double free_db,used_db,total_db;
+
+ get_free_memory(&free_db,&used_db,&total_db);
+
+ printf("%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
+ used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+
+ }
+ */
+
+/*
+ extern "C"
+ void FC_FUNC_(show_free_device_memory,
+ SHOW_FREE_DEVICE_MEMORY)() {
+ TRACE("show_free_device_memory");
+
+ show_free_memory("from fortran");
+ }
+ */
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+extern "C"
+void FC_FUNC_(get_free_device_memory,
+ get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {
+ TRACE("get_free_device_memory");
+
+ double free_db,used_db,total_db;
+
+ get_free_memory(&free_db,&used_db,&total_db);
+
+ // converts to MB
+ *free = (realw) free_db/1024.0/1024.0;
+ *used = (realw) used_db/1024.0/1024.0;
+ *total = (realw) total_db/1024.0/1024.0;
+ return;
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+//daniel: helper function
+/*
+ __global__ void check_phase_ispec_kernel(int num_phase_ispec,
+ int* phase_ispec,
+ int NSPEC_AB,
+ int* ier) {
+
+ int i,ispec,iphase,count0,count1;
+ *ier = 0;
+
+ for(iphase=0; iphase < 2; iphase++){
+ count0 = 0;
+ count1 = 0;
+
+ for(i=0; i < num_phase_ispec; i++){
+ ispec = phase_ispec[iphase*num_phase_ispec + i] - 1;
+ if( ispec < -1 || ispec >= NSPEC_AB ){
+ printf("Error in d_phase_ispec_inner_elastic %d %d\n",i,ispec);
+ *ier = 1;
+ return;
+ }
+ if( ispec >= 0 ){ count0++;}
+ if( ispec < 0 ){ count1++;}
+ }
+
+ printf("check_phase_ispec done: phase %d, count = %d %d \n",iphase,count0,count1);
+
+ }
+ }
+
+ void check_phase_ispec(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check phase_ispec for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 1 ){
+ check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_elastic,
+ mp->d_phase_ispec_inner_elastic,
+ mp->NSPEC_AB,
+ d_debug);
+ }else if( type == 2 ){
+ check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_acoustic,
+ mp->d_phase_ispec_inner_acoustic,
+ mp->NSPEC_AB,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+ #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_phase_ispec");
+ #endif
+
+ }
+ */
+
+/* ----------------------------------------------------------------------------------------------- */
+//daniel: helper function
+/*
+ __global__ void check_ispec_is_kernel(int NSPEC_AB,
+ int* ispec_is,
+ int* ier) {
+
+ int ispec,count0,count1;
+
+ *ier = 0;
+ count0 = 0;
+ count1 = 0;
+ for(ispec=0; ispec < NSPEC_AB; ispec++){
+ if( ispec_is[ispec] < -1 || ispec_is[ispec] > 1 ){
+ printf("Error in ispec_is %d %d\n",ispec,ispec_is[ispec]);
+ *ier = 1;
+ return;
+ //exit(1);
+ }
+ if( ispec_is[ispec] == 0 ){count0++;}
+ if( ispec_is[ispec] != 0 ){count1++;}
+ }
+ printf("check_ispec_is done: count = %d %d\n",count0,count1);
+ }
+
+ void check_ispec_is(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check ispec_is for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 0 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_inner,
+ d_debug);
+ }else if( type == 1 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_elastic,
+ d_debug);
+ }else if( type == 2 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_acoustic,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+ #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_ispec_is");
+ #endif
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+//daniel: helper function
+/*
+ __global__ void check_array_ispec_kernel(int num_array_ispec,
+ int* array_ispec,
+ int NSPEC_AB,
+ int* ier) {
+
+ int i,ispec,count0,count1;
+
+ *ier = 0;
+ count0 = 0;
+ count1 = 0;
+
+ for(i=0; i < num_array_ispec; i++){
+ ispec = array_ispec[i] - 1;
+ if( ispec < -1 || ispec >= NSPEC_AB ){
+ printf("Error in d_array_ispec %d %d\n",i,ispec);
+ *ier = 1;
+ return;
+ }
+ if( ispec >= 0 ){ count0++;}
+ if( ispec < 0 ){ count1++;}
+ }
+
+ printf("check_array_ispec done: count = %d %d \n",count0,count1);
+ }
+
+ void check_array_ispec(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check array_ispec for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 1 ){
+ check_array_ispec_kernel<<<grid,threads>>>(mp->d_num_abs_boundary_faces,
+ mp->d_abs_boundary_ispec,
+ mp->NSPEC_AB,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+ #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_array_ispec");
+ #endif
+
+ }
+ */
+
+/* ----------------------------------------------------------------------------------------------- */
+
// Check functions
/* ----------------------------------------------------------------------------------------------- */
@@ -308,8 +754,8 @@
// load shared mem
unsigned int tid = threadIdx.x;
unsigned int bx = blockIdx.y*gridDim.x+blockIdx.x;
- unsigned int i = tid + bx*blockDim.x;
-
+ unsigned int i = tid + bx*blockDim.x;
+
// loads absolute values into shared memory
sdata[tid] = (i < size) ? fabs(array[i]) : 0.0 ;
@@ -347,7 +793,7 @@
realw max;
realw *d_max;
- max = 0;
+ max = 0.0;
/* way 1 : timing Elapsed time: 8.464813e-03
realw* h_array;
@@ -383,14 +829,16 @@
// launch simple reduction kernel
realw* h_max;
int blocksize = BLOCKSIZE_TRANSFER;
- int size_padded = ((int)ceil(((double)mp->NGLOB_AB)/((double)blocksize)))*blocksize;
- int num_blocks_x = size_padded/blocksize;
+
+ int size = mp->NGLOB_AB;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
-
+
//printf("num_blocks_x %i \n",num_blocks_x);
h_max = (realw*) calloc(num_blocks_x*num_blocks_y,sizeof(realw));
@@ -400,17 +848,11 @@
dim3 threads(blocksize,1,1);
if(*SIMULATION_TYPE == 1 ){
- get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
- mp->NGLOB_AB,
- d_max);
+ get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,size,d_max);
+ }else if(*SIMULATION_TYPE == 3 ){
+ get_maximum_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,size,d_max);
}
- if(*SIMULATION_TYPE == 3 ){
- get_maximum_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
- mp->NGLOB_AB,
- d_max);
- }
-
print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*num_blocks_y*sizeof(realw),
cudaMemcpyDeviceToHost),222);
@@ -483,7 +925,7 @@
// load shared mem
unsigned int tid = threadIdx.x;
unsigned int bx = blockIdx.y*gridDim.x+blockIdx.x;
- unsigned int i = tid + bx*blockDim.x;
+ unsigned int i = tid + bx*blockDim.x;
// loads values into shared memory: assume array is a vector array
sdata[tid] = (i < size) ? sqrt(array[i*3]*array[i*3]
@@ -529,14 +971,16 @@
// launch simple reduction kernel
realw* h_max;
int blocksize = BLOCKSIZE_TRANSFER;
- int size_padded = ((int)ceil(((double)mp->NGLOB_AB)/((double)blocksize)))*blocksize;
- int num_blocks_x = size_padded/blocksize;
+
+ int size = mp->NGLOB_AB;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
-
+
//printf("num_blocks_x %i \n",num_blocks_x);
h_max = (realw*) calloc(num_blocks_x*num_blocks_y,sizeof(realw));
cudaMalloc((void**)&d_max,num_blocks_x*num_blocks_y*sizeof(realw));
@@ -545,23 +989,17 @@
dim3 threads(blocksize,1,1);
if(*SIMULATION_TYPE == 1 ){
- get_maximum_vector_kernel<<<grid,threads>>>(mp->d_displ,
- mp->NGLOB_AB,
- d_max);
+ get_maximum_vector_kernel<<<grid,threads>>>(mp->d_displ,size,d_max);
+ }else if(*SIMULATION_TYPE == 3 ){
+ get_maximum_vector_kernel<<<grid,threads>>>(mp->d_b_displ,size,d_max);
}
- if(*SIMULATION_TYPE == 3 ){
- get_maximum_vector_kernel<<<grid,threads>>>(mp->d_b_displ,
- mp->NGLOB_AB,
- d_max);
- }
-
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
exit_on_cuda_error("kernel get_norm_elastic_from_device");
#endif
-
+
print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*num_blocks_y*sizeof(realw),
cudaMemcpyDeviceToHost),222);
@@ -583,5 +1021,3 @@
exit_on_cuda_error("get_norm_elastic_from_device");
#endif
}
-
-
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_acoustic_cuda.cu 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_acoustic_cuda.cu 2012-07-19 21:55:39 UTC (rev 20529)
@@ -185,7 +185,7 @@
// cudaEventCreate(&start);
// cudaEventCreate(&stop);
// cudaEventRecord( start, 0 );
-
+
if( *num_interfaces_ext_mesh == 0 ) return;
// copies buffer onto GPU
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_elastic_cuda.cu 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/compute_forces_elastic_cuda.cu 2012-07-19 21:55:39 UTC (rev 20529)
@@ -150,7 +150,7 @@
// cudaEventDestroy( stop );
// printf("boundary xfer d->h Time: %f ms\n",time);
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("transfer_boun_accel_from_device");
#endif
@@ -170,7 +170,7 @@
Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
if( mp->size_mpi_buffer > 0 ){
-
+
//daniel: todo - check below with this...
int blocksize = BLOCKSIZE_TRANSFER;
int size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
@@ -277,7 +277,7 @@
if( mp->size_mpi_buffer > 0 ){
// copy on host memory
memcpy(mp->h_recv_accel_buffer,buffer_recv_vector_ext_mesh,mp->size_mpi_buffer*sizeof(realw));
-
+
// cudaMemcpyAsync(mp->d_send_accel_buffer, buffer_recv_vector_ext_mesh,
// mp->size_mpi_buffer*sizeof(realw),cudaMemcpyHostToDevice,mp->compute_stream);
//printf("xfer to device\n");
@@ -372,7 +372,7 @@
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
if( mp->size_mpi_buffer > 0 ){
-
+
//daniel: todo - check if this copy is only needed for adjoint simulation, otherwise it is called asynchronously?
if(*FORWARD_OR_ADJOINT == 1 ){
// Wait until previous copy stream finishes. We assemble while other compute kernels execute.
@@ -422,7 +422,7 @@
// cudaEventDestroy( stop );
// printf("Boundary Assemble Kernel Execution Time: %f ms\n",time);
}
-
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
@@ -1636,10 +1636,10 @@
//daniel: we use the same stream, so kernels are executed one after the other
// synchronizes in case we run on only 1 process to avoid race-conditions
//if( mp->NPROC == 1 ){
- // // Wait until previous compute stream finishes.
- // cudaStreamSynchronize(mp->compute_stream);
+ // // Wait until previous compute stream finishes.
+ // cudaStreamSynchronize(mp->compute_stream);
//}
-
+
}
}else{
Added: seismo/3D/SPECFEM3D/trunk/src/cuda/initialize_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/initialize_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/initialize_cuda.cu 2012-07-19 21:55:39 UTC (rev 20529)
@@ -0,0 +1,181 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 1
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and CNRS / INRIA / University of Pau
+ ! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+ ! July 2012
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+#include "prepare_constants_cuda.h"
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// GPU initialization
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(initialize_cuda_device,
+ INITIALIZE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) {
+ TRACE("initialize_cuda_device");
+
+ // Gets rank number of MPI process
+ int myrank = *myrank_f;
+
+ /*
+ // cuda initialization (needs -lcuda library)
+ // note: cuInit initializes the driver API.
+ // it is needed for any following CUDA driver API function call (format cuFUNCTION(..) )
+ // however, for the CUDA runtime API functions (format cudaFUNCTION(..) )
+ // the initialization is implicit, thus cuInit() here would not be needed...
+ CUresult status = cuInit(0);
+ if ( CUDA_SUCCESS != status ) exit_on_error("CUDA driver API device initialization failed\n");
+
+ // returns a handle to the first cuda compute device
+ CUdevice dev;
+ status = cuDeviceGet(&dev, 0);
+ if ( CUDA_SUCCESS != status ) exit_on_error("CUDA device not found\n");
+
+ // gets device properties
+ int major,minor;
+ status = cuDeviceComputeCapability(&major,&minor,dev);
+ if ( CUDA_SUCCESS != status ) exit_on_error("CUDA device information not found\n");
+
+ // make sure that the device has compute capability >= 1.3
+ if (major < 1){
+ fprintf(stderr,"Compute capability major number should be at least 1, got: %d \nexiting...\n",major);
+ exit_on_error("CUDA Compute capability major number should be at least 1\n");
+ }
+ if (major == 1 && minor < 3){
+ fprintf(stderr,"Compute capability should be at least 1.3, got: %d.%d \nexiting...\n",major,minor);
+ exit_on_error("CUDA Compute capability major number should be at least 1.3\n");
+ }
+ */
+
+ // note: from here on we use the runtime API ...
+
+ // Gets number of GPU devices
+ int device_count = 0;
+ cudaGetDeviceCount(&device_count);
+ exit_on_cuda_error("CUDA runtime error: cudaGetDeviceCount failed\ncheck if driver and runtime libraries work together\nexiting...\n");
+
+ // returns device count to fortran
+ if (device_count == 0) exit_on_error("CUDA runtime error: there is no device supporting CUDA\n");
+ *ncuda_devices = device_count;
+
+ // Sets the active device
+ if(device_count >= 1) {
+ // generalized for more GPUs per node
+ // note: without previous context release, cudaSetDevice will complain with the cuda error
+ // "setting the device when a process is active is not allowed"
+ // releases previous contexts
+ cudaThreadExit();
+
+ //printf("rank %d: cuda device count = %d sets device = %d \n",myrank,device_count,myrank % device_count);
+ //MPI_Barrier(MPI_COMM_WORLD);
+
+ // sets active device
+ cudaSetDevice( myrank % device_count );
+ exit_on_cuda_error("cudaSetDevice");
+ }
+
+ // returns a handle to the active device
+ int device;
+ cudaGetDevice(&device);
+
+ // get device properties
+ struct cudaDeviceProp deviceProp;
+ cudaGetDeviceProperties(&deviceProp,device);
+
+ // exit if the machine has no CUDA-enabled device
+ if (deviceProp.major == 9999 && deviceProp.minor == 9999){
+ fprintf(stderr,"No CUDA-enabled device found, exiting...\n\n");
+ exit_on_error("CUDA runtime error: there is no CUDA-enabled device found\n");
+ }
+
+ // outputs device infos to file
+ char filename[BUFSIZ];
+ FILE* fp;
+ sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
+ fp = fopen(filename,"a+");
+ if (fp != NULL){
+ // display device properties
+ fprintf(fp,"Device Name = %s\n",deviceProp.name);
+ fprintf(fp,"multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
+ fprintf(fp,"totalGlobalMem (in MB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f));
+ fprintf(fp,"totalGlobalMem (in GB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f * 1024.f));
+ fprintf(fp,"sharedMemPerBlock (in bytes): %lu\n",(unsigned long) deviceProp.sharedMemPerBlock);
+ fprintf(fp,"Maximum number of threads per block: %d\n",deviceProp.maxThreadsPerBlock);
+ fprintf(fp,"Maximum size of each dimension of a block: %d x %d x %d\n",
+ deviceProp.maxThreadsDim[0],deviceProp.maxThreadsDim[1],deviceProp.maxThreadsDim[2]);
+ fprintf(fp,"Maximum sizes of each dimension of a grid: %d x %d x %d\n",
+ deviceProp.maxGridSize[0],deviceProp.maxGridSize[1],deviceProp.maxGridSize[2]);
+ fprintf(fp,"Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
+ if(deviceProp.canMapHostMemory){
+ fprintf(fp,"canMapHostMemory: TRUE\n");
+ }else{
+ fprintf(fp,"canMapHostMemory: FALSE\n");
+ }
+ if(deviceProp.deviceOverlap){
+ fprintf(fp,"deviceOverlap: TRUE\n");
+ }else{
+ fprintf(fp,"deviceOverlap: FALSE\n");
+ }
+
+ // outputs initial memory infos via cudaMemGetInfo()
+ double free_db,used_db,total_db;
+ get_free_memory(&free_db,&used_db,&total_db);
+ fprintf(fp,"%d: GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank,
+ used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+
+ fclose(fp);
+ }
+
+ // make sure that the device has compute capability >= 1.3
+ if (deviceProp.major < 1){
+ fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
+ exit_on_error("CUDA Compute capability major number should be at least 1\n");
+ }
+ if (deviceProp.major == 1 && deviceProp.minor < 3){
+ fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
+ exit_on_error("CUDA Compute capability major number should be at least 1.3\n");
+ }
+ // we use pinned memory for asynchronous copy
+ if( ! deviceProp.canMapHostMemory){
+ fprintf(stderr,"Device capability should allow to map host memory, exiting...\n");
+ exit_on_error("CUDA Device capability canMapHostMemory should be TRUE\n");
+ }
+}
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/it_update_displacement_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/it_update_displacement_cuda.cu 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/it_update_displacement_cuda.cu 2012-07-19 21:55:39 UTC (rev 20529)
@@ -107,8 +107,8 @@
realw deltat = *deltat_F;
realw deltatsqover2 = *deltatsqover2_F;
realw deltatover2 = *deltatover2_F;
-
- //launch kernel
+
+ //launch kernel
UpdateDispVeloc_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_displ,mp->d_veloc,mp->d_accel,
size,deltat,deltatsqover2,deltatover2);
@@ -124,7 +124,7 @@
realw b_deltat = *b_deltat_F;
realw b_deltatsqover2 = *b_deltatsqover2_F;
realw b_deltatover2 = *b_deltatover2_F;
-
+
UpdateDispVeloc_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_b_displ,mp->d_b_veloc,mp->d_b_accel,
size,b_deltat,b_deltatsqover2,b_deltatover2);
@@ -205,7 +205,7 @@
realw deltat = *deltat_F;
realw deltatsqover2 = *deltatsqover2_F;
realw deltatover2 = *deltatover2_F;
-
+
//launch kernel
UpdatePotential_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_potential_acoustic,
mp->d_potential_dot_acoustic,
@@ -216,7 +216,7 @@
realw b_deltat = *b_deltat_F;
realw b_deltatsqover2 = *b_deltatsqover2_F;
realw b_deltatover2 = *b_deltatover2_F;
-
+
UpdatePotential_kernel<<<grid,threads,0,mp->compute_stream>>>(mp->d_b_potential_acoustic,
mp->d_b_potential_dot_acoustic,
mp->d_b_potential_dot_dot_acoustic,
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/mesh_constants_cuda.h 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/mesh_constants_cuda.h 2012-07-19 21:55:39 UTC (rev 20529)
@@ -73,21 +73,18 @@
// error checking after cuda function calls
//#define ENABLE_VERY_SLOW_ERROR_CHECKING
-// helper functions
+// maximum function
#define MAX(x,y) (((x) < (y)) ? (y) : (x))
+// utility functions: defined in check_fields_cuda.cu
double get_time();
-
+void get_free_memory(double* free_db, double* used_db, double* total_db);
void print_CUDA_error_if_any(cudaError_t err, int num);
-
void pause_for_debugger(int pause);
-
void exit_on_cuda_error(char* kernel_name);
-
void exit_on_error(char* info);
+//void get_blocks_xy(int num_blocks,int* num_blocks_x,int* num_blocks_y);
-void get_blocks_xy(int num_blocks,int* num_blocks_x,int* num_blocks_y);
-
/* ----------------------------------------------------------------------------------------------- */
// cuda constant arrays
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/prepare_mesh_constants_cuda.cu 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/prepare_mesh_constants_cuda.cu 2012-07-19 21:55:39 UTC (rev 20529)
@@ -44,595 +44,11 @@
/* ----------------------------------------------------------------------------------------------- */
-// Helper functions
-
-/* ----------------------------------------------------------------------------------------------- */
-
-double get_time()
-{
- struct timeval t;
- struct timezone tzp;
- gettimeofday(&t, &tzp);
- return t.tv_sec + t.tv_usec*1e-6;
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
-void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)() {
-TRACE("pause_for_debug");
-
- pause_for_debugger(1);
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-void pause_for_debugger(int pause) {
- if(pause) {
- int myrank;
-#ifdef WITH_MPI
- MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
-#else
- myrank = 0;
-#endif
- printf("I'm rank %d\n",myrank);
- int i = 0;
- char hostname[256];
- gethostname(hostname, sizeof(hostname));
- printf("PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
- FILE *file = fopen("./attach_gdb.txt","w+");
- if (file != NULL){
- fprintf(file,"PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
- fclose(file);
- }
- fflush(stdout);
- while (0 == i)
- sleep(5);
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-void exit_on_cuda_error(char* kernel_name) {
- // sync and check to catch errors from previous async operations
- cudaThreadSynchronize();
- cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess){
- fprintf(stderr,"Error after %s: %s\n", kernel_name, cudaGetErrorString(err));
-
- //debugging
- //pause_for_debugger(0);
-
- // outputs error file
- FILE* fp;
- int myrank;
- char filename[BUFSIZ];
-#ifdef WITH_MPI
- MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
-#else
- myrank = 0;
-#endif
- sprintf(filename,"../in_out_files/OUTPUT_FILES/error_message_%06d.txt",myrank);
- fp = fopen(filename,"a+");
- if (fp != NULL){
- fprintf(fp,"Error after %s: %s\n", kernel_name, cudaGetErrorString(err));
- fclose(fp);
- }
-
- // stops program
- //free(kernel_name);
-#ifdef WITH_MPI
- MPI_Abort(MPI_COMM_WORLD,1);
-#endif
- exit(EXIT_FAILURE);
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-void exit_on_error(char* info)
-{
- printf("\nERROR: %s\n",info);
- fflush(stdout);
-
- // outputs error file
- FILE* fp;
- int myrank;
- char filename[BUFSIZ];
-#ifdef WITH_MPI
- MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
-#else
- myrank = 0;
-#endif
- sprintf(filename,"../in_out_files/OUTPUT_FILES/error_message_%06d.txt",myrank);
- fp = fopen(filename,"a+");
- if (fp != NULL){
- fprintf(fp,"ERROR: %s\n",info);
- fclose(fp);
- }
-
- // stops program
-#ifdef WITH_MPI
- MPI_Abort(MPI_COMM_WORLD,1);
-#endif
- //free(info);
- exit(EXIT_FAILURE);
- return;
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-void print_CUDA_error_if_any(cudaError_t err, int num)
-{
- if (cudaSuccess != err)
- {
- printf("\nCUDA error !!!!! <%s> !!!!! \nat CUDA call error code: # %d\n",cudaGetErrorString(err),num);
- fflush(stdout);
-
- // outputs error file
- FILE* fp;
- int myrank;
- char filename[BUFSIZ];
-#ifdef WITH_MPI
- MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
-#else
- myrank = 0;
-#endif
- sprintf(filename,"../in_out_files/OUTPUT_FILES/error_message_%06d.txt",myrank);
- fp = fopen(filename,"a+");
- if (fp != NULL){
- fprintf(fp,"\nCUDA error !!!!! <%s> !!!!! \nat CUDA call error code: # %d\n",cudaGetErrorString(err),num);
- fclose(fp);
- }
-
- // stops program
-#ifdef WITH_MPI
- MPI_Abort(MPI_COMM_WORLD,1);
-#endif
- exit(EXIT_FAILURE);
- }
- return;
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-void get_free_memory(double* free_db, double* used_db, double* total_db) {
-
- // gets memory usage in byte
- size_t free_byte ;
- size_t total_byte ;
- cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
- if ( cudaSuccess != cuda_status ){
- printf("Error: cudaMemGetInfo fails, %s \n", cudaGetErrorString(cuda_status) );
- exit(EXIT_FAILURE);
- }
-
- *free_db = (double)free_byte ;
- *total_db = (double)total_byte ;
- *used_db = *total_db - *free_db ;
- return;
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-// Saves GPU memory usage to file
-void output_free_memory(int myrank,char* info_str) {
-
- FILE* fp;
- char filename[BUFSIZ];
- double free_db,used_db,total_db;
-
- get_free_memory(&free_db,&used_db,&total_db);
-
- sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_device_mem_usage_proc_%06d.txt",myrank);
- fp = fopen(filename,"a+");
- if (fp != NULL){
- fprintf(fp,"%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
- used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
- fclose(fp);
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-// Fortran-callable version of above method
-extern "C"
-void FC_FUNC_(output_free_device_memory,
- OUTPUT_FREE_DEVICE_MEMORY)(int* myrank) {
-TRACE("output_free_device_memory");
-
- char info[6];
- sprintf(info,"f %d:",*myrank);
- output_free_memory(*myrank,info);
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-/*
-void show_free_memory(char* info_str) {
-
- // show memory usage of GPU
- int myrank;
-#ifdef WITH_MPI
- MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
-#else
- myrank = 0;
-#endif
- double free_db,used_db,total_db;
-
- get_free_memory(&free_db,&used_db,&total_db);
-
- printf("%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
- used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
-
-}
-*/
-
-/*
-extern "C"
-void FC_FUNC_(show_free_device_memory,
- SHOW_FREE_DEVICE_MEMORY)() {
- TRACE("show_free_device_memory");
-
- show_free_memory("from fortran");
-}
-*/
-
-/* ----------------------------------------------------------------------------------------------- */
-
-
-extern "C"
-void FC_FUNC_(get_free_device_memory,
- get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {
-TRACE("get_free_device_memory");
-
- double free_db,used_db,total_db;
-
- get_free_memory(&free_db,&used_db,&total_db);
-
- // converts to MB
- *free = (realw) free_db/1024.0/1024.0;
- *used = (realw) used_db/1024.0/1024.0;
- *total = (realw) total_db/1024.0/1024.0;
- return;
-}
-
-
-/* ----------------------------------------------------------------------------------------------- */
-//daniel: helper function
-/*
-__global__ void check_phase_ispec_kernel(int num_phase_ispec,
- int* phase_ispec,
- int NSPEC_AB,
- int* ier) {
-
- int i,ispec,iphase,count0,count1;
- *ier = 0;
-
- for(iphase=0; iphase < 2; iphase++){
- count0 = 0;
- count1 = 0;
-
- for(i=0; i < num_phase_ispec; i++){
- ispec = phase_ispec[iphase*num_phase_ispec + i] - 1;
- if( ispec < -1 || ispec >= NSPEC_AB ){
- printf("Error in d_phase_ispec_inner_elastic %d %d\n",i,ispec);
- *ier = 1;
- return;
- }
- if( ispec >= 0 ){ count0++;}
- if( ispec < 0 ){ count1++;}
- }
-
- printf("check_phase_ispec done: phase %d, count = %d %d \n",iphase,count0,count1);
-
- }
-}
-
-void check_phase_ispec(long* Mesh_pointer_f,int type){
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- printf("check phase_ispec for type=%d\n",type);
-
- dim3 grid(1,1);
- dim3 threads(1,1,1);
-
- int* h_debug = (int*) calloc(1,sizeof(int));
- int* d_debug;
- cudaMalloc((void**)&d_debug,sizeof(int));
-
- if( type == 1 ){
- check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_elastic,
- mp->d_phase_ispec_inner_elastic,
- mp->NSPEC_AB,
- d_debug);
- }else if( type == 2 ){
- check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_acoustic,
- mp->d_phase_ispec_inner_acoustic,
- mp->NSPEC_AB,
- d_debug);
- }
-
- cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
- cudaFree(d_debug);
- if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
- free(h_debug);
- fflush(stdout);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("check_phase_ispec");
-#endif
-
-}
-*/
-
-/* ----------------------------------------------------------------------------------------------- */
-//daniel: helper function
-/*
-__global__ void check_ispec_is_kernel(int NSPEC_AB,
- int* ispec_is,
- int* ier) {
-
- int ispec,count0,count1;
-
- *ier = 0;
- count0 = 0;
- count1 = 0;
- for(ispec=0; ispec < NSPEC_AB; ispec++){
- if( ispec_is[ispec] < -1 || ispec_is[ispec] > 1 ){
- printf("Error in ispec_is %d %d\n",ispec,ispec_is[ispec]);
- *ier = 1;
- return;
- //exit(1);
- }
- if( ispec_is[ispec] == 0 ){count0++;}
- if( ispec_is[ispec] != 0 ){count1++;}
- }
- printf("check_ispec_is done: count = %d %d\n",count0,count1);
-}
-
-void check_ispec_is(long* Mesh_pointer_f,int type){
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- printf("check ispec_is for type=%d\n",type);
-
- dim3 grid(1,1);
- dim3 threads(1,1,1);
-
- int* h_debug = (int*) calloc(1,sizeof(int));
- int* d_debug;
- cudaMalloc((void**)&d_debug,sizeof(int));
-
- if( type == 0 ){
- check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
- mp->d_ispec_is_inner,
- d_debug);
- }else if( type == 1 ){
- check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
- mp->d_ispec_is_elastic,
- d_debug);
- }else if( type == 2 ){
- check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
- mp->d_ispec_is_acoustic,
- d_debug);
- }
-
- cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
- cudaFree(d_debug);
- if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
- free(h_debug);
- fflush(stdout);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("check_ispec_is");
-#endif
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-//daniel: helper function
-/*
-__global__ void check_array_ispec_kernel(int num_array_ispec,
- int* array_ispec,
- int NSPEC_AB,
- int* ier) {
-
- int i,ispec,count0,count1;
-
- *ier = 0;
- count0 = 0;
- count1 = 0;
-
- for(i=0; i < num_array_ispec; i++){
- ispec = array_ispec[i] - 1;
- if( ispec < -1 || ispec >= NSPEC_AB ){
- printf("Error in d_array_ispec %d %d\n",i,ispec);
- *ier = 1;
- return;
- }
- if( ispec >= 0 ){ count0++;}
- if( ispec < 0 ){ count1++;}
- }
-
- printf("check_array_ispec done: count = %d %d \n",count0,count1);
-}
-
-void check_array_ispec(long* Mesh_pointer_f,int type){
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- printf("check array_ispec for type=%d\n",type);
-
- dim3 grid(1,1);
- dim3 threads(1,1,1);
-
- int* h_debug = (int*) calloc(1,sizeof(int));
- int* d_debug;
- cudaMalloc((void**)&d_debug,sizeof(int));
-
- if( type == 1 ){
- check_array_ispec_kernel<<<grid,threads>>>(mp->d_num_abs_boundary_faces,
- mp->d_abs_boundary_ispec,
- mp->NSPEC_AB,
- d_debug);
- }
-
- cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
- cudaFree(d_debug);
- if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
- free(h_debug);
- fflush(stdout);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("check_array_ispec");
-#endif
-
-}
-*/
-
-/* ----------------------------------------------------------------------------------------------- */
-
// GPU preparation
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(prepare_cuda_device,
- PREPARE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) {
- TRACE("prepare_cuda_device");
-
- // Gets rank number of MPI process
- int myrank = *myrank_f;
-
-/*
- // cuda initialization (needs -lcuda library)
- // note: cuInit initializes the driver API.
- // it is needed for any following CUDA driver API function call (format cuFUNCTION(..) )
- // however, for the CUDA runtime API functions (format cudaFUNCTION(..) )
- // the initialization is implicit, thus cuInit() here would not be needed...
- CUresult status = cuInit(0);
- if ( CUDA_SUCCESS != status ) exit_on_error("CUDA driver API device initialization failed\n");
-
- // returns a handle to the first cuda compute device
- CUdevice dev;
- status = cuDeviceGet(&dev, 0);
- if ( CUDA_SUCCESS != status ) exit_on_error("CUDA device not found\n");
-
- // gets device properties
- int major,minor;
- status = cuDeviceComputeCapability(&major,&minor,dev);
- if ( CUDA_SUCCESS != status ) exit_on_error("CUDA device information not found\n");
-
- // make sure that the device has compute capability >= 1.3
- if (major < 1){
- fprintf(stderr,"Compute capability major number should be at least 1, got: %d \nexiting...\n",major);
- exit_on_error("CUDA Compute capability major number should be at least 1\n");
- }
- if (major == 1 && minor < 3){
- fprintf(stderr,"Compute capability should be at least 1.3, got: %d.%d \nexiting...\n",major,minor);
- exit_on_error("CUDA Compute capability major number should be at least 1.3\n");
- }
-*/
-
- // note: from here on we use the runtime API ...
-
- // Gets number of GPU devices
- int device_count = 0;
- cudaGetDeviceCount(&device_count);
- exit_on_cuda_error("CUDA runtime error: cudaGetDeviceCount failed\ncheck if driver and runtime libraries work together\nexiting...\n");
-
- // returns device count to fortran
- if (device_count == 0) exit_on_error("CUDA runtime error: there is no device supporting CUDA\n");
- *ncuda_devices = device_count;
-
- // Sets the active device
- if(device_count >= 1) {
- // generalized for more GPUs per node
- // note: without previous context release, cudaSetDevice will complain with the cuda error
- // "setting the device when a process is active is not allowed"
- // releases previous contexts
- cudaThreadExit();
-
- //printf("rank %d: cuda device count = %d sets device = %d \n",myrank,device_count,myrank % device_count);
- //MPI_Barrier(MPI_COMM_WORLD);
-
- // sets active device
- cudaSetDevice( myrank % device_count );
- exit_on_cuda_error("cudaSetDevice");
- }
-
- // returns a handle to the active device
- int device;
- cudaGetDevice(&device);
-
- // get device properties
- struct cudaDeviceProp deviceProp;
- cudaGetDeviceProperties(&deviceProp,device);
-
- // exit if the machine has no CUDA-enabled device
- if (deviceProp.major == 9999 && deviceProp.minor == 9999){
- fprintf(stderr,"No CUDA-enabled device found, exiting...\n\n");
- exit_on_error("CUDA runtime error: there is no CUDA-enabled device found\n");
- }
-
- // outputs device infos to file
- char filename[BUFSIZ];
- FILE* fp;
- sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
- fp = fopen(filename,"a+");
- if (fp != NULL){
- // display device properties
- fprintf(fp,"Device Name = %s\n",deviceProp.name);
- fprintf(fp,"multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
- fprintf(fp,"totalGlobalMem (in MB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f));
- fprintf(fp,"totalGlobalMem (in GB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f * 1024.f));
- fprintf(fp,"sharedMemPerBlock (in bytes): %lu\n",(unsigned long) deviceProp.sharedMemPerBlock);
- fprintf(fp,"Maximum number of threads per block: %d\n",deviceProp.maxThreadsPerBlock);
- fprintf(fp,"Maximum size of each dimension of a block: %d x %d x %d\n",
- deviceProp.maxThreadsDim[0],deviceProp.maxThreadsDim[1],deviceProp.maxThreadsDim[2]);
- fprintf(fp,"Maximum sizes of each dimension of a grid: %d x %d x %d\n",
- deviceProp.maxGridSize[0],deviceProp.maxGridSize[1],deviceProp.maxGridSize[2]);
- fprintf(fp,"Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
- if(deviceProp.canMapHostMemory){
- fprintf(fp,"canMapHostMemory: TRUE\n");
- }else{
- fprintf(fp,"canMapHostMemory: FALSE\n");
- }
- if(deviceProp.deviceOverlap){
- fprintf(fp,"deviceOverlap: TRUE\n");
- }else{
- fprintf(fp,"deviceOverlap: FALSE\n");
- }
-
- // outputs initial memory infos via cudaMemGetInfo()
- double free_db,used_db,total_db;
- get_free_memory(&free_db,&used_db,&total_db);
- fprintf(fp,"%d: GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank,
- used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
-
- fclose(fp);
- }
-
- // make sure that the device has compute capability >= 1.3
- if (deviceProp.major < 1){
- fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
- exit_on_error("CUDA Compute capability major number should be at least 1\n");
- }
- if (deviceProp.major == 1 && deviceProp.minor < 3){
- fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
- exit_on_error("CUDA Compute capability major number should be at least 1.3\n");
- }
- // we use pinned memory for asynchronous copy
- if( ! deviceProp.canMapHostMemory){
- fprintf(stderr,"Device capability should allow to map host memory, exiting...\n");
- exit_on_error("CUDA Device capability canMapHostMemory should be TRUE\n");
- }
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
void FC_FUNC_(prepare_constants_device,
PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
int* h_NGLLX,
@@ -793,7 +209,7 @@
// device
int size_mpi_buffer = 3 * (mp->num_interfaces_ext_mesh) * (mp->max_nibool_interfaces_ext_mesh);
// send buffer
- mp->size_mpi_buffer = size_mpi_buffer;
+ mp->size_mpi_buffer = size_mpi_buffer;
if( mp->size_mpi_buffer > 0 ){
print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_send_accel_buffer),sizeof(float)*(mp->size_mpi_buffer)),8004);
mp->send_buffer = (float*)malloc((mp->size_mpi_buffer)*sizeof(float));
@@ -803,7 +219,7 @@
// receive buffer
print_CUDA_error_if_any(cudaMallocHost((void**)&(mp->h_recv_accel_buffer),sizeof(float)*(mp->size_mpi_buffer)),8004);
- mp->recv_buffer = (float*)malloc((mp->size_mpi_buffer)*sizeof(float));
+ mp->recv_buffer = (float*)malloc((mp->size_mpi_buffer)*sizeof(float));
}
//daniel: check if needed
@@ -812,16 +228,16 @@
//mp->request_send_vector_ext_mesh = request_send_vector_ext_mesh;
//mp->request_recv_vector_ext_mesh = request_recv_vector_ext_mesh;
//mp->buffer_recv_vector_ext_mesh = buffer_recv_vector_ext_mesh;
-
+
// setup two streams, one for compute and one for host<->device memory copies
// compute stream
cudaStreamCreate(&mp->compute_stream);
// copy stream (needed to transfer mpi buffers)
if( mp->size_mpi_buffer > 0 ){
cudaStreamCreate(&mp->copy_stream);
- //cudaStreamCreate(&mp->b_copy_stream);
+ //cudaStreamCreate(&mp->b_copy_stream);
}
-
+
// inner elements
print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,mp->NSPEC_AB*sizeof(int)),1205);
print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_inner, h_ispec_is_inner,
@@ -968,7 +384,7 @@
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_potential_dot_dot_buffer),
(mp->max_nibool_interfaces_ext_mesh)*(mp->num_interfaces_ext_mesh)*sizeof(realw)),2004);
}
-
+
// mass matrix
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(realw)*size_glob),2005);
print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_acoustic,rmass_acoustic,
@@ -1210,7 +626,7 @@
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer),
mp->size_mpi_buffer*sizeof(realw)),4004);
}
-
+
// mass matrix
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass),sizeof(realw)*mp->NGLOB_AB),4005);
// transfer element data
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-07-19 21:55:39 UTC (rev 20529)
@@ -5,9 +5,9 @@
! ---------------------------------------
!
! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-! July 2012
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
!
! This program is free software; you can redistribute it and/or modify
! it under the terms of the GNU General Public License as published by
@@ -39,6 +39,17 @@
// src/cuda/check_fields_cuda.cu
//
+void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)() {}
+
+void FC_FUNC_(output_free_device_memory,
+ OUTPUT_FREE_DEVICE_MEMORY)(int* myrank) {}
+
+ void FC_FUNC_(show_free_device_memory,
+ SHOW_FREE_DEVICE_MEMORY)() {}
+
+void FC_FUNC_(get_free_device_memory,
+ get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {}
+
void FC_FUNC_(check_max_norm_displ_gpu,
CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {}
@@ -76,8 +87,8 @@
void FC_FUNC_(get_norm_elastic_from_device,
GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
- long* Mesh_pointer_f,
- int* SIMULATION_TYPE) {}
+ long* Mesh_pointer_f,
+ int* SIMULATION_TYPE) {}
//
@@ -353,6 +364,17 @@
//
+// src/cuda/initialize_cuda.cu
+//
+
+void FC_FUNC_(initialize_cuda_device,
+ INITIALIZE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) {
+ fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
+ exit(1);
+}
+
+
+//
// src/cuda/it_update_displacement_cuda.cu
//
@@ -407,23 +429,6 @@
// src/cuda/prepare_mesh_constants_cuda.cu
//
-void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)() {}
-
-void FC_FUNC_(output_free_device_memory,
- OUTPUT_FREE_DEVICE_MEMORY)(int* myrank) {}
-
-void FC_FUNC_(show_free_device_memory,
- SHOW_FREE_DEVICE_MEMORY)() {}
-
-void FC_FUNC_(get_free_device_memory,
- get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {}
-
-void FC_FUNC_(prepare_cuda_device,
- PREPARE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) {
- fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
- exit(1);
-}
-
void FC_FUNC_(prepare_constants_device,
PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
int* h_NGLLX,
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c.bak
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c.bak 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c.bak 2012-07-19 21:55:39 UTC (rev 20529)
@@ -5,9 +5,9 @@
! ---------------------------------------
!
! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
+! Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+! July 2012
!
! This program is free software; you can redistribute it and/or modify
! it under the terms of the GNU General Public License as published by
Modified: seismo/3D/SPECFEM3D/trunk/src/cuda/write_seismograms_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/cuda/write_seismograms_cuda.cu 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/cuda/write_seismograms_cuda.cu 2012-07-19 21:55:39 UTC (rev 20529)
@@ -55,6 +55,7 @@
*/
+/* ----------------------------------------------------------------------------------------------- */
// Initially sets the blocks_x to be the num_blocks, and adds rows as
// needed. If an additional row is added, the row length is cut in
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_regions_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_regions_mesh.f90 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/create_regions_mesh.f90 2012-07-19 21:55:39 UTC (rev 20529)
@@ -334,53 +334,10 @@
write(IMAIN,*) ' ...saving databases'
endif
!call create_name_database(prname,myrank,LOCAL_PATH)
- call save_arrays_solver_ext_mesh(nspec,nglob_dummy, &
-! xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore,&
-! gammaxstore,gammaystore,gammazstore, &
-! jacobianstore, rho_vp,rho_vs,qmu_attenuation_store, &
-! rhostore,kappastore,mustore, &
-! rhoarraystore,kappaarraystore,etastore,phistore,tortstore,permstore, &
-! rho_vpI,rho_vpII,rho_vsI, &
-! rmass,rmass_acoustic,rmass_solid_poroelastic,rmass_fluid_poroelastic, &
- OCEANS, &
-! rmass_ocean_load,NGLOB_OCEAN, &
- ibool, &
-! xstore_dummy,ystore_dummy,zstore_dummy, &
-! abs_boundary_normal,abs_boundary_jacobian2Dw, &
-! abs_boundary_ijk,abs_boundary_ispec,num_abs_boundary_faces, &
-! free_surface_normal,free_surface_jacobian2Dw, &
-! free_surface_ijk,free_surface_ispec, &
-! num_free_surface_faces, &
-! coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
-! coupling_ac_el_ijk,coupling_ac_el_ispec, &
-! num_coupling_ac_el_faces, &
-! coupling_ac_po_normal,coupling_ac_po_jacobian2Dw, &
-! coupling_ac_po_ijk,coupling_ac_po_ispec, &
-! num_coupling_ac_po_faces, &
-! coupling_el_po_normal,coupling_el_po_jacobian2Dw, &
-! coupling_el_po_ijk,coupling_po_el_ijk,coupling_el_po_ispec, &
-! coupling_po_el_ispec,num_coupling_el_po_faces, &
+ call save_arrays_solver_ext_mesh(nspec,nglob_dummy,OCEANS,ibool, &
num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
max_interface_size_ext_mesh,ibool_interfaces_ext_mesh, &
-! prname, &
- SAVE_MESH_FILES, &
- ANISOTROPY &
-! NSPEC_ANISO, &
-! c11store,c12store,c13store,c14store,c15store,c16store, &
-! c22store,c23store,c24store,c25store,c26store,c33store, &
-! c34store,c35store,c36store,c44store,c45store,c46store, &
-! c55store,c56store,c66store, &
-! ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic, &
-! ispec_is_inner,nspec_inner_acoustic,nspec_inner_elastic,nspec_inner_poroelastic, &
-! nspec_outer_acoustic,nspec_outer_elastic,nspec_outer_poroelastic, &
-! num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
-! num_phase_ispec_elastic,phase_ispec_inner_elastic, &
-! num_phase_ispec_poroelastic,phase_ispec_inner_poroelastic, &
-! num_colors_outer_acoustic,num_colors_inner_acoustic, &
-! num_elem_colors_acoustic, &
-! num_colors_outer_elastic,num_colors_inner_elastic, &
-! num_elem_colors_elastic, &
- )
+ SAVE_MESH_FILES,ANISOTROPY)
! saves moho surface
if( SAVE_MOHO_MESH ) then
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases.f90 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/generate_databases.f90 2012-07-19 21:55:39 UTC (rev 20529)
@@ -933,15 +933,6 @@
endif
call create_regions_mesh()
-! now done inside create_regions_mesh_ext routine...
-! now done inside create_regions_mesh_ext routine...
-! Moho boundary parameters, 2-D jacobians and normals
-! if( SAVE_MOHO_MESH ) then
-! call create_regions_mesh_save_moho(myrank,nglob,NSPEC_AB, &
-! nspec2D_moho_ext,ibelm_moho,nodes_ibelm_moho, &
-! nodes_coords_ext_mesh,nnodes_ext_mesh,ibool )
-! endif
-
! print min and max of topography included
min_elevation = HUGEVAL
max_elevation = -HUGEVAL
Modified: seismo/3D/SPECFEM3D/trunk/src/generate_databases/save_arrays_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/generate_databases/save_arrays_solver.f90 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/generate_databases/save_arrays_solver.f90 2012-07-19 21:55:39 UTC (rev 20529)
@@ -27,181 +27,38 @@
! for external mesh
- subroutine save_arrays_solver_ext_mesh(nspec,nglob, &
-! xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, &
-! gammaxstore,gammaystore,gammazstore, &
-! jacobianstore, rho_vp,rho_vs,qmu_attenuation_store, &
-! rhostore,kappastore,mustore, &
-! rhoarraystore,kappaarraystore,etastore,phistore,tortstore,permstore, &
-! rho_vpI,rho_vpII,rho_vsI, &
-! rmass,rmass_acoustic,rmass_solid_poroelastic,rmass_fluid_poroelastic, &
- OCEANS, &
-! rmass_ocean_load,NGLOB_OCEAN,&
- ibool, &
-! xstore_dummy,ystore_dummy,zstore_dummy, &
-! abs_boundary_normal,abs_boundary_jacobian2Dw, &
-! abs_boundary_ijk,abs_boundary_ispec, &
-! num_abs_boundary_faces, &
-! free_surface_normal,free_surface_jacobian2Dw, &
-! free_surface_ijk,free_surface_ispec, &
-! num_free_surface_faces, &
-! coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
-! coupling_ac_el_ijk,coupling_ac_el_ispec, &
-! num_coupling_ac_el_faces, &
-! coupling_ac_po_normal,coupling_ac_po_jacobian2Dw, &
-! coupling_ac_po_ijk,coupling_ac_po_ispec, &
-! num_coupling_ac_po_faces, &
-! coupling_el_po_normal,coupling_el_po_jacobian2Dw, &
-! coupling_el_po_ijk,coupling_po_el_ijk,coupling_el_po_ispec, &
-! coupling_po_el_ispec,num_coupling_el_po_faces, &
+ subroutine save_arrays_solver_ext_mesh(nspec,nglob,OCEANS,ibool, &
num_interfaces_ext_mesh,my_neighbours_ext_mesh,nibool_interfaces_ext_mesh, &
max_interface_size_ext_mesh,ibool_interfaces_ext_mesh, &
-! prname, &
- SAVE_MESH_FILES, &
- ANISOTROPY &
-! NSPEC_ANISO, &
-! c11store,c12store,c13store,c14store,c15store,c16store, &
-! c22store,c23store,c24store,c25store,c26store,c33store, &
-! c34store,c35store,c36store,c44store,c45store,c46store, &
-! c55store,c56store,c66store, &
-! ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic, &
-! ispec_is_inner,nspec_inner_acoustic,nspec_inner_elastic,nspec_inner_poroelastic, &
-! nspec_outer_acoustic,nspec_outer_elastic,nspec_outer_poroelastic, &
-! num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
-! num_phase_ispec_elastic,phase_ispec_inner_elastic, &
-! num_phase_ispec_poroelastic,phase_ispec_inner_poroelastic, &
-! num_colors_outer_acoustic,num_colors_inner_acoustic, &
-! num_elem_colors_acoustic, &
-! num_colors_outer_elastic,num_colors_inner_elastic, &
-! num_elem_colors_elastic, &
- )
+ SAVE_MESH_FILES,ANISOTROPY)
use create_regions_mesh_ext_par
implicit none
-! include "constants.h"
-
integer :: nspec,nglob
-
-! jacobian
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: xixstore,xiystore,xizstore, &
-! etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore,jacobianstore
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rho_vp,rho_vs
-
-! attenuation
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: qmu_attenuation_store
-
-! material
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rhostore,kappastore,mustore
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: etastore,phistore,tortstore
-! real(kind=CUSTOM_REAL), dimension(2,NGLLX,NGLLY,NGLLZ,nspec) :: rhoarraystore
-! real(kind=CUSTOM_REAL), dimension(3,NGLLX,NGLLY,NGLLZ,nspec) :: kappaarraystore
-! real(kind=CUSTOM_REAL), dimension(6,NGLLX,NGLLY,NGLLZ,nspec) :: permstore
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rho_vpI,rho_vpII,rho_vsI
-! real(kind=CUSTOM_REAL), dimension(nglob) :: rmass,rmass_acoustic, &
-! rmass_solid_poroelastic,rmass_fluid_poroelastic
-! ocean load
+ ! ocean load
logical :: OCEANS
-! integer :: NGLOB_OCEAN
-! real(kind=CUSTOM_REAL),dimension(NGLOB_OCEAN) :: rmass_ocean_load
-
-! mesh coordinates
+ ! mesh coordinates
integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-! real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
-
-! absorbing boundary surface
-! integer :: num_abs_boundary_faces
-! real(kind=CUSTOM_REAL) :: abs_boundary_normal(NDIM,NGLLSQUARE,num_abs_boundary_faces)
-! real(kind=CUSTOM_REAL) :: abs_boundary_jacobian2Dw(NGLLSQUARE,num_abs_boundary_faces)
-! integer :: abs_boundary_ijk(3,NGLLSQUARE,num_abs_boundary_faces)
-! integer :: abs_boundary_ispec(num_abs_boundary_faces)
-
-! free surface
-! integer :: num_free_surface_faces
-! real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
-! real(kind=CUSTOM_REAL) :: free_surface_jacobian2Dw(NGLLSQUARE,num_free_surface_faces)
-! integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
-! integer :: free_surface_ispec(num_free_surface_faces)
-
-! acoustic-elastic coupling surface
-! integer :: num_coupling_ac_el_faces
-! real(kind=CUSTOM_REAL) :: coupling_ac_el_normal(NDIM,NGLLSQUARE,num_coupling_ac_el_faces)
-! real(kind=CUSTOM_REAL) :: coupling_ac_el_jacobian2Dw(NGLLSQUARE,num_coupling_ac_el_faces)
-! integer :: coupling_ac_el_ijk(3,NGLLSQUARE,num_coupling_ac_el_faces)
-! integer :: coupling_ac_el_ispec(num_coupling_ac_el_faces)
-
-! acoustic-poroelastic coupling surface
-! integer :: num_coupling_ac_po_faces
-! real(kind=CUSTOM_REAL) :: coupling_ac_po_normal(NDIM,NGLLSQUARE,num_coupling_ac_po_faces)
-! real(kind=CUSTOM_REAL) :: coupling_ac_po_jacobian2Dw(NGLLSQUARE,num_coupling_ac_po_faces)
-! integer :: coupling_ac_po_ijk(3,NGLLSQUARE,num_coupling_ac_po_faces)
-! integer :: coupling_ac_po_ispec(num_coupling_ac_po_faces)
-
-! elastic-poroelastic coupling surface
-! integer :: num_coupling_el_po_faces
-! real(kind=CUSTOM_REAL) :: coupling_el_po_normal(NDIM,NGLLSQUARE,num_coupling_el_po_faces)
-! real(kind=CUSTOM_REAL) :: coupling_el_po_jacobian2Dw(NGLLSQUARE,num_coupling_el_po_faces)
-! integer :: coupling_el_po_ijk(3,NGLLSQUARE,num_coupling_el_po_faces)
-! integer :: coupling_po_el_ijk(3,NGLLSQUARE,num_coupling_el_po_faces)
-! integer :: coupling_el_po_ispec(num_coupling_el_po_faces)
-! integer :: coupling_po_el_ispec(num_coupling_el_po_faces)
-
-! MPI interfaces
+ ! MPI interfaces
integer :: num_interfaces_ext_mesh
integer, dimension(num_interfaces_ext_mesh) :: my_neighbours_ext_mesh
integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
integer :: max_interface_size_ext_mesh
integer, dimension(NGLLX*NGLLX*max_interface_size_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
- integer :: max_nibool_interfaces_ext_mesh
-! file name
-! character(len=256) prname
logical :: SAVE_MESH_FILES
-
-! anisotropy
logical :: ANISOTROPY
-! integer :: NSPEC_ANISO
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
-! c11store,c12store,c13store,c14store,c15store,c16store, &
-! c22store,c23store,c24store,c25store,c26store,c33store, &
-! c34store,c35store,c36store,c44store,c45store,c46store, &
-! c55store,c56store,c66store
-! material domain flags
-! logical, dimension(nspec) :: ispec_is_acoustic,ispec_is_elastic,ispec_is_poroelastic
-
-! inner/outer elements
-! logical,dimension(nspec) :: ispec_is_inner
-! integer :: nspec_inner_acoustic,nspec_outer_acoustic
-! integer :: nspec_inner_elastic,nspec_outer_elastic
-! integer :: nspec_inner_poroelastic,nspec_outer_poroelastic
-
-! integer :: num_phase_ispec_acoustic
-! integer,dimension(num_phase_ispec_acoustic,2) :: phase_ispec_inner_acoustic
-
-! integer :: num_phase_ispec_elastic
-! integer,dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
-
-! integer :: num_phase_ispec_poroelastic
-! integer,dimension(num_phase_ispec_poroelastic,2) :: phase_ispec_inner_poroelastic
-
- ! mesh coloring
-! integer :: num_colors_outer_acoustic,num_colors_inner_acoustic
-! integer, dimension(num_colors_outer_acoustic + num_colors_inner_acoustic) :: &
-! num_elem_colors_acoustic
-! integer :: num_colors_outer_elastic,num_colors_inner_elastic
-! integer, dimension(num_colors_outer_elastic + num_colors_inner_elastic) :: &
-! num_elem_colors_elastic
-
-! local parameters
+ ! local parameters
real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: v_tmp
integer,dimension(:),allocatable :: v_tmp_i
- !real(kind=CUSTOM_REAL) :: minimum(1)
integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh_dummy
+ integer :: max_nibool_interfaces_ext_mesh
+
integer :: ier,i
-! logical :: ACOUSTIC_SIMULATION,ELASTIC_SIMULATION,POROELASTIC_SIMULATION
character(len=256) :: filename
integer, dimension(:), allocatable :: iglob_tmp
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in 2012-07-19 21:55:39 UTC (rev 20529)
@@ -29,14 +29,6 @@
# @configure_input@
-## example:
-# CUDA_LIBS = -L/apps/eiger/Cuda-4.0/cuda/lib64 -lcuda -lcudart -lcublas
-# CUDA_INC = -I/apps/eiger/Cuda-4.0/cuda/include
-# MPI_INC = -I/apps/eiger/mvapich2/1.5.1p1/mvapich2-gnu/include
-##
-#CUDA_LIBS= -L/u/dpeter/install/cuda/lib64 -lcudart -lcublas
-#MPI_INC= -I/usr/local/openmpi/1.4.3/gcc/x86_64/include
-
# CUDA
# with configure: ./configure --with-cuda CUDA_LIB=.. CUDA_INC=.. MPI_INC=..
@@ -150,6 +142,7 @@
$O/compute_kernels_cuda.cuda.o \
$O/compute_stacey_acoustic_cuda.cuda.o \
$O/compute_stacey_elastic_cuda.cuda.o \
+ $O/initialize_cuda.cuda.o \
$O/it_update_displacement_cuda.cuda.o \
$O/noise_tomography_cuda.cuda.o \
$O/prepare_mesh_constants_cuda.cuda.o \
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_kernels.f90 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_kernels.f90 2012-07-19 21:55:39 UTC (rev 20529)
@@ -287,7 +287,7 @@
+ deltat * dot_product(accelw_poroelastic(:,iglob), b_displw_poroelastic(:,iglob))
! kernel for viscous damping, see e.g. Morency et al. (2009),
- ! equation (42)
+ ! equation (42)
eta_kl(i,j,k,ispec) = eta_kl(i,j,k,ispec) &
+ deltat * dot_product(velocw_poroelastic(:,iglob), b_displw_poroelastic(:,iglob))
@@ -320,7 +320,7 @@
B_kl(i,j,k,ispec) = B_kl(i,j,k,ispec) &
+ deltat * (9 * epsilons_trace_over_3(i,j,k,ispec) &
* b_epsilons_trace_over_3(i,j,k,ispec))
-
+
C_kl(i,j,k,ispec) = C_kl(i,j,k,ispec) &
+ deltat * (9 * epsilons_trace_over_3(i,j,k,ispec) &
* b_epsilonw_trace_over_3(i,j,k,ispec) &
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/initialize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/initialize_simulation.f90 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/initialize_simulation.f90 2012-07-19 21:55:39 UTC (rev 20529)
@@ -348,6 +348,7 @@
use specfem_par_acoustic
use specfem_par_poroelastic
implicit none
+ ! local parameters
integer :: ncuda_devices,ncuda_devices_min,ncuda_devices_max
! GPU_MODE now defined in Par_file
@@ -372,7 +373,7 @@
endif
! initializes GPU and outputs info to files for all processes
- call prepare_cuda_device(myrank,ncuda_devices)
+ call initialize_cuda_device(myrank,ncuda_devices)
! collects min/max of local devices found for statistics
call sync_all()
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90 2012-07-19 21:55:39 UTC (rev 20529)
@@ -723,7 +723,7 @@
!=====================================================================
subroutine it_print_elapsed_time()
-
+
use specfem_par
use specfem_par_elastic
use specfem_par_acoustic
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90 2012-07-19 21:55:39 UTC (rev 20529)
@@ -33,7 +33,7 @@
use specfem_par_elastic
use specfem_par_poroelastic
use specfem_par_movie
- implicit none
+ implicit none
! local parameters
double precision :: tCPU
@@ -103,8 +103,8 @@
!daniel debug: time estimation
! elastic elements: time per element t_per_element = 1.40789368e-05 s
! total time = nspec * nstep * t_per_element
- endif
-
+ endif
+
end subroutine prepare_timerun
!
@@ -117,7 +117,7 @@
use specfem_par_acoustic
use specfem_par_elastic
use specfem_par_poroelastic
- use specfem_par_movie
+ use specfem_par_movie
implicit none
! flag for any movie simulation
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/save_adjoint_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/save_adjoint_kernels.f90 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/save_adjoint_kernels.f90 2012-07-19 21:55:39 UTC (rev 20529)
@@ -38,7 +38,7 @@
real(kind=CUSTOM_REAL) :: rhol,mul,kappal
real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: weights_kernel
real(kind=CUSTOM_REAL) :: rhol_s,rhol_f,rhol_bar,phil,tortl
- real(kind=CUSTOM_REAL) :: mul_s,kappal_s
+ real(kind=CUSTOM_REAL) :: kappal_s ! mul_s
real(kind=CUSTOM_REAL) :: kappal_f,etal_f
real(kind=CUSTOM_REAL) :: mul_fr,kappal_fr
real(kind=CUSTOM_REAL) :: permlxx,permlxy,permlxz,permlyz,permlyy,permlzz
Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/setup_sources_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/setup_sources_receivers.f90 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/setup_sources_receivers.f90 2012-07-19 21:55:39 UTC (rev 20529)
@@ -53,7 +53,7 @@
write(IMAIN,*)
write(IMAIN,*) 'found a total of ',nrec_tot_found,' receivers in all the slices'
if(NSOURCES > 1) write(IMAIN,*) 'Using ',NSOURCES,' point sources'
- write(IMAIN,*)
+ write(IMAIN,*)
endif
end subroutine setup_sources_receivers
Modified: seismo/3D/SPECFEM3D/trunk/utils/create_specfem3D_gpu_cuda_method_stubs.pl
===================================================================
--- seismo/3D/SPECFEM3D/trunk/utils/create_specfem3D_gpu_cuda_method_stubs.pl 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/utils/create_specfem3D_gpu_cuda_method_stubs.pl 2012-07-19 21:55:39 UTC (rev 20529)
@@ -107,7 +107,7 @@
# function declaration
if($line =~ /{/){
# function declaration ends
- if( $line =~ /PREPARE_CUDA_DEVICE/ ){
+ if( $line =~ /INITIALIZE_CUDA_DEVICE/ ){
# adds warning
print IOUT "$line \n$warning\} \n\n";
}else{
Modified: seismo/3D/SPECFEM3D/trunk/utils/update_headers_change_word_f90.pl
===================================================================
--- seismo/3D/SPECFEM3D/trunk/utils/update_headers_change_word_f90.pl 2012-07-18 22:19:20 UTC (rev 20528)
+++ seismo/3D/SPECFEM3D/trunk/utils/update_headers_change_word_f90.pl 2012-07-19 21:55:39 UTC (rev 20529)
@@ -16,40 +16,40 @@
# in the code (which is dangerous, but really easier to program...)
#
- @objects = `ls ../src/*/*.f90 ../src/*/*.F90 ../src/*/*.c ../src/*/*.cu ../src/*/*.h.in ../src/*/*.h`;
+ at objects = `ls ../src/*/*.f90 ../src/*/*.F90 ../src/*/*.c ../src/*/*.cu ../src/*/*.h.in ../src/*/*.h`;
- foreach $name (@objects) {
- chop $name;
-# change tabs to white spaces
- system("expand -2 < $name > _____tutu01_____");
- $f90name = $name;
- print STDOUT "Changing word in file $name ...\n";
+foreach $name (@objects) {
+ chop $name;
+ # change tabs to white spaces
+ system("expand -2 < $name > _____tutu01_____");
+ $f90name = $name;
+ print STDOUT "Changing word in file $name ...\n";
- open(FILEF77,"<_____tutu01_____");
- open(FILEF90,">$f90name");
+ open(FILEF77,"<_____tutu01_____");
+ open(FILEF90,">$f90name");
-# open the source file
- while($line = <FILEF77>) {
- chop $line;
+ # open the source file
+ while($line = <FILEF77>) {
+ chop $line;
-# suppress trailing white spaces and carriage return
- $line =~ s/\s*$//;
+ # suppress trailing white spaces and carriage return
+ $line =~ s/\s*$//;
-# change the version number and copyright information
- $line =~ s#! April 2011#! July 2012#og;
- $line =~ s#! Princeton University, USA and University of Pau / CNRS / INRIA#! Princeton University, USA and CNRS / INRIA / University of Pau#og;
- $line =~ s#! \(c\) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA#! \(c\) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau#og;
-# $line =~ s#rmass_sigma#rmass_time_integral_of_sigma#og;
+ # change the version number and copyright information
+ #$line =~ s#! April 2011#! July 2012#og;
+ #$line =~ s#! Princeton University, USA and University of Pau / CNRS / INRIA#! Princeton University, USA and CNRS / INRIA / University of Pau#og;
+ #$line =~ s#! \(c\) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA#! \(c\) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau#og;
+ # $line =~ s#rmass_sigma#rmass_time_integral_of_sigma#og;
-# write the modified line to the output file
- print FILEF90 "$line\n";
+ # write the modified line to the output file
+ print FILEF90 "$line\n";
}
- close(FILEF77);
- close(FILEF90);
+ close(FILEF77);
+ close(FILEF90);
- }
+}
- system("rm -f _____tutu01_____");
+system("rm -f _____tutu01_____");
More information about the CIG-COMMITS
mailing list