[cig-commits] [commit] master: Adding the memory requirement before initializing the GPU (5f28ffa)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Mon Aug 11 03:41:38 PDT 2014
Repository : https://github.com/geodynamics/relax
On branch : master
Link : https://github.com/geodynamics/relax/compare/afc5fb16f867dce2b0d364b8b96d28b885a47c09...2f330a8481a5df32477d6505a981a34b69ce9d38
>---------------------------------------------------------------
commit 5f28ffacc081f042130e11da4bd8484154cd8554
Author: sagar masuti <sagar.masuti at gmail.com>
Date: Mon Aug 11 18:26:45 2014 +0800
Adding the memory requirement before initializing the GPU
>---------------------------------------------------------------
5f28ffacc081f042130e11da4bd8484154cd8554
INSTALL | 22 ++++++++++--------
cuda.py | 48 +++++++++++++++++++--------------------
src/cu_elastic.cu | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
wscript | 14 ++++++------
4 files changed, 108 insertions(+), 44 deletions(-)
diff --git a/INSTALL b/INSTALL
index 91a31ec..6007eb3 100644
--- a/INSTALL
+++ b/INSTALL
@@ -45,16 +45,6 @@ configured Relax with the command
CPPFLAGS="-I/usr/include/netcdf-4" LDFLAGS="-L/usr/lib64" ./waf configure --proj-dir=/home/walter/src/relax/relax-bin --gmt-dir=/home/walter/src/relax/relax-bin/ --mkl-incdir=/opt/intel/composerxe-2011.1.107/mkl/include/ --mkl-libdir=/opt/intel/composerxe-2011.1.107/mkl/lib/intel64/ --check-c-compiler=icc --check-fortran-compiler=ifort
-To build Relax for NVIDIA GPUs on Linux, we need to provide certain command-line options
-
- ./waf configure --cuda-dir=/usr/local/cuda --use-fftw
- ./waf
-
-If you get an error 'CUDA Runtime API error 38: no CUDA-capable device is detected', you might need to explicitly enable the GPUs with the following command
-
- sudo nvidia-xconfig --enable-all-gpus
-
-
Depending on how your libraries were built, you may have to set
LD_LIBRARY_PATH (Linux) or DYLD_LIBRARY_PATH (Mac) so that Relax can
find them. For example, on Mac OS X using Fink, you may have to run the command
@@ -70,6 +60,18 @@ CPPFLAGS="-I/opt/GMT-4.5.7/netcdf/include" LDFLAGS="-L/usr/lib64 -lgdal" ./waf c
./waf build
+---------
+GPU
+---------
+To build Relax for NVIDIA GPUs on Linux, we need to provide certain command-line options
+
+ ./waf configure --use-cuda --cuda-dir=/usr/local/cuda
+ ./waf
+
+If you get an error 'CUDA Runtime API error 38: no CUDA-capable device is detected', you might need to explicitly enable the GPUs with the following command
+
+ sudo nvidia-xconfig --enable-all-gpus
+
-------
Doxygen
-------
diff --git a/cuda.py b/cuda.py
index edf6cc3..d0b694e 100644
--- a/cuda.py
+++ b/cuda.py
@@ -11,40 +11,40 @@ from waflib.Tools import ccroot, c_preproc
from waflib.Configure import conf
class cuda(Task.Task):
- run_str = '${NVCC} ${CUDAFLAGS} ${CXXFLAGS} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${CPPPATH_ST:INCPATHS} ${DEFINES_ST:DEFINES} ${CXX_SRC_F}${SRC} ${CXX_TGT_F}${TGT}'
- color = 'GREEN'
- ext_in = ['.h']
- vars = ['CCDEPS']
- scan = c_preproc.scan
- shell = False
+ run_str = '${NVCC} ${CUDAFLAGS} ${CXXFLAGS} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${CPPPATH_ST:INCPATHS} ${DEFINES_ST:DEFINES} ${CXX_SRC_F}${SRC} ${CXX_TGT_F}${TGT}'
+ color = 'GREEN'
+ ext_in = ['.h']
+ vars = ['CCDEPS']
+ scan = c_preproc.scan
+ shell = False
@extension('.cu', '.cuda')
def c_hook(self, node):
- return self.create_compiled_task('cuda', node)
+ return self.create_compiled_task('cuda', node)
def configure(conf):
- conf.find_program('nvcc', var='NVCC')
- conf.options.cuda_dir=conf.root.find_node(conf.env.NVCC).parent.parent.abspath()
- conf.find_cuda_libs()
+ conf.find_program('nvcc', var='NVCC')
+ conf.options.cuda_dir=conf.root.find_node(conf.env.NVCC).parent.parent.abspath()
+ conf.find_cuda_libs()
@conf
def find_cuda_libs(self):
- if not self.env.NVCC:
- self.fatal('check for nvcc first')
+ if not self.env.NVCC:
+ self.fatal('check for nvcc first')
- d = self.root.find_node(self.env.NVCC).parent.parent
+ d = self.root.find_node(self.env.NVCC).parent.parent
- node = d.find_node('include')
- _includes = node and node.abspath() or ''
+ node = d.find_node('include')
+ _includes = node and node.abspath() or ''
- _libpath=[]
- for x in ('lib64', 'lib'):
- try:
- _libpath.append(d.find_node(x).abspath())
- except:
- pass
- # this should not raise any error
+ _libpath=[]
+ for x in ('lib64', 'lib'):
+ try:
+ _libpath.append(d.find_node(x).abspath())
+ except:
+ pass
+ # this should not raise any error
# self.check_cxx(header='cuda.h', lib='cuda', libpath=_libpath, includes=_includes)
- self.check_cxx(header='cuda.h', lib='cudart', libpath=_libpath, includes=_includes)
- self.check_cxx(header='cufft.h', lib='cufft', libpath=_libpath, includes=_includes)
+ self.check_cxx(header='cuda.h', lib='cudart', libpath=_libpath, includes=_includes)
+ self.check_cxx(header='cufft.h', lib='cufft', libpath=_libpath, includes=_includes)
diff --git a/src/cu_elastic.cu b/src/cu_elastic.cu
index cc1b0fe..469b1b2 100644
--- a/src/cu_elastic.cu
+++ b/src/cu_elastic.cu
@@ -465,6 +465,9 @@ __host__ __device__ void cuMulSub (double dTaun,
__host__ __device__ double mycuSinh (double dX) ;
+int checkMemRequirement(int iSx1,
+ int iSx2,
+ int iSx3) ;
/* --------------------------------------------------------------------------------------------- */
@@ -819,6 +822,15 @@ extern "C" void cuinit_ (int iSx1,
ihSx2 = iSx2 ;
ihSx3 = iSx3 ;
+ if (-1 == checkMemRequirement(iSx1,iSx2,iSx3))
+ {
+ printf ("********************** ERROR ******************\n") ;
+ printf ("Memory required to run on GPU is insufficient\n");
+ printf ("Either try reducing the grid size or run on CPU only\n") ;
+ printf ("********************** ERROR ******************\n\n") ;
+ return ;
+ }
+
iSize = sizeof (float) * (iSx1 + 2) * iSx2 * iSx3 ;
cuError = cudaMalloc((void**)&gpV1, iSize) ;
@@ -906,8 +918,7 @@ extern "C" void cuinit_ (int iSx1,
printf ("cuinit : Failed to allocate memory 11\n") ;
goto CUINIT_FAILURE ;
}
- iSize = sizeof (ST_TENSOR) * iSx1 * iSx2 * (iSx3/2) ;
- cuError = cudaMalloc ((void **)&pstMoment, iSize) ;
+ cuError = cudaMalloc ((void **)&pstMoment, iSize2) ;
if (cudaSuccess != cuError)
{
printf ("cuinit : Failed to allocate memory 12\n") ;
@@ -943,7 +954,8 @@ extern "C" void cuinit_ (int iSx1,
#ifdef PRINT_DEBUG_INFO
cudaMemGetInfo(&iFreeMem, &iTotalMem);
- printf ("cuinit: Memory available after allocation is : %lu\n", iFreeMem);
+ printf ("cuinit: Memory available after allocation is : %lu MB\n", iFreeMem/(1024*1024));
+ printf ("cuinit: Total memory available is : %lu MB\n",iTotalMem/(1024*1024));
printf ("cuinit: exited with no errors\n") ;
#endif
@@ -1000,6 +1012,10 @@ extern "C" void custressupdatewrapper_ (E_TYPE eType,
pstHostSig, fData1, fData2, fData3, gpV1, gpV2, gpV3) ;
}
break ;
+ case E_INVALID_TYPE:
+ {
+ printf ("custressupdatewrapper_: Invalid input\n") ;
+ }
}
}
@@ -1598,6 +1614,11 @@ extern "C" void cutensormemset_ (E_TENSOR_TYPE eType)
cuError = cudaMemset (pstTau, 0, sizeof (ST_TENSOR) * ihSx1 * ihSx2 * ihSx3/2) ;
}
break ;
+ case E_INVALID_TENSOR_TYPE:
+ {
+ printf ("Invalid input\n") ;
+ }
+ break ;
}
if (cudaSuccess != cuError)
{
@@ -2000,6 +2021,47 @@ void cuFreeCudaMemory()
}
+int checkMemRequirement(int iSx1,
+ int iSx2,
+ int iSx3)
+{
+ int liReq = 0 ;
+ long int iTemp = 0 ;
+ size_t iTotalMem = 0 ;
+ size_t iFreeMem = 0 ;
+
+ /* Ui's, Vi's and fft's */
+ iTemp=((iSx1+2)*iSx2*iSx3*sizeof(float)*8)/(1024*1024) ;
+ liReq+=iTemp ;
+
+ /* sig, moment and tau */
+ iTemp=((iSx1*iSx2*iSx3/2)*sizeof(ST_TENSOR)*3)/(1024*1024) ;
+ liReq+=iTemp ;
+
+ /* Ti's */
+ iTemp=((iSx1+2)*iSx2*sizeof(float)*3)/(1024*1024) ;
+ liReq+=iTemp ;
+
+ iTemp=(iSx3/2)*sizeof(ST_LAYER)/(1024*1024) ;
+ liReq+=iTemp ;
+
+ /* dMinArray */
+ iTemp=((iSx1+2)*iSx2*iSx3*sizeof(float))/(1024*1024) ;
+
+ cudaMemGetInfo(&iFreeMem, &iTotalMem);
+ iTotalMem/=(1024*1024) ;
+
+ if ((liReq+iTemp) > iTotalMem)
+ {
+ printf ("\nTotal memory required is : %d MB\n", (int)(liReq+iTemp)) ;
+ printf ("Total memory available is : %lu MB \n", iTotalMem) ;
+ return -1 ;
+ }
+
+ return 0;
+}
+
+
/* ------------------------------------------- utility end -------------------------------------- */
diff --git a/wscript b/wscript
index 5d8c83d..551f13a 100644
--- a/wscript
+++ b/wscript
@@ -124,13 +124,13 @@ def configure(cnf):
if cnf.options.use_cuda:
cnf.env.CUDA=cnf.options.use_cuda
cnf.load('cuda',tooldir='.')
- if not cnf.env.CUDAFLAGS:
- cnf.env.CUDAFLAGS = ['-gencode','arch=compute_35,code=sm_35']
-# cnf.env.CUDAFLAGS += ['-Xptxas', '-dlcm=cg']
-# cnf.env.CUDAFLAGS += ['--maxrregcount=32']
-# cnf.env.CUDAFLAGS = ['-gencode','arch=compute_30,code=sm_30']
-# cnf.env.CUDAFLAGS = ['-gencode','arch=compute_20,code=sm_20']
- cnf.env.CXXFLAGS=['-m64']
+ if not cnf.env.CUDAFLAGS:
+ cnf.env.CUDAFLAGS = ['-gencode','arch=compute_35,code=sm_35']
+# cnf.env.CUDAFLAGS += ['-Xptxas', '-dlcm=cg']
+# cnf.env.CUDAFLAGS += ['--maxrregcount=32']
+# cnf.env.CUDAFLAGS = ['-gencode','arch=compute_30,code=sm_30']
+# cnf.env.CUDAFLAGS = ['-gencode','arch=compute_20,code=sm_20']
+ cnf.env.CXXFLAGS=['-m64']
if cnf.options.cuda_dir:
if not cnf.options.cuda_incdir:
cnf.options.cuda_incdir=cnf.options.cuda_dir + "/include"
More information about the CIG-COMMITS
mailing list