[cig-commits] commit: A bunch of fixes to move the backend to Ranger
Mercurial
hg at geodynamics.org
Sun Jul 3 20:04:13 PDT 2011
changeset: 2:f7b19ac61924
user: Walter Landry <wlandry at caltech.edu>
date: Fri Jun 17 12:57:51 2011 -0700
files: backend/daemon.py backend/sge_script backend/specfem_launcher.sh backend/web-portal-daemon.cfg
description:
A bunch of fixes to move the backend to Ranger
diff -r ebb567933a1a -r f7b19ac61924 backend/daemon.py
--- a/backend/daemon.py Fri Jun 17 12:54:41 2011 -0700
+++ b/backend/daemon.py Fri Jun 17 12:57:51 2011 -0700
@@ -13,8 +13,8 @@ class RSLUnquoted(object):
def __init__(self, value):
self.value = value
-TG_CLUSTER_SCRATCH = "/work/teragrid/tg459131"
-TG_COMMUNITY = "/projects/tg"
+TG_CLUSTER_SCRATCH = "/work/00828/tg459131"
+TG_COMMUNITY = "/scratch/projects/tg"
# I'm not sure what to do about this yet.
TACC_ENVIRONMENT = """(environment=(TG_CLUSTER_SCRATCH "%s") (TG_COMMUNITY "%s") (PATH "/opt/lsf/bin:/opt/lsf/etc:/opt/MPI/intel9/mvapich-gen2/0.9.8/bin:/opt/apps/binutils/binutils-2.17/bin:/opt/intel/compiler9.1//idb/bin:/opt/intel/compiler9.1//cc/bin:/opt/intel/compiler9.1//fc/bin:/usr/local/first:/usr/local/bin:~/bin:.:/opt/apps/pki_apps:/opt/apps/gsi-openssh-3.9/bin:/opt/lsf/bin:/opt/lsf/etc:/sbin:/usr/sbin:/usr/local/sbin:/bin:/usr/bin:/usr/local/bin:/usr/X11R6/bin:/home/teragrid/tg459131/bin:/data/TG/srb-client-3.4.1-r1/bin:/data/TG/softenv-1.6.2-r3/bin:/data/TG/tg-policy/bin:/data/TG/gx-map-0.5.3.2-r1/bin:/data/TG/tgusage-2.9-r2/bin:/usr/java/j2sdk1.4.2_12/bin:/usr/java/j2sdk1.4.2_12/jre/bin:/data/TG/globus-4.0.1-r3/sbin:/data/TG/globus-4.0.1-r3/bin:/data/TG/tgcp-1.0.0-r2/bin:/data/TG/condor-6.7.18-r1/bin:/data/TG/condor-6.7.18-r1/sbin:/data/TG/hdf4-4.2r1-r1/bin:/opt/apps/hdf5/hdf5-1.6.5/bin:/data/TG/phdf5-1.6.5/bin") (MPICH_HOME "/opt/MPI/intel9/mvapich-gen2/0.9.8"))""" % (TG_CLUSTER_SCRATCH, TG_COMMUNITY)
@@ -193,7 +193,7 @@ class ForkJobManager(JobManager):
def argvForJob(self, job, extraArgs=[]):
return ([job.resSpec['executable']] + job.resSpec['arguments'] +
- ["--progress-url=%s" % job.progressUrl] + extraArgs)
+ extraArgs)
def uploadOutputFilesForJob(self, job):
@@ -541,22 +541,12 @@ class SpecfemRun(Run):
jobType = "single",
count = 1,
executable = self.specfemPathname,
- arguments = ['--par-file=' + parameters,
- '--cmt-solution=' + event,
- '--stations=' + stations,
- '--model=' + model,
- "--scheduler.wait=True",
- "--job.name=run%05d" % self.id,
- "--macros.run.id=%05d" % self.id,
- #"--job.walltime=2*hour",
- "--job.stdout=stdout.txt",
- "--job.stderr=stderr.txt",
- ] + dry,
+ arguments = ["%05d" % self.id],
)
job.urlForInputFile = self.urlForInputFile
job.inputFiles = [parameters, event, stations, model]
job.outputFiles = ["specfem3dglobe.tar.gz", "output_mesher.txt", "output_solver.txt", "output_build.txt"]
- job.monitorArgs = ["--context=monitor"]
+ job.monitorArgs = ["monitor"]
yield job
return
@@ -608,6 +598,11 @@ class PortalConnection(object):
runs = {}
while True:
+ self.clock.tick.wait()
+ self.clock.tick.wait()
+ self.clock.tick.wait()
+ self.clock.tick.wait()
+ self.clock.tick.wait()
self.clock.tick.wait()
#self.info.log("GET %s" % self.portal.runsUrl)
diff -r ebb567933a1a -r f7b19ac61924 backend/sge_script
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/sge_script Fri Jun 17 12:57:51 2011 -0700
@@ -0,0 +1,14 @@
+#!/bin/bash
+#$ -V # Inherit the submission environment
+#$ -cwd # Start job in submission directory
+#$ -N Specfem3D_Portal_RUNID # Job Name
+#$ -j y # Combine stderr and stdout
+#$ -o ../specfem_output # Name of the output file (eg. myMPI.oJobID)
+#$ -pe 16way NPROCS # Requests 16 tasks/node, NPROCS cores total
+#$ -q development # Queue name "normal"
+#$ -l h_rt=HOURS:MINUTES:00 # Run time (hh:mm:ss) - 1.5 hours
+#$ -M portal at geodynamics.org # Use email notification address
+#$ -m be # Email at Begin and End of job
+set -x # Echo commands, use "set echo" with csh
+cd $WORK/seismo/runRUNID/SPECFEM3D_GLOBE_preconfigured
+ibrun bin/xmeshfem3D && ibrun bin/xspecfem3D
diff -r ebb567933a1a -r f7b19ac61924 backend/specfem_launcher.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/specfem_launcher.sh Fri Jun 17 12:57:51 2011 -0700
@@ -0,0 +1,57 @@
+#!/bin/bash
+run_id=$1
+
+# If just checking status, exit early
+
+if [ $# -ge 2 -a $2 == "monitor" ]; then
+ qstat -r | grep Specfem3D_Portal_$1 > /dev/null
+ if [ $? -eq 0 ]; then
+ exit 1
+ else
+ if [ -d $WORK/seismo/run${run_id}/SPECFEM3D_GLOBE_preconfigured -a \
+ ! -f $WORK/seismo/run${run_id}/specfem3dglobe.tar.gz ]; then
+ cd $WORK/seismo/run${run_id}/SPECFEM3D_GLOBE_preconfigured
+ cp OUTPUT_FILES/output_mesher.txt OUTPUT_FILES/output_solver.txt ..
+ tar -zcf ../specfem3dglobe.tar.gz OUTPUT_FILES/
+ fi
+ exit 0
+ fi
+fi
+
+# Submit the job
+
+cd $WORK/seismo/run${run_id}
+tar -zxf /scratch/projects/tg/CIG/SPECFEM3D_GLOBE_preconfigured.tgz
+mkdir -p SPECFEM3D_GLOBE_preconfigured/DATA
+cp event.txt SPECFEM3D_GLOBE_preconfigured/DATA/CMTSOLUTION
+cp par_file.txt SPECFEM3D_GLOBE_preconfigured/DATA/Par_file
+cp stations.txt SPECFEM3D_GLOBE_preconfigured/DATA/STATIONS
+
+cd SPECFEM3D_GLOBE_preconfigured
+make meshfem3D > ../output_build.txt
+make specfem3D >> ../output_build.txt
+
+n_chunks=`grep NCHUNKS DATA/Par_file | cut -d = -f 2`
+nproc_xi=`grep NPROC_XI DATA/Par_file | cut -d = -f 2`
+nproc_eta=`grep NPROC_ETA DATA/Par_file | cut -d = -f 2`
+n_procs=`echo $n_chunks*$nproc_xi*$nproc_eta | bc`
+
+n_xi=`grep NEX_XI DATA/Par_file | cut -d = -f 2`
+n_eta=`grep NEX_ETA DATA/Par_file | cut -d = -f 2`
+
+record_length=`grep RECORD_LENGTH_IN_MINUTES DATA/Par_file | cut -d = -f 2`
+
+setup_time=30
+factor=`echo '(192*192*192*10/(6*4*4))/400' | bc`
+
+run_time=`echo "$setup_time + $n_xi*$n_xi*$n_xi*$record_length/($n_procs*$factor)" | bc`
+
+hours=`echo "$run_time/60" | bc`
+minutes=`echo "$run_time%60/1" | bc | xargs -n 1 printf "%02d"`
+
+perl -pi -e s/RUNID/${run_id}/ sge_script
+perl -pi -e s/HOURS/${hours}/ sge_script
+perl -pi -e s/MINUTES/${minutes}/ sge_script
+perl -pi -e s/NPROCS/${n_procs}/ sge_script
+
+qsub sge_script
diff -r ebb567933a1a -r f7b19ac61924 backend/web-portal-daemon.cfg
--- a/backend/web-portal-daemon.cfg Fri Jun 17 12:54:41 2011 -0700
+++ b/backend/web-portal-daemon.cfg Fri Jun 17 12:57:51 2011 -0700
@@ -1,21 +1,22 @@
[web-portal-daemon]
-sleep-interval = 5*second
output-root-pathname = /home/portal/public_html/output/
-output-root-url = http://crust.geodynamics.org/~portal/output/
-mineos-pathname = /home/portal/opt/mineos/bin/mineos.py
+output-root-url = http://www.geodynamics.org/~portal/output/
+dry = True
+mineos-pathname = /projects/tg/CIG/mineos/bin/mineos.sh
remote = True
-specfem-pathname = /projects/tg/CIG/SPECFEM3D_GLOBE-4.0.1/specfem3D.py
-remote-shell-command = gsissh tg-login.tacc.teragrid.org
-remote-output-root = /work/teragrid/tgXXXXXX/output
-remote-download-command = gsiscp %(source)s tg-login.tacc.teragrid.org:%(dest)s
-remote-upload-command = gsiscp tg-login.tacc.teragrid.org:%(source)s %(dest)s
+specfem-pathname = /scratch/projects/tg/CIG/specfem_launcher.sh
+remote-shell-command = ssh -l tg459131 -i /home/portal/.ssh/daemon_id_rsa ranger.tacc.utexas.edu
+remote-output-root = /work/00828/tg459131/seismo
+remote-download-command = scp -i /home/portal/.ssh/daemon_id_rsa %(source)s tg459131 at ranger.tacc.utexas.edu:%(dest)s
+remote-upload-command = scp -i /home/portal/.ssh/daemon_id_rsa tg459131 at ranger.tacc.utexas.edu:%(source)s %(dest)s
journal.device = file
[web-portal-daemon.portal]
-host = crust.geodynamics.org
+host = geodynamics.org
scheme = https
+url-root = /portals/seismo/ksil4d0z5jhegmyu/
More information about the CIG-COMMITS mailing list