[cig-commits] commit: A bunch of fixes to move the backend to Ranger

Mercurial hg at geodynamics.org
Sun Jul 3 20:04:13 PDT 2011


changeset:   2:f7b19ac61924
user:        Walter Landry <wlandry at caltech.edu>
date:        Fri Jun 17 12:57:51 2011 -0700
files:       backend/daemon.py backend/sge_script backend/specfem_launcher.sh backend/web-portal-daemon.cfg
description:
A bunch of fixes to move the backend to Ranger


diff -r ebb567933a1a -r f7b19ac61924 backend/daemon.py
--- a/backend/daemon.py	Fri Jun 17 12:54:41 2011 -0700
+++ b/backend/daemon.py	Fri Jun 17 12:57:51 2011 -0700
@@ -13,8 +13,8 @@ class RSLUnquoted(object):
     def __init__(self, value):
         self.value = value
 
-TG_CLUSTER_SCRATCH = "/work/teragrid/tg459131"
-TG_COMMUNITY = "/projects/tg"
+TG_CLUSTER_SCRATCH = "/work/00828/tg459131"
+TG_COMMUNITY = "/scratch/projects/tg"
 
 # I'm not sure what to do about this yet.
 TACC_ENVIRONMENT = """(environment=(TG_CLUSTER_SCRATCH "%s") (TG_COMMUNITY "%s") (PATH "/opt/lsf/bin:/opt/lsf/etc:/opt/MPI/intel9/mvapich-gen2/0.9.8/bin:/opt/apps/binutils/binutils-2.17/bin:/opt/intel/compiler9.1//idb/bin:/opt/intel/compiler9.1//cc/bin:/opt/intel/compiler9.1//fc/bin:/usr/local/first:/usr/local/bin:~/bin:.:/opt/apps/pki_apps:/opt/apps/gsi-openssh-3.9/bin:/opt/lsf/bin:/opt/lsf/etc:/sbin:/usr/sbin:/usr/local/sbin:/bin:/usr/bin:/usr/local/bin:/usr/X11R6/bin:/home/teragrid/tg459131/bin:/data/TG/srb-client-3.4.1-r1/bin:/data/TG/softenv-1.6.2-r3/bin:/data/TG/tg-policy/bin:/data/TG/gx-map-0.5.3.2-r1/bin:/data/TG/tgusage-2.9-r2/bin:/usr/java/j2sdk1.4.2_12/bin:/usr/java/j2sdk1.4.2_12/jre/bin:/data/TG/globus-4.0.1-r3/sbin:/data/TG/globus-4.0.1-r3/bin:/data/TG/tgcp-1.0.0-r2/bin:/data/TG/condor-6.7.18-r1/bin:/data/TG/condor-6.7.18-r1/sbin:/data/TG/hdf4-4.2r1-r1/bin:/opt/apps/hdf5/hdf5-1.6.5/bin:/data/TG/phdf5-1.6.5/bin") (MPICH_HOME "/opt/MPI/intel9/mvapich-gen2/0.9.8"))""" % (TG_CLUSTER_SCRATCH, TG_COMMUNITY)
@@ -193,7 +193,7 @@ class ForkJobManager(JobManager):
 
     def argvForJob(self, job, extraArgs=[]):
         return ([job.resSpec['executable']] + job.resSpec['arguments'] +
-                ["--progress-url=%s" % job.progressUrl] + extraArgs)
+                extraArgs)
 
 
     def uploadOutputFilesForJob(self, job):
@@ -541,22 +541,12 @@ class SpecfemRun(Run):
             jobType = "single",
             count = 1,
             executable = self.specfemPathname,
-            arguments = ['--par-file=' + parameters,
-                         '--cmt-solution=' + event,
-                         '--stations=' + stations,
-                         '--model=' + model,
-                         "--scheduler.wait=True",
-                         "--job.name=run%05d" % self.id,
-                         "--macros.run.id=%05d" % self.id,
-                         #"--job.walltime=2*hour",
-                         "--job.stdout=stdout.txt",
-                         "--job.stderr=stderr.txt",
-                         ] + dry,
+            arguments = ["%05d" % self.id],
             )
         job.urlForInputFile = self.urlForInputFile
         job.inputFiles = [parameters, event, stations, model]
         job.outputFiles = ["specfem3dglobe.tar.gz", "output_mesher.txt", "output_solver.txt", "output_build.txt"]
-        job.monitorArgs = ["--context=monitor"]
+        job.monitorArgs = ["monitor"]
         yield job
         return
 
@@ -608,6 +598,11 @@ class PortalConnection(object):
         runs = {}
         
         while True:
+            self.clock.tick.wait()
+            self.clock.tick.wait()
+            self.clock.tick.wait()
+            self.clock.tick.wait()
+            self.clock.tick.wait()
             self.clock.tick.wait()
             
             #self.info.log("GET %s" % self.portal.runsUrl)
diff -r ebb567933a1a -r f7b19ac61924 backend/sge_script
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/sge_script	Fri Jun 17 12:57:51 2011 -0700
@@ -0,0 +1,14 @@
+#!/bin/bash  	 
+#$ -V 	# Inherit the submission environment
+#$ -cwd 	# Start job in submission directory
+#$ -N Specfem3D_Portal_RUNID 	# Job Name
+#$ -j y 	# Combine stderr and stdout
+#$ -o ../specfem_output 	# Name of the output file (eg. myMPI.oJobID)
+#$ -pe 16way NPROCS 	# Requests 16 tasks/node, NPROCS cores total
+#$ -q development 	# Queue name "development"
+#$ -l h_rt=HOURS:MINUTES:00 	# Run time (hh:mm:ss) - HOURS and MINUTES are substituted by specfem_launcher.sh
+#$ -M portal@geodynamics.org	# Use email notification address
+#$ -m be 	# Email at Begin and End of job
+set -x 	# Echo commands, use "set echo" with csh
+cd $WORK/seismo/runRUNID/SPECFEM3D_GLOBE_preconfigured
+ibrun bin/xmeshfem3D && ibrun bin/xspecfem3D
diff -r ebb567933a1a -r f7b19ac61924 backend/specfem_launcher.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/specfem_launcher.sh	Fri Jun 17 12:57:51 2011 -0700
@@ -0,0 +1,57 @@
+#!/bin/bash
+run_id=$1
+
+# If just checking status, exit early
+
+if [ $# -ge 2 -a $2 == "monitor" ]; then
+    qstat -r | grep Specfem3D_Portal_$1 > /dev/null
+    if [ $? -eq 0 ]; then
+        exit 1
+    else
+        if [ -d $WORK/seismo/run${run_id}/SPECFEM3D_GLOBE_preconfigured -a \
+                ! -f $WORK/seismo/run${run_id}/specfem3dglobe.tar.gz ]; then
+            cd $WORK/seismo/run${run_id}/SPECFEM3D_GLOBE_preconfigured
+            cp OUTPUT_FILES/output_mesher.txt OUTPUT_FILES/output_solver.txt ..
+            tar -zcf ../specfem3dglobe.tar.gz OUTPUT_FILES/
+        fi
+        exit 0
+    fi
+fi
+
+# Submit the job
+
+cd $WORK/seismo/run${run_id}
+tar -zxf /scratch/projects/tg/CIG/SPECFEM3D_GLOBE_preconfigured.tgz
+mkdir -p SPECFEM3D_GLOBE_preconfigured/DATA
+cp event.txt SPECFEM3D_GLOBE_preconfigured/DATA/CMTSOLUTION
+cp par_file.txt SPECFEM3D_GLOBE_preconfigured/DATA/Par_file
+cp stations.txt SPECFEM3D_GLOBE_preconfigured/DATA/STATIONS
+
+cd SPECFEM3D_GLOBE_preconfigured
+make meshfem3D > ../output_build.txt
+make specfem3D >> ../output_build.txt
+
+n_chunks=`grep NCHUNKS DATA/Par_file | cut -d = -f 2`
+nproc_xi=`grep NPROC_XI DATA/Par_file | cut -d = -f 2`
+nproc_eta=`grep NPROC_ETA DATA/Par_file | cut -d = -f 2`
+n_procs=`echo $n_chunks*$nproc_xi*$nproc_eta | bc`
+
+n_xi=`grep NEX_XI DATA/Par_file | cut -d = -f 2`
+n_eta=`grep NEX_ETA DATA/Par_file | cut -d = -f 2`
+
+record_length=`grep RECORD_LENGTH_IN_MINUTES DATA/Par_file | cut -d = -f 2`
+
+setup_time=30
+factor=`echo '(192*192*192*10/(6*4*4))/400' | bc`
+
+run_time=`echo "$setup_time + $n_xi*$n_xi*$n_xi*$record_length/($n_procs*$factor)" | bc`
+
+hours=`echo "$run_time/60" | bc`
+minutes=`echo "$run_time%60/1" | bc | xargs -n 1 printf "%02d"`
+
+perl -pi -e s/RUNID/${run_id}/ sge_script
+perl -pi -e s/HOURS/${hours}/ sge_script
+perl -pi -e s/MINUTES/${minutes}/ sge_script
+perl -pi -e s/NPROCS/${n_procs}/ sge_script
+
+qsub sge_script
diff -r ebb567933a1a -r f7b19ac61924 backend/web-portal-daemon.cfg
--- a/backend/web-portal-daemon.cfg	Fri Jun 17 12:54:41 2011 -0700
+++ b/backend/web-portal-daemon.cfg	Fri Jun 17 12:57:51 2011 -0700
@@ -1,21 +1,22 @@
 
 [web-portal-daemon]
-sleep-interval = 5*second
 output-root-pathname = /home/portal/public_html/output/
-output-root-url = http://crust.geodynamics.org/~portal/output/
-mineos-pathname = /home/portal/opt/mineos/bin/mineos.py
+output-root-url = http://www.geodynamics.org/~portal/output/
+dry = True
+mineos-pathname = /projects/tg/CIG/mineos/bin/mineos.sh
 
 remote = True
-specfem-pathname = /projects/tg/CIG/SPECFEM3D_GLOBE-4.0.1/specfem3D.py
-remote-shell-command = gsissh tg-login.tacc.teragrid.org
-remote-output-root = /work/teragrid/tgXXXXXX/output
-remote-download-command = gsiscp %(source)s tg-login.tacc.teragrid.org:%(dest)s
-remote-upload-command = gsiscp tg-login.tacc.teragrid.org:%(source)s %(dest)s
+specfem-pathname = /scratch/projects/tg/CIG/specfem_launcher.sh
+remote-shell-command = ssh -l tg459131 -i /home/portal/.ssh/daemon_id_rsa ranger.tacc.utexas.edu
+remote-output-root = /work/00828/tg459131/seismo
+remote-download-command = scp -i /home/portal/.ssh/daemon_id_rsa %(source)s tg459131@ranger.tacc.utexas.edu:%(dest)s
+remote-upload-command = scp -i /home/portal/.ssh/daemon_id_rsa tg459131@ranger.tacc.utexas.edu:%(source)s %(dest)s
 
 journal.device = file
 
 [web-portal-daemon.portal]
-host = crust.geodynamics.org
+host = geodynamics.org
 scheme = https
+url-root = /portals/seismo/ksil4d0z5jhegmyu/
 
 



More information about the CIG-COMMITS mailing list