[cig-commits] r15750 - doc/geodynamics.org/benchmarks/trunk
luis at geodynamics.org
luis at geodynamics.org
Mon Oct 5 15:39:38 PDT 2009
Author: luis
Date: 2009-10-05 15:39:37 -0700 (Mon, 05 Oct 2009)
New Revision: 15750
Added:
doc/geodynamics.org/benchmarks/trunk/common.py
Modified:
doc/geodynamics.org/benchmarks/trunk/upload.py
Log:
Made various improvements to upload.py
Also, refactored some functionality into a separate file called
common.py, since we'll be using that in other files.
Added: doc/geodynamics.org/benchmarks/trunk/common.py
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/common.py (rev 0)
+++ doc/geodynamics.org/benchmarks/trunk/common.py 2009-10-05 22:39:37 UTC (rev 15750)
@@ -0,0 +1,53 @@
+#!/usr/bin/env python2.6
+#
+# Reference docs:
+# http://www.brunningonline.net/simon/blog/archives/002022.html
+# http://docs.python.org/library/os.path.html#os.path.dirname
+# http://docs.python.org/library/os.path.html#os.path.basename
+#
+
+import os
+
+
# Plone group collections under which benchmark pages live.
groups = 'cs geodyn long magma mc seismo short'.split()
+
+
def locate(pattern, root=None):
    """
    Yield paths of files under *root* whose basename matches *pattern*.

    pattern -- an fnmatch-style glob pattern, e.g. '*.html'
    root    -- directory tree to walk; defaults to the current working
               directory at *call* time.  The previous default,
               ``root=os.getcwd()`` in the signature, was evaluated once
               at import time, silently freezing whatever directory the
               module happened to be imported from.
    """
    from fnmatch import fnmatch
    if root is None:
        root = os.getcwd()
    for (path, dirs, files) in os.walk(root):
        for filename in files:
            if fnmatch(filename, pattern):
                yield os.path.join(path, filename)
+
+
def readlines(filename):
    """
    Generate the lines of the given file, each stripped of
    leading/trailing whitespace (including the newline).
    """
    with open(filename, 'r') as stream:
        for raw_line in stream:
            yield raw_line.strip()
+
+
def split_path(filename):
    """
    Split path into three components: (dirname, base, extension)
    """
    # os.path.split yields (dirname, basename) in one call
    dirname, basename = os.path.split(filename)
    base, extension = os.path.splitext(basename)
    return (dirname, base, extension)
+
+
Modified: doc/geodynamics.org/benchmarks/trunk/upload.py
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/upload.py 2009-10-05 22:39:30 UTC (rev 15749)
+++ doc/geodynamics.org/benchmarks/trunk/upload.py 2009-10-05 22:39:37 UTC (rev 15750)
@@ -1,14 +1,9 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2.6
#
+# Reference docs:
# http://pexpect.sourceforge.net/pexpect.html
# http://linux.byexamples.com/archives/346/python-how-to-access-ssh-with-pexpect/
# http://www.palovick.com/code/python/python-ssh-client.php
-# http://www.brunningonline.net/simon/blog/archives/002022.html
-# http://docs.python.org/library/os.path.html
-# http://docs.python.org/library/os.path.html#os.path.dirname
-# http://docs.python.org/library/os.path.html#os.path.basename
-# http://docs.python.org/library/os.path.html#os.path.exists
-# http://docs.python.org/library/os.path.html#os.path.getmtime
# http://docs.python.org/library/getpass.html
# http://docs.python.org/library/pickle.html
# http://docs.python.org/library/hashlib.html
@@ -16,12 +11,15 @@
import os
import pexpect
-import pickle, hashlib
+import pickle
+import hashlib
+# -----------------------------------------------------------------------------
+
class DB(object):
"""
- Database object to keep track of file hashes.
+ Persistent dictionary which can reload itself on creation.
"""
def __init__(self):
@@ -48,65 +46,12 @@
self.db[key] = value
+# actual database object where we'll be storing our file hashes
db = DB()
-def md5sum(filename):
- """
- Calculate the md5sum of the file contents.
- """
+# -----------------------------------------------------------------------------
- fp = open(filename, 'rb')
- contents = fp.read()
- fp.close()
-
- m = hashlib.md5()
- m.update(contents)
-
- return m.hexdigest()
-
-
-def locate(pattern, root=os.getcwd()):
- """
- Return filenames that match given pattern.
- """
- from fnmatch import fnmatch
- for (path, dirs, files) in os.walk(root):
- matches = [os.path.join(path, filename)
- for filename in files if fnmatch(filename, pattern)]
- for filename in matches:
- yield filename
-
-
-def geturl(filename):
- """
- Programmatically build a URL given a filename.
- """
-
- # target-url pattern
- url = 'http://geodynamics.org/cig/software/benchmarks/%s/%s'
-
- # strip leading ./ if present
- if filename[0:2] == './':
- filename = filename[2:]
-
- # strip out the file extension
- coll = os.path.dirname(filename)
- basename = os.path.basename(filename)
- (base, ext) = os.path.splitext(basename)
-
- # while we're ignoring the file extension, we only want
- # to operate on html files (fail otherwise)
- assert ext == '.html'
-
- # plone index pages are named 'index_html'
- if base == 'index':
- base = 'index_html'
-
- # we're done. fill in the url pattern
- return url % (coll, base)
-
-
def make_login_function():
"""
Returns a function that prompts for user/password once, and
@@ -118,7 +63,7 @@
def login(p):
"""
- Act on a pexpect instance, which will forward the
+ Acts on a pexpect instance, which will forward the
user/password credentials to a child process.
"""
@@ -138,34 +83,85 @@
return login
+# actual login function
login = make_login_function()
+# -----------------------------------------------------------------------------
+
def md5sum(filename):
    """
    Calculate the md5sum of the file contents.
    """
    # read the whole file in binary mode and hash it in one shot
    with open(filename, 'rb') as stream:
        return hashlib.md5(stream.read()).hexdigest()
+
+
def has_changed(filename):
    """
    Decide whether the html file in question has changed since the
    last upload.

    A file we have never seen before counts as changed.  The previous
    version stored the new hash into the database immediately and then
    compared the hash against itself, so brand-new files reported
    "unchanged" and were never uploaded -- and the recorded hash
    prevented any later upload as well.  The database is now only
    updated by upload() after a successful upload.
    """
    current_md5 = md5sum(filename)

    # never uploaded before -> must be uploaded
    if filename not in db:
        return True

    # file has changed when the hashes are different
    return (current_md5 != db[filename])
-def upload(filename, url):
def geturl(filename):
    """
    Programmatically build target URL given the local .html filename.

    Raises ValueError when the filename is not an .html file.  (The
    previous ``assert ext == '.html'`` vanishes under ``python -O``,
    silently building URLs for non-html files.)
    """
    # define the target-url pattern (note the single '%s')
    target_url = 'http://geodynamics.org/cig/software/benchmarks/%s'

    # strip leading ./ if present
    if filename[0:2] == './':
        filename = filename[2:]

    # break up path into (parent collection, file base, file extension)
    from common import split_path
    coll, base, ext = split_path(filename)

    # while we ignore the extension in the URL, we only know how to
    # build URLs for html pages -- fail loudly otherwise
    if ext != '.html':
        raise ValueError("not an .html file: %r" % filename)

    # plone index pages are named 'index_html'
    if base == 'index':
        base = 'index_html'

    # finally, we pick the suffix (accounting for empty coll)
    suffix = base
    if coll:
        suffix = '%s/%s' % (coll, base)

    # once we have the suffix, apply it to the target_url pattern
    return target_url % suffix
+
+
+# -----------------------------------------------------------------------------
+
+def upload(filename):
+ """
+ Use nd to upload (PUT) the given .html file.
+ """
+
+ # calculate the target URL for our .html file
+ target_url = geturl(filename)
+
+ # spawn a pexpect child process that will upload the desired file
+ p = pexpect.spawn("nd -p '%s' '%s'" % (filename, target_url))
+
# nd will prompt for a password, so we forward the pexpect object
# to our special login function, which will remember your user/password
# after you've entered it once.
@@ -175,39 +171,39 @@
print "Uploaded %s" % url
# now that we've uploaded the file, we can update our database
- # with the current hash (replacing the old one)
+ # with the current hash (replacing the old hash)
db[filename] = md5sum(filename)
def upload_all_html_files():
"""
- Upload all *.html files under the group subdirectories.
+ Upload all modified *.html files.
"""
- groups = ['cs', 'geodyn', 'long', 'magma', 'mc', 'seismo', 'short']
- print "Scanning for modified files..."
+ from common import groups, locate
+
+ print "Scanning for files to upload..."
for group in groups:
for filename in locate('*.html', root=group):
if has_changed(filename):
- upload(filename, geturl(filename))
+ upload(filename)
def upload_from_list():
"""
- Upload all files in 'upload.txt'
+ Upload files in 'upload.txt'.
"""
- def upload_list():
- fp = open('upload.txt', 'r')
- for line in fp.readlines():
- yield line.strip()
- fp.close()
- print "Scanning for modified files..."
- for filename in upload_list():
+ from common import readlines
+
+ print "Scanning for files to upload..."
+ for filename in readlines('upload.txt'):
if has_changed(filename):
- upload(filename, geturl(filename))
+ upload(filename)
+# -----------------------------------------------------------------------------
+
def main():
    """
    Upload the files listed in 'upload.txt', then save the hash
    database so unchanged files can be skipped on the next run.
    """
    upload_from_list()
    db.save()
More information about the CIG-COMMITS
mailing list