[cig-commits] r15750 - doc/geodynamics.org/benchmarks/trunk

Mon Oct 5 15:39:38 PDT 2009

Author: luis
Date: 2009-10-05 15:39:37 -0700 (Mon, 05 Oct 2009)
New Revision: 15750

Added:
   doc/geodynamics.org/benchmarks/trunk/common.py
Modified:
   doc/geodynamics.org/benchmarks/trunk/upload.py
Log:
Made various improvements to upload.py

Also, refactored some functionality into a separate file called
common.py, since we'll be using that in other files.

Added: doc/geodynamics.org/benchmarks/trunk/common.py
===================================================================

--- doc/geodynamics.org/benchmarks/trunk/common.py	                        (rev 0)
+++ doc/geodynamics.org/benchmarks/trunk/common.py	2009-10-05 22:39:37 UTC (rev 15750)
@@ -0,0 +1,53 @@
+#!/usr/bin/env python2.6
+#
+# Reference docs:
+# http://www.brunningonline.net/simon/blog/archives/002022.html
+# http://docs.python.org/library/os.path.html#os.path.dirname
+# http://docs.python.org/library/os.path.html#os.path.basename
+#
+
+import os
+
+
+groups = [  # directory names that upload.py walks when looking for *.html files
+    'cs', 
+    'geodyn',
+    'long',
+    'magma',
+    'mc',
+    'seismo',
+    'short',
+]
+
+
+def locate(pattern, root=os.getcwd()):  # NOTE: default root is evaluated once, at import time
+    """
+    Yield the joined path of every file under root whose name matches pattern.
+    """
+    from fnmatch import fnmatch
+    for (path, dirs, files) in os.walk(root):
+        matches = [os.path.join(path, filename)
+                    for filename in files if fnmatch(filename, pattern)]
+        for filename in matches:
+            yield filename
+
+
+def readlines(filename):
+    """
+    Yield each line of the given file with surrounding whitespace stripped.
+    """
+    with open(filename, 'r') as fp:
+        for line in fp:
+            yield line.strip()
+
+
+def split_path(filename):
+    """
+    Split path into (dirname, base, extension): 'a/b.html' -> ('a', 'b', '.html')
+    """
+    collection = os.path.dirname(filename)
+    basename = os.path.basename(filename)
+    (base, extension) = os.path.splitext(basename)
+    return (collection, base, extension)
+
+

Modified: doc/geodynamics.org/benchmarks/trunk/upload.py
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/upload.py	2009-10-05 22:39:30 UTC (rev 15749)
+++ doc/geodynamics.org/benchmarks/trunk/upload.py	2009-10-05 22:39:37 UTC (rev 15750)
@@ -1,14 +1,9 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2.6
 #
+# Reference docs:
 # http://pexpect.sourceforge.net/pexpect.html
 # http://linux.byexamples.com/archives/346/python-how-to-access-ssh-with-pexpect/
 # http://www.palovick.com/code/python/python-ssh-client.php
-# http://www.brunningonline.net/simon/blog/archives/002022.html
-# http://docs.python.org/library/os.path.html
-# http://docs.python.org/library/os.path.html#os.path.dirname
-# http://docs.python.org/library/os.path.html#os.path.basename
-# http://docs.python.org/library/os.path.html#os.path.exists
-# http://docs.python.org/library/os.path.html#os.path.getmtime
 # http://docs.python.org/library/getpass.html
 # http://docs.python.org/library/pickle.html
 # http://docs.python.org/library/hashlib.html
@@ -16,12 +11,15 @@
 
 import os
 import pexpect
-import pickle, hashlib
+import pickle
+import hashlib
 
 
+# -----------------------------------------------------------------------------
+
 class DB(object):
     """
-    Database object to keep track of file hashes.
+    Persistent dictionary which can reload itself on creation.
     """
 
     def __init__(self):
@@ -48,65 +46,12 @@
         self.db[key] = value
 
 
+# actual database object where we'll be storing our file hashes
 db = DB()
 
 
-def md5sum(filename):
-    """
-    Calculate the md5sum of the file contents.
-    """
+# -----------------------------------------------------------------------------
 
-    fp = open(filename, 'rb')
-    contents = fp.read()
-    fp.close()
-
-    m = hashlib.md5()
-    m.update(contents)
-
-    return m.hexdigest()
-
-
-def locate(pattern, root=os.getcwd()):
-    """
-    Return filenames that match given pattern.
-    """
-    from fnmatch import fnmatch
-    for (path, dirs, files) in os.walk(root):
-        matches = [os.path.join(path, filename)
-                    for filename in files if fnmatch(filename, pattern)]
-        for filename in matches:
-            yield filename
-
-
-def geturl(filename):
-    """
-    Programmatically build a URL given a filename.
-    """
-
-    # target-url pattern
-    url = 'http://geodynamics.org/cig/software/benchmarks/%s/%s'
-
-    # strip leading ./ if present
-    if filename[0:2] == './':
-        filename = filename[2:]
-
-    # strip out the file extension
-    coll = os.path.dirname(filename)
-    basename = os.path.basename(filename)
-    (base, ext) = os.path.splitext(basename)
-
-    # while we're ignoring the file extension, we only want
-    # to operate on html files (fail otherwise)
-    assert ext == '.html'
-
-    # plone index pages are named 'index_html'
-    if base == 'index':
-        base = 'index_html'
-
-    # we're done. fill in the url pattern
-    return url % (coll, base)
-
-
 def make_login_function():
     """
     Returns a function that prompts for user/password once, and
@@ -118,7 +63,7 @@
 
     def login(p):
         """
-        Act on a pexpect instance, which will forward the
+        Acts on a pexpect instance, which will forward the
         user/password credentials to a child process.
         """
 
@@ -138,34 +83,85 @@
     return login
 
 
+# actual login function
 login = make_login_function()
 
 
+# -----------------------------------------------------------------------------
+
+def md5sum(filename):
+    """
+    Return the hex MD5 digest of the file's contents (whole file read at once).
+    """
+
+    m = hashlib.md5()
+
+    with open(filename, 'rb') as fp:
+        m.update(fp.read())
+
+    return m.hexdigest()
+
+
 def has_changed(filename):
     """
     This function decides whether the html file in question
     has changed since the last upload.
     """
 
-    x = md5sum(filename)
+    current_md5 = md5sum(filename)
 
     if filename not in db:
-        db[filename] = x
+        db[filename] = current_md5
 
-    y = db[filename]
+    old_md5 = db[filename]
 
     # file has changed when the hashes are different
-    return (x != y)
+    return (current_md5 != old_md5)
 
 
-def upload(filename, url):
+def geturl(filename):
     """
-    Use nd to upload (PUT) given file into a target url
+    Programmatically build target URL given the local .html filename.
     """
 
-    # spawn a pexpect child process
-    p = pexpect.spawn("nd -p '%s' '%s'" % (filename, url))
+    # define the target-url pattern (note the single '%s')
+    target_url = 'http://geodynamics.org/cig/software/benchmarks/%s'
 
+    # strip leading ./ if present
+    if filename[0:2] == './':
+        filename = filename[2:]
+
+    # break up path into (parent collection, file base, file extension)
+    from common import split_path
+    coll, base, ext = split_path(filename)
+    assert ext == '.html'
+
+    # plone index pages are named 'index_html'
+    if base == 'index':
+        base = 'index_html'
+
+    # finally, we pick the suffix (accounting for empty coll)
+    suffix = base
+    if coll:
+        suffix = '%s/%s' % (coll, base)
+
+    # once we have the suffix, apply it to the target_url pattern
+    return target_url % suffix
+
+
+# -----------------------------------------------------------------------------
+
+def upload(filename):
+    """
+    Use nd to upload (PUT) the given .html file.
+    """
+
+    # calculate the target URL for our .html file
+    target_url = geturl(filename)
+
+    # spawn a pexpect child process that will upload the desired file
+    p = pexpect.spawn("nd -p '%s' '%s'" % (filename, target_url))
+
     # nd will prompt for a password, so we forward the pexpect object
     # to our special login function, which will remember your user/password
     # after you've entered it once.
@@ -175,39 +171,39 @@
     print "Uploaded %s" % url
 
     # now that we've uploaded the file, we can update our database
-    # with the current hash (replacing the old one)
+    # with the current hash (replacing the old hash)
     db[filename] = md5sum(filename)
 
 
 def upload_all_html_files():
     """
-    Upload all *.html files under the group subdirectories.
+    Upload all modified *.html files.
     """
-    groups = ['cs', 'geodyn', 'long', 'magma', 'mc', 'seismo', 'short']
 
-    print "Scanning for modified files..."
+    from common import groups, locate
+
+    print "Scanning for files to upload..."
     for group in groups:
         for filename in locate('*.html', root=group):
             if has_changed(filename):
-                upload(filename, geturl(filename))
+                upload(filename)
 
 
 def upload_from_list():
     """
-    Upload all files in 'upload.txt'
+    Upload files in 'upload.txt'.
     """
-    def upload_list():
-        fp = open('upload.txt', 'r')
-        for line in fp.readlines():
-            yield line.strip()
-        fp.close()
 
-    print "Scanning for modified files..."
-    for filename in upload_list():
+    from common import readlines
+
+    print "Scanning for files to upload..."
+    for filename in readlines('upload.txt'):
         if has_changed(filename):
-            upload(filename, geturl(filename))
+            upload(filename)
 
 
+# -----------------------------------------------------------------------------
+
 def main():
     upload_from_list()
     db.save()