[cig-commits] r15747 - doc/geodynamics.org/benchmarks/trunk

luis at geodynamics.org luis at geodynamics.org
Mon Oct 5 15:39:22 PDT 2009


Author: luis
Date: 2009-10-05 15:39:21 -0700 (Mon, 05 Oct 2009)
New Revision: 15747

Modified:
   doc/geodynamics.org/benchmarks/trunk/.gitignore
   doc/geodynamics.org/benchmarks/trunk/upload.py
Log:
Upload only html files that have changed.

Instead of uploading all the html files every time, we keep track
of which files have changed by storing an md5 hash of each file in a
"database", and upload only the files whose hash has changed.

Modified: doc/geodynamics.org/benchmarks/trunk/.gitignore
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/.gitignore	2009-10-05 20:44:00 UTC (rev 15746)
+++ doc/geodynamics.org/benchmarks/trunk/.gitignore	2009-10-05 22:39:21 UTC (rev 15747)
@@ -1,4 +1,5 @@
 .*.swp
+/upload.pkl
 
 # for now, ignore these
 /utils

Modified: doc/geodynamics.org/benchmarks/trunk/upload.py
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/upload.py	2009-10-05 20:44:00 UTC (rev 15746)
+++ doc/geodynamics.org/benchmarks/trunk/upload.py	2009-10-05 22:39:21 UTC (rev 15747)
@@ -5,65 +5,70 @@
 # http://www.palovick.com/code/python/python-ssh-client.php
 # http://www.brunningonline.net/simon/blog/archives/002022.html
 # http://docs.python.org/library/os.path.html
+# http://docs.python.org/library/os.path.html#os.path.dirname
+# http://docs.python.org/library/os.path.html#os.path.basename
+# http://docs.python.org/library/os.path.html#os.path.exists
+# http://docs.python.org/library/os.path.html#os.path.getmtime
 # http://docs.python.org/library/getpass.html
+# http://docs.python.org/library/pickle.html
+# http://docs.python.org/library/hashlib.html
 #
 
+import os
 import pexpect
-import os
+import pickle, hashlib
 
-def make_login_function():
+
+class DB(object):
     """
-    Returns a function that prompts for user/password once, and
-    remembers those in subsequent calls.
+    Database object to keep track of file hashes.
     """
 
-    env = dict(user=None, password=None)
+    def __init__(self):
+        self.db = dict()
+        self.name = 'upload.pkl'
+        if os.path.exists(self.name):
+            pkl = open(self.name, 'rb')
+            self.db = pickle.load(pkl)
+            pkl.close()
+        self.save()
 
-    def login(p):
-        """
-        Act on a pexpect instance, which will forward the
-        user/password credentials to a child process.
-        """
+    def save(self):
+        pkl = open(self.name, 'wb')
+        pickle.dump(self.db, pkl)
+        pkl.close()
 
-        if env['user'] is None:
-            env['user'] = raw_input('Plone Username: ')
+    def __contains__(self, item):
+        return (item in self.db)
 
-        if env['password'] is None:
-            from getpass import getpass
-            env['password'] = getpass('Plone Password: ')
+    def __getitem__(self, key):
+        return self.db.get(key)
 
-        p.expect('Username for "Zope":*')
-        p.sendline(env['user'])
+    def __setitem__(self, key, value):
+        self.db[key] = value
 
-        p.expect('Password:*')
-        p.sendline(env['password'])
 
-    return login
+db = DB()
 
 
-login = make_login_function()
-
-
-def upload(filename, url):
+def md5sum(filename):
     """
-    Use nd to upload (PUT) given file into a target url
+    Calculate the md5sum of the file contents.
     """
 
-    # spawn a pexpect child process
-    p = pexpect.spawn("nd -p '%s' '%s'" % (filename, url))
+    fp = open(filename, 'rb')
+    contents = fp.read()
+    fp.close()
 
-    # nd will prompt for a password, so we forward the pexpect object
-    # to our special login function, which will remember your user/password
-    # after you've entered it once.
-    login(p)
+    m = hashlib.md5()
+    m.update(contents)
 
-    # show something as feedback
-    print "Uploaded %s" % url
+    return m.hexdigest()
 
 
 def locate(pattern, root=os.getcwd()):
     """
-    Return filenames that match given pattern
+    Return filenames that match given pattern.
     """
     from fnmatch import fnmatch
     for (path, dirs, files) in os.walk(root):
@@ -75,7 +80,7 @@
 
 def geturl(filename):
     """
-    Programmatically build a URL given a filename
+    Programmatically build a URL given a filename.
     """
 
     # target-url pattern
@@ -87,10 +92,11 @@
 
     # strip out the file extension
     coll = os.path.dirname(filename)
-    (base, ext) = os.path.splitext(os.path.basename(filename))
+    basename = os.path.basename(filename)
+    (base, ext) = os.path.splitext(basename)
 
-    # while we're ignoring the file extension, we only want to operate
-    # on html files (fail otherwise)
+    # while we're ignoring the file extension, we only want
+    # to operate on html files (fail otherwise)
     assert ext == '.html'
 
     # plone index pages are named 'index_html'
@@ -101,14 +107,89 @@
     return url % (coll, base)
 
 
+def make_login_function():
+    """
+    Returns a function that prompts for user/password once, and
+    remembers those in subsequent calls.
+    """
+
+    # store the user/password in a closure over our login() function
+    env = dict(user=None, password=None)
+
+    def login(p):
+        """
+        Act on a pexpect instance, which will forward the
+        user/password credentials to a child process.
+        """
+
+        if env['user'] is None:
+            env['user'] = raw_input('Plone Username: ')
+
+        if env['password'] is None:
+            from getpass import getpass
+            env['password'] = getpass('Plone Password: ')
+
+        p.expect('Username for "Zope":*')
+        p.sendline(env['user'])
+
+        p.expect('Password:*')
+        p.sendline(env['password'])
+
+    return login
+
+
+login = make_login_function()
+
+
+def has_changed(filename):
+    """
+    This function decides whether the html file in question
+    has changed since the last upload.
+    """
+
+    x = md5sum(filename)
+
+    if filename not in db:
+        db[filename] = x
+
+    y = db[filename]
+
+    # file has changed when the hashes are different
+    return (x != y)
+
+
+def upload(filename, url):
+    """
+    Use nd to upload (PUT) given file into a target url
+    """
+
+    # spawn a pexpect child process
+    p = pexpect.spawn("nd -p '%s' '%s'" % (filename, url))
+
+    # nd will prompt for a password, so we forward the pexpect object
+    # to our special login function, which will remember your user/password
+    # after you've entered it once.
+    login(p)
+
+    # give some feedback while the file uploads
+    print "Uploaded %s" % url
+
+    # now that we've uploaded the file, we can update our database
+    # with the current hash (replacing the old one)
+    db[filename] = md5sum(filename)
+
+
 def upload_all_html_files():
     """
     Upload all *.html files under the group subdirectories.
     """
     groups = ['cs', 'geodyn', 'long', 'magma', 'mc', 'seismo', 'short']
+
+    print "Scanning for modified files..."
     for group in groups:
         for filename in locate('*.html', root=group):
-            upload(filename, geturl(filename))
+            if has_changed(filename):
+                upload(filename, geturl(filename))
 
 
 def upload_from_list():
@@ -121,12 +202,15 @@
             yield line.strip()
         fp.close()
 
+    print "Scanning for modified files..."
     for filename in upload_list():
-        upload(filename, geturl(filename))
+        if has_changed(filename):
+            upload(filename, geturl(filename))
 
 
 def main():
     upload_from_list()
+    db.save()
 
 
 if __name__ == '__main__':



More information about the CIG-COMMITS mailing list