[cig-commits] r15747 - doc/geodynamics.org/benchmarks/trunk
luis at geodynamics.org
luis at geodynamics.org
Mon Oct 5 15:39:22 PDT 2009
Author: luis
Date: 2009-10-05 15:39:21 -0700 (Mon, 05 Oct 2009)
New Revision: 15747
Modified:
doc/geodynamics.org/benchmarks/trunk/.gitignore
doc/geodynamics.org/benchmarks/trunk/upload.py
Log:
Upload only HTML files that have changed.
Instead of uploading every HTML file on each run, we keep track of
which files have changed by storing an MD5 hash of each file's contents
in a pickled "database" (upload.pkl), and we upload only the files whose
current hash differs from the stored one.
Modified: doc/geodynamics.org/benchmarks/trunk/.gitignore
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/.gitignore 2009-10-05 20:44:00 UTC (rev 15746)
+++ doc/geodynamics.org/benchmarks/trunk/.gitignore 2009-10-05 22:39:21 UTC (rev 15747)
@@ -1,4 +1,5 @@
.*.swp
+/upload.pkl
# for now, ignore these
/utils
Modified: doc/geodynamics.org/benchmarks/trunk/upload.py
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/upload.py 2009-10-05 20:44:00 UTC (rev 15746)
+++ doc/geodynamics.org/benchmarks/trunk/upload.py 2009-10-05 22:39:21 UTC (rev 15747)
@@ -5,65 +5,70 @@
# http://www.palovick.com/code/python/python-ssh-client.php
# http://www.brunningonline.net/simon/blog/archives/002022.html
# http://docs.python.org/library/os.path.html
+# http://docs.python.org/library/os.path.html#os.path.dirname
+# http://docs.python.org/library/os.path.html#os.path.basename
+# http://docs.python.org/library/os.path.html#os.path.exists
+# http://docs.python.org/library/os.path.html#os.path.getmtime
# http://docs.python.org/library/getpass.html
+# http://docs.python.org/library/pickle.html
+# http://docs.python.org/library/hashlib.html
#
+import os
import pexpect
-import os
+import pickle, hashlib
-def make_login_function():
+
+class DB(object):
"""
- Returns a function that prompts for user/password once, and
- remembers those in subsequent calls.
+ Database object to keep track of file hashes.
"""
- env = dict(user=None, password=None)
+ def __init__(self):
+ self.db = dict()
+ self.name = 'upload.pkl'
+ if os.path.exists(self.name):
+ pkl = open(self.name, 'rb')
+ self.db = pickle.load(pkl)
+ pkl.close()
+ self.save()
- def login(p):
- """
- Act on a pexpect instance, which will forward the
- user/password credentials to a child process.
- """
+ def save(self):
+ pkl = open(self.name, 'wb')
+ pickle.dump(self.db, pkl)
+ pkl.close()
- if env['user'] is None:
- env['user'] = raw_input('Plone Username: ')
+ def __contains__(self, item):
+ return (item in self.db)
- if env['password'] is None:
- from getpass import getpass
- env['password'] = getpass('Plone Password: ')
+ def __getitem__(self, key):
+ return self.db.get(key)
- p.expect('Username for "Zope":*')
- p.sendline(env['user'])
+ def __setitem__(self, key, value):
+ self.db[key] = value
- p.expect('Password:*')
- p.sendline(env['password'])
- return login
+db = DB()
-login = make_login_function()
-
-
-def upload(filename, url):
+def md5sum(filename):
"""
- Use nd to upload (PUT) given file into a target url
+ Calculate the md5sum of the file contents.
"""
- # spawn a pexpect child process
- p = pexpect.spawn("nd -p '%s' '%s'" % (filename, url))
+ fp = open(filename, 'rb')
+ contents = fp.read()
+ fp.close()
- # nd will prompt for a password, so we forward the pexpect object
- # to our special login function, which will remember your user/password
- # after you've entered it once.
- login(p)
+ m = hashlib.md5()
+ m.update(contents)
- # show something as feedback
- print "Uploaded %s" % url
+ return m.hexdigest()
def locate(pattern, root=os.getcwd()):
"""
- Return filenames that match given pattern
+ Return filenames that match given pattern.
"""
from fnmatch import fnmatch
for (path, dirs, files) in os.walk(root):
@@ -75,7 +80,7 @@
def geturl(filename):
"""
- Programmatically build a URL given a filename
+ Programmatically build a URL given a filename.
"""
# target-url pattern
@@ -87,10 +92,11 @@
# strip out the file extension
coll = os.path.dirname(filename)
- (base, ext) = os.path.splitext(os.path.basename(filename))
+ basename = os.path.basename(filename)
+ (base, ext) = os.path.splitext(basename)
- # while we're ignoring the file extension, we only want to operate
- # on html files (fail otherwise)
+ # while we're ignoring the file extension, we only want
+ # to operate on html files (fail otherwise)
assert ext == '.html'
# plone index pages are named 'index_html'
@@ -101,14 +107,89 @@
return url % (coll, base)
+def make_login_function():
+ """
+ Returns a function that prompts for user/password once, and
+ remembers those in subsequent calls.
+ """
+
+ # store the user/password in a closure over our login() function
+ env = dict(user=None, password=None)
+
+ def login(p):
+ """
+ Act on a pexpect instance, which will forward the
+ user/password credentials to a child process.
+ """
+
+ if env['user'] is None:
+ env['user'] = raw_input('Plone Username: ')
+
+ if env['password'] is None:
+ from getpass import getpass
+ env['password'] = getpass('Plone Password: ')
+
+ p.expect('Username for "Zope":*')
+ p.sendline(env['user'])
+
+ p.expect('Password:*')
+ p.sendline(env['password'])
+
+ return login
+
+
+login = make_login_function()
+
+
+def has_changed(filename):
+ """
+ This function decides whether the html file in question
+ has changed since the last upload.
+ """
+
+ x = md5sum(filename)
+
+ if filename not in db:
+ db[filename] = x
+
+ y = db[filename]
+
+ # file has changed when the hashes are different
+ return (x != y)
+
+
+def upload(filename, url):
+ """
+ Use nd to upload (PUT) given file into a target url
+ """
+
+ # spawn a pexpect child process
+ p = pexpect.spawn("nd -p '%s' '%s'" % (filename, url))
+
+ # nd will prompt for a password, so we forward the pexpect object
+ # to our special login function, which will remember your user/password
+ # after you've entered it once.
+ login(p)
+
+ # give some feedback while the file uploads
+ print "Uploaded %s" % url
+
+ # now that we've uploaded the file, we can update our database
+ # with the current hash (replacing the old one)
+ db[filename] = md5sum(filename)
+
+
def upload_all_html_files():
"""
Upload all *.html files under the group subdirectories.
"""
groups = ['cs', 'geodyn', 'long', 'magma', 'mc', 'seismo', 'short']
+
+ print "Scanning for modified files..."
for group in groups:
for filename in locate('*.html', root=group):
- upload(filename, geturl(filename))
+ if has_changed(filename):
+ upload(filename, geturl(filename))
def upload_from_list():
@@ -121,12 +202,15 @@
yield line.strip()
fp.close()
+ print "Scanning for modified files..."
for filename in upload_list():
- upload(filename, geturl(filename))
+ if has_changed(filename):
+ upload(filename, geturl(filename))
def main():
upload_from_list()
+ db.save()
if __name__ == '__main__':
More information about the CIG-COMMITS
mailing list