[cig-commits] r15980 - doc/geodynamics.org/benchmarks/trunk

luis at geodynamics.org
Mon Nov 16 14:49:30 PST 2009


Author: luis
Date: 2009-11-16 14:49:28 -0800 (Mon, 16 Nov 2009)
New Revision: 15980

Modified:
   doc/geodynamics.org/benchmarks/trunk/clean.sh
   doc/geodynamics.org/benchmarks/trunk/common.py
   doc/geodynamics.org/benchmarks/trunk/copy.sh
   doc/geodynamics.org/benchmarks/trunk/diff.sh
   doc/geodynamics.org/benchmarks/trunk/download.py
   doc/geodynamics.org/benchmarks/trunk/generate.py
   doc/geodynamics.org/benchmarks/trunk/publish.py
Log:
Allow mix of .rst and .html files in sources.txt

Some formatting options cannot be achieved with the .rst
format alone. Since we don't really want to modify any
Plone content filters, we instead modify the synchronization
scripts so that they can also publish and download .html files.
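
For illustration, a sources.txt mixing both formats might look
like this (entries fabricated; .rst entries are still converted
by generate.py, while .html entries are published verbatim):

    long/index.rst
    mc/index.rst
    mc/results.html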

Modified: doc/geodynamics.org/benchmarks/trunk/clean.sh
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/clean.sh	2009-11-16 22:49:21 UTC (rev 15979)
+++ doc/geodynamics.org/benchmarks/trunk/clean.sh	2009-11-16 22:49:28 UTC (rev 15980)
@@ -1,9 +1,6 @@
 #!/bin/bash
-echo "Cleaning up html files..."
-rm -f -v ./index.html
-for d in cs geodyn long magma mc seismo short; do
-    for f in $(find ./$d -name '*.html'); do
-        rm -f -v $f
-    done
+echo "Cleaning up generated html files..."
+for rst in $(grep .rst sources.txt); do
+    html=${rst%.rst}.html
+    rm -f -v ${html}
 done
-rm -f -v ./upload.pkl
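
The expansion ${rst%.rst}.html strips the .rst suffix and appends
.html, so the script now removes only the generated pages instead of
every .html file under the group directories. (Strictly speaking,
"grep .rst" treats the dot as a regex wildcard and matches anywhere
in the line; grep '\.rst$' would be tighter.) A minimal Python sketch
of the same cleanup logic, assuming the sources.txt layout shown above:

    # delete only the .html files generated from .rst sources
    import os

    for line in open("sources.txt"):
        src = line.strip()
        if src.endswith(".rst"):
            html = src[:-len(".rst")] + ".html"
            if os.path.exists(html):
                print "removing", html
                os.remove(html)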

Modified: doc/geodynamics.org/benchmarks/trunk/common.py
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/common.py	2009-11-16 22:49:21 UTC (rev 15979)
+++ doc/geodynamics.org/benchmarks/trunk/common.py	2009-11-16 22:49:28 UTC (rev 15980)
@@ -10,6 +10,10 @@
 import re
 import hashlib
 
+
+# -----------------------------------------------------------------------------
+# Some useful constants
+
 # Site URLs
 siteroot = "http://geodynamics.org/cig"
 softroot = siteroot + "/software"
@@ -22,9 +26,9 @@
 webdav_benchroot = webdav_softroot + "/benchmarks"
 webdav_testroot = webdav_benchroot + "/test"
 
-# Working-group names
+# Top-level directories
 groups = [
-    'cs', 
+    'cigma',
     'geodyn',
     'long',
     'magma',
@@ -33,10 +37,15 @@
     'short',
 ]
 
-# Regular expression for parsing site content
-site_content_pattern = re.compile(r"(?P<header>.*)Content-Type: (?P<content_type>.*)\n\n(?P<content>.*)", re.DOTALL)
+# Regular expression for parsing site pages (extracts header & content)
+site_content_pattern = re.compile(
+    r"(?P<header>.*)Content-Type: (?P<content_type>.*)\n\n(?P<content>.*)",
+    re.DOTALL)
 
 
+# -----------------------------------------------------------------------------
+# Some generic file utility functions
+
 def locate(pattern, root=os.getcwd()):
     """
     Return filenames that match given pattern.
@@ -84,71 +93,129 @@
     return (collection, base, extension)
 
 
-def localname(url, ext='.rst'):
+def md5(s):
     """
-    Given a relative url, calculate the local filename.
+    Given a string, calculate its MD5 hash digest.
     """
-    return "%s%s" % (url.replace("/index_html", "/index"), ext)
+    m = hashlib.md5()
+    m.update(s)
+    return m.hexdigest()
 
+def md5sum(filename):
+    """
+    Get MD5 sum of the filename contents.
+    """
+    return md5(readfile(filename))
 
-def sitename(filename, prefix=benchroot):
+
+# -----------------------------------------------------------------------------
+# More utility functions
+
+def source_files():
     """
-    Given a source filename, deduce the corresponding relative url
-    that will be used on the site. If a prefix is given, the url
-    will be made absolute by prepending that prefix.
+    Iterate through all lines in the file ``sources.txt``.
+    We assume every line is significant and corresponds to
+    an existing filename.
     """
+    for line in readlines("sources.txt"):
+        src = line.strip()
+        yield src
 
-    coll, base, ext = split_path(os.path.normpath(filename))
 
+def get_site_path(src):
+    """
+    Calculate the site path that corresponds to a given
+    source filename. This path is always based on the
+    mounted WebDAV directory at ``webdav_benchroot``.
+    """
+
+    # split the path information
+    coll, base, ext = split_path(os.path.normpath(src))
+
     # plone index pages are named 'index_html'
     if base == "index":
         base = "index_html"
 
-    # start with base as our
+    # start with base
     url = base
 
     # prepend coll if nonempty
     if coll:
         url = "%s/%s" % (coll, url)
 
-    # prepend prefix if nonempty
-    if prefix:
-        url = "%s/%s" % (prefix, url)
+    # prepend webdav root for benchmarking section
+    url = "%s/%s" % (webdav_benchroot, url)
 
     return url
 
 
-def headername(filename):
-    root, ext = os.path.splitext(filename)
+def get_cached(filename):
+    """
+    Given a filename, calculate the location of the latest cached version.
+    This cache is synchronized directly from the website source pages
+    by the ``download.py`` script.
+    """
+    return os.path.normpath(os.path.join('./_latest', filename))
+
+
+def get_header_filename(src):
+    """
+    Given a source file, get the name of its corresponding header file.
+    """
+    root, ext = os.path.splitext(src)
     return root + '.header'
 
 
-def latestname(filename):
-    return os.path.normpath(os.path.join('./_latest', filename))
+def get_cached_header_filename(src):
+    return get_cached(get_header_filename(src))
 
 
-def split_content(filename):
-    all = readfile(filename)
-    match = re.match(site_content_pattern, all)
-    meta, content = None, None
-    if match:
-        meta = match.group('header')
-        meta += 'Content-Type: %s\n\n' % match.group('content_type')
-        content = match.group('content')
-    return meta, content
+def get_cached_content_filename(src):
+    return get_cached(src)
 
 
-def md5sum(s):
+def get_cached_page_filename(src):
+    return get_cached(src) + '.orig'
+
+
+def get_header(src):
     """
-    Calculate the md5sum of a string
+    Return contents of header file.
     """
-    m = hashlib.md5()
-    m.update(s)
-    return m.hexdigest()
+    return readfile(get_header_filename(src))
 
-def local_md5sum(filename):
-    return md5sum(readfile(filename))
 
-def latest_md5sum(filename):
-    return md5sum(readfile(latestname(filename)))
+def get_content(src):
+    """
+    Return contents of source file.
+    """
+    return readfile(src)
 
+
+def get_page(src):
+    """
+    Return the exact contents of the page we will be publishing,
+    which we obtain by concatenating the file header and contents.
+    Note, however, that we must also convert the line endings
+    in the content string; otherwise Plone will complain.
+    """
+    header = get_header(src)
+    content = get_content(src)
+    return header + content.replace('\n', '\r\n')
+
+
+
+def split_page(filename):
+    """
+    Read page from filename into memory and split the string into
+    a (header, content) tuple.
+    """
+    all = readfile(filename)
+    match = re.match(site_content_pattern, all)
+    header, content = None, None
+    if match:
+        header  = match.group('header')
+        header += 'Content-Type: %s\n\n' % match.group('content_type')
+        content = match.group('content')
+    return (header, content)
+
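
To see how the new helpers fit together, here is a hedged sketch
(file names fabricated): get_site_path() maps a source entry to its
WebDAV location, the get_cached_* helpers name the copies kept under
_latest, and site_content_pattern splits a raw Plone page into header
and content:

    from common import (get_site_path, get_header_filename, get_cached,
                        get_cached_page_filename, site_content_pattern)

    src = "long/index.rst"               # fabricated sources.txt entry
    print get_site_path(src)             # webdav_benchroot + "/long/index_html"
    print get_header_filename(src)       # long/index.header
    print get_cached(src)                # _latest/long/index.rst
    print get_cached_page_filename(src)  # _latest/long/index.rst.orig

    # the page string below is fabricated for demonstration
    page = "title: Example\nContent-Type: text/x-rst\n\n.. a body\n"
    m = site_content_pattern.match(page)
    print repr(m.group('header'))        # 'title: Example\n'
    print repr(m.group('content_type'))  # 'text/x-rst'
    print repr(m.group('content'))       # '.. a body\n'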

Modified: doc/geodynamics.org/benchmarks/trunk/copy.sh
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/copy.sh	2009-11-16 22:49:21 UTC (rev 15979)
+++ doc/geodynamics.org/benchmarks/trunk/copy.sh	2009-11-16 22:49:28 UTC (rev 15980)
@@ -1,8 +1,8 @@
 #!/bin/bash
 for file in `cat sources.txt`; do
-    rst=${file##./}
-    header=${rst%%.rst}.header
-    echo Copying $rst
-    cp -f ./_latest/${rst} ./${rst}
+    f=${file##./}
+    header=${f%.*}.header
+    echo Copying $f
+    cp -f ./_latest/${f} ./${f}
     cp -f ./_latest/${header} ./${header}
 done
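
The switch from ${f%%.rst} to ${f%.*} is what lets .html entries
through: the old pattern left an .html name untouched (yielding a
bogus results.html.header), while the new one strips whatever suffix
follows the last dot. The same fix appears in diff.sh below. In
Python terms this is just os.path.splitext:

    import os.path

    for f in ("mc/results.html", "long/index.rst"):  # hypothetical entries
        print f, "->", os.path.splitext(f)[0] + ".header"
    # mc/results.html -> mc/results.header
    # long/index.rst  -> long/index.header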

Modified: doc/geodynamics.org/benchmarks/trunk/diff.sh
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/diff.sh	2009-11-16 22:49:21 UTC (rev 15979)
+++ doc/geodynamics.org/benchmarks/trunk/diff.sh	2009-11-16 22:49:28 UTC (rev 15980)
@@ -5,7 +5,7 @@
 diffcmd="colordiff $*"
 for file in `cat sources.txt`; do
     f=${file##./}
-    header=${f%%.rst}.header
+    header=${f%.*}.header
     ${diffcmd} -u ${header} ./_latest/${header}
     ${diffcmd} -u ${f} ./_latest/${f}
 done

Modified: doc/geodynamics.org/benchmarks/trunk/download.py
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/download.py	2009-11-16 22:49:21 UTC (rev 15979)
+++ doc/geodynamics.org/benchmarks/trunk/download.py	2009-11-16 22:49:28 UTC (rev 15980)
@@ -1,10 +1,13 @@
 #!/usr/bin/env python2.6
 
 """
-download.py - Download the source pages from geodynamics.org website.
+download.py
+===========
 
-This script copies the files listed in "sources.txt" from a mounted
-WebDAV folder, into the _latest directory.
+Download the source pages from the geodynamics.org website.
+We transfer the pages from the mounted WebDAV directory
+and save them to a local directory called ``_latest`` for
+further analysis.
 """
 
 import os, shutil
@@ -12,30 +15,32 @@
 def main():
 
     from common import (
-        webdav_benchroot,
-        readlines, writefile,
-        sitename, latestname, headername,
-        split_content,
+        source_files, writefile,
+        get_site_path, get_cached_page_filename,
+        get_cached_header_filename, get_cached_content_filename,
+        split_page,
     )
 
-    for line in readlines("sources.txt"):
-        filename = line.strip()
-        site_page = sitename(filename, prefix=webdav_benchroot)
+    for src in source_files():
 
-        latest_filename = latestname(filename)
-        latest_header_filename = headername(latest_filename)
-        tmp_filename = latest_filename + '.orig'
-        
-        print "Downloading %s" % site_page
-        shutil.copyfile(site_page, tmp_filename)
-        header, content = split_content(tmp_filename)
+        # obtain webdav path for our source page
+        site_path = get_site_path(src)
 
-        print "...writing %s" % latest_header_filename
-        writefile(latest_header_filename, header)
+        # destination files
+        cached_page_filename = get_cached_page_filename(src)
+        cached_header_filename = get_cached_header_filename(src)
+        cached_content_filename = get_cached_content_filename(src)
 
-        print "...writing %s" % latest_filename
-        writefile(latest_filename, content.replace('\r', ''))
+        print "Downloading", site_path
+        shutil.copyfile(site_path, cached_page_filename)
+        header, content = split_page(cached_page_filename)
 
+        print "...writing", cached_header_filename
+        writefile(cached_header_filename, header)
+
+        print "...writing", cached_content_filename
+        writefile(cached_content_filename, content.replace('\r', ''))
+
     return
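
A detail worth noting: the pages come back from Plone with CRLF line
endings, so download.py strips the carriage returns before caching
the content, and get_page() in common.py re-inserts them before
publishing. The round trip, on a fabricated string:

    content_from_site = "line one\r\nline two\r\n"
    local = content_from_site.replace('\r', '')  # what download.py caches
    assert local == "line one\nline two\n"
    restored = local.replace('\n', '\r\n')       # what get_page() rebuilds
    assert restored == content_from_site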
 
 

Modified: doc/geodynamics.org/benchmarks/trunk/generate.py
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/generate.py	2009-11-16 22:49:21 UTC (rev 15979)
+++ doc/geodynamics.org/benchmarks/trunk/generate.py	2009-11-16 22:49:28 UTC (rev 15980)
@@ -19,8 +19,10 @@
 import docutils.io
 import docutils.languages
 
+from common import source_files, split_path
 
 # -----------------------------------------------------------------------------
+# The original file ``rst2html.py`` sets the locale here. Do we really need it?
 
 import locale
 locale.setlocale(locale.LC_ALL, '')
@@ -28,42 +30,53 @@
 
 # -----------------------------------------------------------------------------
 
-def getrst(filename):
+def rst_files():
     """
-    Given an .html filename, return its .rst source filename.
+    We're only interested in .rst files, so we filter out
+    any source files that do not end in ".rst".
     """
-    from common import split_path
-    coll, base, ext = split_path(filename)
+    for src in source_files():
+        if src.endswith('.rst'):
+            yield src
+
+
+def get_rst(html):
+    """
+    Construct the .rst filename corresponding to an .html filename.
+    """
+    coll, base, ext = split_path(html)
     assert ext == '.html'
-
     rst = '%s.rst' % base
     if coll:
         rst = '%s/%s' % (coll, rst)
 
     return rst
 
-def gethtml(rst_filename):
-    from common import split_path
+def get_html(rst_filename):
+    """
+    Construct the .html filename corresponding to an .rst filename.
+    """
     coll, base, ext = split_path(rst_filename)
+    assert ext == '.rst'
     html = '%s.html' % base
     if coll:
         html = '%s/%s' % (coll, html)
     return html
 
 
-def needs_update(filename):
+def needs_update(html):
     """
     This function decides whether the .html file in question
     needs to be re-generated from its .rst source.
     """
 
     # if file doesn't exist, generate it. duh.
-    if not os.path.exists(filename):
+    if not os.path.exists(html):
         return True
 
     # next, we check the file modification times
-    html_mtime = os.path.getmtime(filename)
-    rst_mtime = os.path.getmtime(getrst(filename))
+    html_mtime = os.path.getmtime(html)
+    rst_mtime = os.path.getmtime(get_rst(html))
 
     # if .rst file is more recent, we need to regenerate the .html file
     return (rst_mtime > html_mtime)
@@ -150,12 +163,12 @@
     #print pub.writer.parts['head']
 
 
-def regenerate(filename):
+def regenerate(html):
     """
     Given an .html filename, regenerate it from its .rst source.
     """
 
-    rst = getrst(filename)
+    rst = get_rst(html)
     assert os.path.exists(rst)
 
     overrides = dict(
@@ -165,40 +178,27 @@
 
     publish_file(
         source_path=rst,
-        destination_path=filename,
+        destination_path=html,
         settings_overrides=overrides,
     )
 
     timestamp = time.strftime("%H:%M:%S")
-    print "%s Regenerated %s" % (timestamp, filename)
+    print "%s Regenerated %s" % (timestamp, html)
 
 
-def regenerate_all_html_files():
-    """
-    Regenerate .html files from their .rst source.
-    """
-
-    from common import groups, locate
-
-    for group in groups:
-        for filename in locate('*.html', root=group):
-            if needs_update(filename):
-                regenerate(filename)
-
-
 def regenerate_from_list():
     """
     Regenerate the .html files that are listed in 'upload.txt'.
     """
     
-    from common import readlines
+    for src in source_files():
+        if src.endswith('.rst'):
+            html = get_html(src)
+            if needs_update(html):
+                regenerate(html)
 
-    for filename in readlines('sources.txt'):
-        html = gethtml(filename)
-        if needs_update(html):
-            regenerate(html)
+    return
 
-
 # -----------------------------------------------------------------------------
 
 def main():
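
For reference, regenerate() is a thin wrapper around docutils'
publish_file; a minimal standalone equivalent (hypothetical paths,
omitting the settings overrides used above) would be:

    from docutils.core import publish_file

    publish_file(source_path="long/index.rst",      # hypothetical input
                 destination_path="long/index.html",
                 writer_name="html")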

Modified: doc/geodynamics.org/benchmarks/trunk/publish.py
===================================================================
--- doc/geodynamics.org/benchmarks/trunk/publish.py	2009-11-16 22:49:21 UTC (rev 15979)
+++ doc/geodynamics.org/benchmarks/trunk/publish.py	2009-11-16 22:49:28 UTC (rev 15980)
@@ -1,22 +1,34 @@
 #!/usr/bin/env python2.6
 # -*- Python -*-
 
+"""
+publish.py
+==========
+
+This script is responsible for publishing the files listed
+in ``sources.txt``. Those files are copied directly to the
+website through a WebDAV directory mounted at ``/Volumes/cig``.
+
+The ``download.py`` script must always be run right before
+this one. In other words, the cache directory ``_latest``
+must contain the latest versions of every source page as they
+exist on the main geodynamics.org website.
+"""
+
+
 import os, shutil
 
 def main():
 
     from common import (
-        webdav_benchroot,
-        readlines, readfile, writefile,
-        sitename, headername, latestname,
-        local_md5sum, latest_md5sum
+        source_files, writefile, md5sum,
+        get_cached, get_page, get_site_path,
     )
 
-    for line in readlines("sources.txt"):
-        filename = line.strip()
+    for src in source_files():
 
-        local_md5 = local_md5sum(filename)
-        latest_md5 = latest_md5sum(filename)
+        local_md5 = md5sum(src)
+        latest_md5 = md5sum(get_cached(src))
 
         # Check whether our local copy has changed. If it's different,
         # reupload it. We can only compare against the latest downloaded
@@ -24,30 +36,23 @@
         # needs to be done.
         if local_md5 != latest_md5:
 
-            # However, we can't upload the filename directly.
-            # We must first prepend a header, and change the line endings
-            # in our file content.
-            header = readfile(headername(filename))
-            content = readfile(filename)
-            site_content = header + content.replace('\n', '\r\n')
+            # Prepare local page contents (save to temporary file)
+            writefile(src + '.tmp', get_page(src))
 
-            # Write out the new site contents to a temporary file.
-            tmp_filename = filename + '.tmp'
-            writefile(tmp_filename, site_content)
+            # Destination path
+            site_path = get_site_path(src)
 
-            # This is the destination "URL"
-            site_page = sitename(filename, prefix=webdav_benchroot)
+            # Finally, upload our local page to the website
+            print "Publishing", site_path
+            shutil.copyfile(src + '.tmp', site_path)
 
-            # Finally, upload the site_content to site_page
-            print "Publishing %s" % site_page
-            shutil.copyfile(tmp_filename, site_page)
-
             # If the previous step succeeded, we should also overwrite
-            # our local site copy. The md5 checksums will be equal
-            # next time we run the publish.py script. Technically,
-            # we also need to copy the header files, but we can leave
-            # those header file updates to the download.py script.
-            shutil.copyfile(filename, latestname(filename))
+            # our local site copy. That way the md5 checksums for the
+            # current file will be equal next time we run the publish.py
+            # script. Technically, we also need to copy the header files,
+            # but we can leave those header file updates to the download.py
+            # script.
+            shutil.copyfile(src, get_cached(src))
 
     return
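
The publish loop in a nutshell: a source is re-uploaded only when its
checksum differs from the cached copy in _latest, and the header files
are deliberately excluded from the comparison (their updates are left
to download.py). A condensed sketch using the helpers above:

    from common import source_files, md5sum, get_cached

    for src in source_files():
        if md5sum(src) != md5sum(get_cached(src)):
            print src, "differs from the cached copy; would re-publish"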
 


