[cig-commits] [commit] pluggable: Here is a solution to the "huge model file MPI_Bcast problem", based upon a suggestion from Eh. The "prepare-model.py" script now constructs a special "tgz" file specifically for MPI_Bcast: "bcast_model.tgz". This "tgz" file is basically a copy of the original. However, any subdirectory named "shared" in the original "model.tgz" file is excluded from "bcast_model.tgz". In "bcast_model.tgz", the "shared" subdirectories are replaced with symbolic links which point to the corresponding original subdirectory (from the initial extraction) on the global, shared filesystem. (669a62f)

cig_noreply at geodynamics.org cig_noreply at geodynamics.org
Wed Apr 9 08:55:09 PDT 2014


Repository : ssh://geoshell/specfem3d_globe

On branch  : pluggable
Link       : https://github.com/geodynamics/specfem3d_globe/compare/64e1b38f0c5ebb4056cce0b15d41c0b9f94ab6e5...099a4d330d5b173b21e51ad441f9f429e5d37842

>---------------------------------------------------------------

commit 669a62f8ac6377ff2e80c8c4888088f13d4e356f
Author: Leif Strand <leif at geodynamics.org>
Date:   Tue Mar 24 01:07:10 2009 +0000

    Here is a solution to the "huge model file MPI_Bcast problem", based
    upon a suggestion from Eh.  The "prepare-model.py" script now
    constructs a special "tgz" file specifically for MPI_Bcast:
    "bcast_model.tgz".  This "tgz" file is basically a copy of the
    original.  However, any subdirectory named "shared" in the original
    "model.tgz" file is excluded from "bcast_model.tgz".  In
    "bcast_model.tgz", the "shared" subdirectories are replaced with
    symbolic links which point to the corresponding original subdirectory
    (from the initial extraction) on the global, shared filesystem.
    
    What this means is that earth model authors can simply place large
    files in a "shared" subdirectory.  Such files remain on the shared
    filesystem and are reached through the symbolic link, instead of
    being broadcast to each node's local filesystem.  All files are
    opened the same way, regardless of where they reside.


>---------------------------------------------------------------

669a62f8ac6377ff2e80c8c4888088f13d4e356f
 bcast_model.c    |  6 +++---
 prepare-model.py | 53 ++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/bcast_model.c b/bcast_model.c
index 1eb056b..4f66c66 100644
--- a/bcast_model.c
+++ b/bcast_model.c
@@ -35,7 +35,7 @@ void FC_FUNC_(bcast_model, BCAST_MODEL)(int *pRank, char *scratchDir, int scratc
     /* Broadcast the model archive to all the nodes. */
     if (rank == 0) {
         /* XXX: We shouldn't hardcode this filename. */
-        fd = open("model.tgz", O_RDONLY);
+        fd = open("bcast_model.tgz", O_RDONLY);
         if (fd == -1) {
             perror("open");
             MPI_Abort(MPI_COMM_WORLD, 1);
@@ -81,7 +81,7 @@ void FC_FUNC_(bcast_model, BCAST_MODEL)(int *pRank, char *scratchDir, int scratc
     }
     
     /* Save a local copy of the model archive. */
-    fd = open("model.tgz", O_CREAT | O_WRONLY, S_IRUSR);
+    fd = open("bcast_model.tgz", O_CREAT | O_WRONLY, S_IRUSR);
     if (fd == -1) {
         perror("open");
         MPI_Abort(MPI_COMM_WORLD, 1);
@@ -94,7 +94,7 @@ void FC_FUNC_(bcast_model, BCAST_MODEL)(int *pRank, char *scratchDir, int scratc
     free(data);
     
     /* Extract the model files. */
-    status = system("tar xzf model.tgz");
+    status = system("tar xzf bcast_model.tgz");
     if (status == -1) {
         perror("system");
         MPI_Abort(MPI_COMM_WORLD, 1);
diff --git a/prepare-model.py b/prepare-model.py
index 3d509e9..0e11ede 100755
--- a/prepare-model.py
+++ b/prepare-model.py
@@ -11,29 +11,37 @@ class MovedFromPortal:
    
     def prepareModel(self):
         import tarfile
-        from os.path import basename, dirname, splitext
+        from os.path import basename, dirname, exists, join, splitext
         from itertools import chain
         
         tgz = tarfile.open(self.model, 'r:gz')
-        path = "model"
+        root = "model"
+        cwd = os.getcwd()
 
         directories = []
         serialFortranSourceFiles = []
         serialCSourceFiles = []
         fortranSourceFiles = []
         cSourceFiles = []
+        bcastFiles = []
 
         for tarinfo in tgz:
             if tarinfo.isdir():
                 # Extract directory with a safe mode, so that
                 # all files below can be extracted as well.
                 try:
-                    os.makedirs(os.path.join(path, tarinfo.name), 0777)
+                    os.makedirs(join(root, tarinfo.name), 0777)
                 except EnvironmentError:
                     pass
                 directories.append(tarinfo)
-            elif tarinfo.name.endswith(".f90") or tarinfo.name.endswith(".c"):
-                pathname = os.path.join(path, tarinfo.name)
+            else:
+                tgz.extract(tarinfo, root)
+                if not "/shared/" in tarinfo.name:
+                    bcastFiles.append(tarinfo.name)
+            
+            if tarinfo.name.endswith(".f90") or tarinfo.name.endswith(".c"):
+                pathname = join(root, tarinfo.name)
+                os.unlink(pathname)
                 if tarinfo.name.endswith(".f90"):
                     if tarinfo.name.endswith(".serial.f90"):
                         serialFortranSourceFiles.append(pathname)
@@ -51,8 +59,18 @@ class MovedFromPortal:
                 for line in s.readlines():
                     line = line.replace('@THIS_DIR@', thisDir)
                     f.write(line)
-            else:
-                tgz.extract(tarinfo, path)
+
+        # Create symlinks to "shared" directories.
+        sharedParents = []
+        for directory, subdirectories, files in os.walk(root):
+            for subdirectory in subdirectories:
+                if subdirectory == "shared":
+                    pathname = join(directory, "shared")
+                    symLink = join(directory, "_shared")
+                    if exists(symLink):
+                        os.unlink(symLink)
+                    os.symlink(join(cwd, pathname), symLink)
+                    sharedParents.append(directory[len(root)+1:])
 
         # Reverse sort directories.
         directories.sort(lambda a, b: cmp(a.name, b.name))
@@ -60,14 +78,22 @@ class MovedFromPortal:
 
         # Set correct owner, mtime and filemode on directories.
         for tarinfo in directories:
-            path = os.path.join(path, tarinfo.name)
+            pathname = os.path.join(root, tarinfo.name)
             try:
-                tgz.chown(tarinfo, path)
-                tgz.utime(tarinfo, path)
-                tgz.chmod(tarinfo, path)
+                tgz.chown(tarinfo, pathname)
+                tgz.utime(tarinfo, pathname)
+                tgz.chmod(tarinfo, pathname)
             except tarfile.ExtractError, e:
                 pass
 
+        # Generate the bcast tgz file.
+        tgz = tarfile.open("bcast_model.tgz", 'w:gz')
+        for name in bcastFiles:
+            tgz.add(join(root, name), name)
+        for name in sharedParents:
+            tgz.add(join(root, name, "_shared"), join(name, "shared"))
+        tgz.close()
+
         # Generate the make include file.
         s = open("model.mk", "w")
         print >>s
@@ -125,7 +151,4 @@ def prepareModel():
     MovedFromPortal(model).prepareModel()
 
 
-try:
-    prepareModel()
-except Exception, e:
-    sys.exit("%s: %s" % (__file__, e))
+prepareModel()



More information about the CIG-COMMITS mailing list