[cig-commits] [commit] devel, master: Optimize ascii2bin script a bit. (6c2a930)

cig_noreply at geodynamics.org cig_noreply at geodynamics.org
Thu Nov 6 08:25:56 PST 2014


Repository : https://github.com/geodynamics/specfem3d_globe

On branches: devel,master
Link       : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f

>---------------------------------------------------------------

commit 6c2a930a82e6f902df739457e85d0dbddb08d7d0
Author: Elliott Sales de Andrade <esalesde at physics.utoronto.ca>
Date:   Mon Jul 7 14:25:36 2014 -0400

    Optimize ascii2bin script a bit.
    
    Since we don't need the previous data, it's better to read in chunks, so
    that we don't use a ton of memory.


>---------------------------------------------------------------

6c2a930a82e6f902df739457e85d0dbddb08d7d0
 DATA/topo_bathy/ascii2bin.py | 47 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/DATA/topo_bathy/ascii2bin.py b/DATA/topo_bathy/ascii2bin.py
index e3f4b6a..55fa746 100755
--- a/DATA/topo_bathy/ascii2bin.py
+++ b/DATA/topo_bathy/ascii2bin.py
@@ -2,6 +2,7 @@
 
 from __future__ import print_function
 import sys
+from io import BytesIO
 
 try:
     import numpy as np
@@ -13,22 +14,44 @@ if '--help' in sys.argv or '-h' in sys.argv or len(sys.argv) != 3:
     print('Usage: %s <input> <output>' % (sys.argv[0],))
     sys.exit()
 
-# Read input file
+# Output config
 print('Reading input file %s ...' % (sys.argv[1],))
-data = np.genfromtxt(sys.argv[1])
+print('Writing output to file %s ...' % (sys.argv[2],))
+
+# Input file
+inf = open(sys.argv[1], 'rb')
 
-# Convert to 16-bit integers
-data2 = data.astype(np.int16)
-if any(data != data2):
-    print('Warning: Data set does not fit in signed 16-bit integers!')
+# Output file
+outf = open(sys.argv[2], 'wb')
 
 # Add a byte-order mark
 byteorder = np.array([0x1234], dtype=np.int16)
-data2 = np.concatenate((byteorder, data2))
-
-# Save output file
-print('Writing output to file %s ...' % (sys.argv[2],))
-data2.tofile(sys.argv[2])
-
+byteorder.tofile(outf)
+
+i = 0
+while True:
+    # Read input file
+    text = inf.readlines(1024*1024)  # 1M at a time
+    if not len(text):
+        break
+    ioin = BytesIO(''.join(text))
+    data = np.genfromtxt(ioin)
+    if not len(data):
+        break
+
+    # Convert to 16-bit integers
+    data2 = data.astype(np.int16)
+    if any(data != data2):
+        print('Warning: Data set does not fit in signed 16-bit integers!')
+
+    # Save output file
+    data2.tofile(outf)
+
+    i = i + 1
+    if i % 10 == 0:
+        print('%d MB ...' % (i,))
+
+inf.close()
+outf.close()
 print('Done!')
 



More information about the CIG-COMMITS mailing list