Commits

Sean Davis committed 9de4e58

Using python3 gzip for incredible performance improvements over subprocess gzip

  • Participants
  • Parent commits 12990b0

Comments (0)

Files changed (1)

File seqtools/utils.py

 import subprocess
 import tempfile
+import gzip
 
 transtab = str.maketrans('ACGTNacgtn','TGCANtgcan')
 
     return(tmp)
 
 
-def fileOpen(fname,mode='r'):
+def fileOpen(fname,mode='rt',encoding='latin-1'):
     """Open a file, including gzip files
 
     :param fname: The filename to open.  Gzip files are distinguished by ending in '.gz'
     """
     # gzip in python is REALLY slow, so use pipes instead.
     if(fname.endswith('.gz')):
-        if(mode.startswith('r')):
-            return subprocess.Popen(['gunzip -c %s' % fname],stdout=subprocess.PIPE,shell=True).stdout
-        if(mode.startswith('w')):
-            return subprocess.Popen(['gzip > %s' % fname],stdin=subprocess.PIPE,shell=True).stdin
+        return gzip.open(fname,mode=mode,encoding=encoding)
     else:
-        return open(fname,'r')
+        return open(fname,mode)
 
 
 def sortVcfBySequence(vcf,seqnames,seqmap=None):