Commits

Anonymous committed 130f083

Modified CpG masking tool - added the option to mask nonCpG sites. Also added unit tests for CpG masking tool.

Comments (0)

Files changed (2)

lib/bx/align/sitemask/cpg.py

             
         return block
 
+# Mask non-CpG sites: every column reported by list_non_cpg() is overwritten with the mask character.
+class nonCpG( Masker ):  # callable masker; the Masker base class is defined outside this hunk
+    def __init__( self, mask = '?' ):  # mask: character written over each masked column
+        self.mask = mask
+        self.masked = 0  # running count of masked columns over all blocks processed
+        self.total = 0  # running count of columns seen (length of the first component's text)
+        
+    def __call__( self, block ):  # masks the block's component texts in place and returns the block
+        if not block: return block  # falsy blocks are returned untouched
+        noncpglist = list_non_cpg( \
+            string.upper(block.components[0].text), \
+            string.upper(block.components[1].text) )  # NOTE(review): string.upper() exists only in Python 2
+
+        # noncpglist comes from the first two components only; update the counters, then mask those columns in every component
+        self.masked += len(noncpglist)
+        self.total += len(block.components[0].text)
+        for component in block.components:
+            component.text = mask_columns( noncpglist, component.text, self.mask )
+            
+        return block
+    
 def mask_columns( masklist, text, mask ):
     templist = list()
     for position in masklist:

lib/bx/align/sitemask/sitemask_tests.py

+"""
+Tests for `bx.align.sitemask`.
+"""
+
+import sys,tempfile
+import unittest
+from StringIO import StringIO
+import cpg
+import bx.align.maf
+
+test_maf_cpg = """##maf version=1 scoring=none
+a score=0
+s apple  34 64 + 110 AGGGA---GTTCGTCACT------GTCGTAAGGGTTCAGA--CTGTCTATGTATACACAAGTTGTGTTGCA--ACCG
+s orange 19 61 - 100 AGGGATGCGTT--TCACTGCTATCGTCGTA----TTCAGACTTCG-CTATCT------GAGTTGT---GCATTACCG
+"""  # MAF input shared by the tests: one alignment block with two sequences (apple, orange)
+
+cpg_inclusive_result = [  # expected cpg.Inclusive output (mask '#'): one entry per non-blank line, fields joined with ','
+"##maf,version=1",
+"a,score=0",
+"s,apple,34,64,+,110,AGGGA---GTTCGTCACT------GT##TAAGGGTTCAGA--CTGTCTATGTATACACAAGTTGTGTTGCA--ACCG", 
+"s,orange,19,61,-,100,AGGGATG#GTT--TCACTGCTAT#GT##TA----TTCAGACTTCG-CTATCT------GAGTTGT---GCATTACCG" 
+]
+
+cpg_restricted_result = [  # expected cpg.Restricted output (mask '#'): one entry per non-blank line, fields joined with ','
+"##maf,version=1",
+"a,score=0",
+"s,apple,34,64,+,110,A##GA---#TT##TC#C#------#T##TA###GTTC#GA--C##TC#A#G#ATAC####GT#G#GT#GC#--AC#G", 
+"s,orange,19,61,-,100,A##GA#G##TT--TC#C#GC#AT##T##TA----TTC#GAC#T##-C#A#C#------##GT#G#---GC#TTAC#G"                          
+]
+
+noncpg_result = [  # expected cpg.nonCpG output (mask '#'): one entry per non-blank line, fields joined with ','
+"##maf,version=1",
+"a,score=0",
+"s,apple,34,64,+,110,#GG##---G##CG##A#T------G#CG##AGG####A##--#TG##T#T#T####ACAA##T#T##T##A--##CG", 
+"s,orange,19,61,-,100,#GG##T#CG##--##A#T##T##CG#CG##----###A###T#CG-#T#T#T------GA##T#T---##A####CG" 
+]
+
+def test_cpg_inclusive():  # runs cpg.Inclusive (mask '#') over test_maf_cpg and checks the MAF text that gets written
+    reader = bx.align.maf.Reader( StringIO( test_maf_cpg ) )
+    out = tempfile.NamedTemporaryFile('w')  # writer target; read back below through out.name
+    writer = bx.align.maf.Writer( out )
+    cpgfilter = cpg.Inclusive( mask='#' )
+    cpgfilter.run( reader, writer.write )
+    out.seek(0)  # NOTE(review): presumably relied on to flush buffered writes before the file is re-opened by name - confirm
+    j=0  # index of the next expected entry
+    for line in file(out.name):  # NOTE(review): file() is a Python 2 builtin
+        line = line.strip()
+        if not(line):  # blank lines are skipped
+            continue
+        assert cpg_inclusive_result[j] == ",".join(line.split())  # whitespace-separated fields are compared comma-joined
+        j+=1  # NOTE(review): j is never checked against len(cpg_inclusive_result), so truncated or empty output still passes
+    
+def test_cpg_restricted():  # runs cpg.Restricted (mask '#') over test_maf_cpg and checks the MAF text that gets written
+    reader = bx.align.maf.Reader( StringIO( test_maf_cpg ) )
+    out = tempfile.NamedTemporaryFile('w')  # writer target; read back below through out.name
+    writer = bx.align.maf.Writer( out )
+    cpgfilter = cpg.Restricted( mask='#' )
+    cpgfilter.run( reader, writer.write )
+    out.seek(0)  # NOTE(review): presumably relied on to flush buffered writes before the file is re-opened by name - confirm
+    j=0  # index of the next expected entry
+    for line in file(out.name):  # NOTE(review): file() is a Python 2 builtin
+        line = line.strip()
+        if not(line):  # blank lines are skipped
+            continue
+        assert cpg_restricted_result[j] == ",".join(line.split())  # whitespace-separated fields are compared comma-joined
+        j+=1  # NOTE(review): j is never checked against len(cpg_restricted_result), so truncated or empty output still passes
+
+def test_non_cpg():  # runs cpg.nonCpG (mask '#') over test_maf_cpg and checks the MAF text that gets written
+    reader = bx.align.maf.Reader( StringIO( test_maf_cpg ) )
+    out = tempfile.NamedTemporaryFile('w')  # writer target; read back below through out.name
+    writer = bx.align.maf.Writer( out )
+    cpgfilter = cpg.nonCpG( mask='#' )
+    cpgfilter.run( reader, writer.write )
+    out.seek(0)  # NOTE(review): presumably relied on to flush buffered writes before the file is re-opened by name - confirm
+    j=0  # index of the next expected entry
+    for line in file(out.name):  # NOTE(review): file() is a Python 2 builtin
+        line = line.strip()
+        if not(line):  # blank lines are skipped
+            continue
+        assert noncpg_result[j] == ",".join(line.split())  # whitespace-separated fields are compared comma-joined
+        j+=1  # NOTE(review): j is never checked against len(noncpg_result), so truncated or empty output still passes
+
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.