Commits

Tao Liu committed 0d59249

Filter out NA in id converter.

Comments (0)

Files changed (1)

Scripts/batch_convert_ids.py

 #!/usr/bin/env python
-# Time-stamp: <2011-06-14 11:27:25 Tao Liu>
+# Time-stamp: <2011-06-15 16:52:30 Tao Liu>
 """
 Convert gene ids through bioconductor.
 """
     if rule == "E2R":
         convert_script += "x<-"+gene_universe+"REFSEQ\n"
         convert_script += "converted <- grep('NM',unlist(as.list(mget(genelist,x,ifnotfound=NA)),use.names=F),value=T)\n"
+        convert_script += "converted <- converted[!is.na(converted)]\n"
     elif rule == "R2E":
         convert_script += "x<-"+gene_universe+"REFSEQ2EG\n"
         convert_script += "converted <- unlist(as.list(mget(genelist,x,ifnotfound=NA)),use.names=F)\n"
+        convert_script += "converted <- converted[!is.na(converted)]\n"        
     elif rule == "E2S":
         convert_script += "x<-"+gene_universe+"SYMBOL\n"
         convert_script += "converted <- unlist(as.list(mget(genelist,x,ifnotfound=NA)),use.names=F)\n"
+        convert_script += "converted <- converted[!is.na(converted)]\n"        
     elif rule == "S2E":
         convert_script += "x<-"+gene_universe+"SYMBOL2EG\n"
         convert_script += "converted <- unlist(as.list(mget(genelist,x,ifnotfound=NA)),use.names=F)\n"
+        convert_script += "converted <- converted[!is.na(converted)]\n"        
     elif rule == "S2R":
         # first convert SYMBOL to ENTREZ
         convert_script += "x<-"+gene_universe+"SYMBOL2EG\n"
         convert_script += "tmpentrez <- unlist(as.list(mget(genelist,x,ifnotfound=NA)),use.names=F)\n"
+        # Filter tmpentrez by its own NA mask; this branch has not assigned
+        # 'converted' yet, so it must not be the vector being subset here.
+        convert_script += "tmpentrez <- tmpentrez[!is.na(tmpentrez)]\n"
         # then from ENTREZ to REFSEQ
         convert_script += "x<-"+gene_universe+"REFSEQ\n"
         convert_script += "converted <- grep('NM',unlist(as.list(mget(tmpentrez,x,ifnotfound=NA)),use.names=F),value=T)\n"
+        convert_script += "converted <- converted[!is.na(converted)]\n"
     elif rule == "R2S":
         # first convert REFSEQ to ENTREZ
         convert_script += "x<-"+gene_universe+"REFSEQ2EG\n"
         convert_script += "tmpentrez <- unlist(as.list(mget(genelist,x,ifnotfound=NA)),use.names=F)\n"
+        # Filter tmpentrez by its own NA mask; this branch has not assigned
+        # 'converted' yet, so it must not be the vector being subset here.
+        convert_script += "tmpentrez <- tmpentrez[!is.na(tmpentrez)]\n"
         # then from ENTREZ to SYMBOL
         convert_script += "x<-"+gene_universe+"SYMBOL\n"
         convert_script += "converted <- unlist(as.list(mget(tmpentrez,x,ifnotfound=NA)),use.names=F)\n"
+        convert_script += "converted <- converted[!is.na(converted)]\n"
     else:
         raise Exception("Unrecognized conversion %s" % rule)
     p = subprocess.Popen("R --vanilla", shell=True,executable="/bin/bash", stdin=subprocess.PIPE,stdout=subprocess.PIPE)