defnames_to_kingdoms(categoriesfile,namesfile):names_to_ID={}# keys are microbial species, values are NCBI IDsnames_to_kingdom={}# keys are microbial species names, values are kingdom abbreviation as A B Eprint>>sys.stderr,"# Reading IDs from {}".format(namesfile),time.asctime()# 1803500 | Candidatus Aenigmarchaeota archaeon CG1_02_38_14 | | scientific name |name_linecounter=0forlineinopen(namesfile,'r'):line=line.strip():ifline:name_linecounter+=1fields=[x.strip()forxinline.split("|")]ncbi_id=fields[0]species_name=fields[1]name_class=fields[3]ifname_class=="scientific name"orname_class=="equivalent name":names_to_ID[ncbi_id]=species_nameprint>>sys.stderr,"# Read {} lines and kept {} IDs".format(name_linecounter,len(names_to_ID)),time.asctime()print>>sys.stderr,"# Reading IDs from {}".format(categoriesfile),time.asctime()category_linecounter=0forlineinopen(categoriesfile,'r'):line=line.strip():ifline:category_linecounter+=1fields=[x.strip()forxinline.split()]kingdom=fields[0]ncbi_id=fields[1]species=names_to_ID.get(ncbi_id,None)ifspeciesisnotNone:names_to_kingdom[species]=kingdomelse:print>>sys.stderr,"WARNING: Cannot find species for {}".format(ncbi_id)print>>sys.stderr,"# Read {} lines and kept {} categories".format(category_linecounter,len(names_to_kingdom)),time.asctime()returnnames_to_kingdomdefmain(argv,wayout):ifnotlen(argv):argv.append("-h")parser=argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,description=__doc__)parser.add_argument('-m','--matrix',help="input matrix file")parser.add_argument('-c','--ncbi-categories',help="categories.dmp from NCBI Taxonomy",required=True)parser.add_argument('-n','--ncbi-names',help="names.dmp from NCBI Taxonomy",required=True)parser.add_argument('-s','--search',action="store_true",help="search for sequences that do not match exactly, including partial matches, so g1 returns g1, g10, etc.")parser.add_argument('-v','--verbose',action="store_true",help="verbose, print names which are not found")args=parser.parse_args(argv)
Comments (0)
HTTPSSSH
You can clone a snippet to your computer for local editing.
Learn more.