importfastafromtimeitimporttimeitimportnumpyasnpfromBioimportSeqIOfromBioimportAlignIOfromBio.SeqIO.FastaIOimportSimpleFastaParserdefcount_adenines_fast(fn):"""Count number of adenines per position of the alignment, fast-fasta"""ali=fasta.read_matrix(open(fn))returnnp.sum(ali=='A',axis=0)defcount_adenines_biopython(fn):"""Count number of adenines per position of the alignment, biopython"""ali=AlignIO.read(open(fn),'fasta')return[sum((ali[j,i]=='A'forjinxrange(len(ali))))foriinxrange(ali.get_alignment_length())]if__name__=='__main__':N=10print'Reading large files'print'fast-fasta time:',timeit(lambda:list(fasta.read(open('long.fasta'))),number=N)print'biopython time:',timeit(lambda:list(SeqIO.parse(open('long.fasta'),'fasta')),number=N)print'SimpleFastaParser time:',timeit(lambda:list(SimpleFastaParser(open('long.fasta'))),number=N)N=1000print'Accessing alignment positions'print'fast-fasta time:',timeit(lambda:count_adenines_fast('x.fst'),number=N)print'biopython time:',timeit(lambda:count_adenines_biopython('x.fst'),number=N)# Output:# Reading large files# Reading large files# fast-fasta time: 1.35685396194# biopython time: 9.09956002235# SimpleFastaParser time: 2.85863208771# Accessing alignment positions# fast-fasta time: 0.568293094635# biopython time: 10.8877089024
"""Very simple fasta I/O functions.These functions are working much faster compared to Biopython.This comes at a cost: they are less general."""importnumpyasnpdefread_matrix(f):"""Read alignment from fasta file as numpy matrix"""seqs=[]forrinread(f):seqs.append([cforcinr[1]])returnnp.array(seqs,dtype=np.character)defread(f):"""Read fasta sequences from file f and return as tuples (accesion, seq)"""seq=Noneforlineinf:ifline.startswith('>'):ifnotseqisNone:yield(ac,seq)# You can edit this line depending on your fasta filesac=line.split(' ',1)[0][1:].strip()seq=''else:seq+=line.strip()ifseq:yield(ac,seq)defwrite(l,f):"""Write list of sequences in form of tuples (accession, sequence) to file f"""forac,seqinl:f.write('>%s\n'%ac)f.write(seq)f.write('\n')
Comments (0)
HTTPSSSH
You can clone a snippet to your computer for local editing.
Learn more.