Source

phylogenetic_utils / pairwise_distances.py

Full commit
# Gather all the columns of a pairwise distance matrix into one single
# column and save the result in a file
# Released under the BSD license

import re
import sys

def extract_cols(filename):
    """
    Flatten the values of a distance matrix file into a single column.

    Read a file in this (approximate) format:
    ##################
    Uncorrected ("p") distance matrix

                 1        2        3        4        5        6        7        8        9
      1 121c        -
      2 128c  0.22611        -
      3 129c  0.22031  0.13631        -
    #####################

    A line is treated as a data row only when the character at index 10
    (just past the row number and label) is a digit; every other line,
    including the title and the column header, is ignored.  The first 10
    characters of a data row are discarded, the remainder is split on
    whitespace, and the '-' diagonal placeholders are dropped.

    The values are written, one per line and without a trailing newline,
    to ``filename + '.out'`` and the same string is returned.

    Errors raised by ``open`` (missing or unreadable input, unwritable
    output) are not caught here.
    """
    label_width = 10  # row number + label occupy the first 10 characters

    with open(filename, 'r') as infile:
        text = infile.read()

    values = []
    # splitlines() copes with '\r', '\n' and '\r\n' line endings alike,
    # so the fixed-width offsets stay correct whatever the platform.
    for line in text.splitlines():
        if len(line) > label_width and line[label_width].isdigit():
            # Slice first, then drop the placeholders token by token: this
            # keeps the column offsets intact when a label contains '-' and
            # leaves values such as '1e-05' untouched.
            values.extend(
                token for token in line[label_width:].split() if token != '-'
            )

    data = '\n'.join(values)

    # Write the result; the context manager guarantees the file is flushed
    # and closed before returning.
    with open(filename + '.out', 'w') as out:
        out.write(data)
    return data

if __name__ == '__main__':
    # Script entry point: expects the matrix file as the first argument.
    if len(sys.argv) < 2:
        # Only a missing argument is a usage error; anything else is
        # reported for what it is instead of being hidden behind this text.
        print('usage: pairwise_distance.py filename')
        sys.exit(1)
    try:
        extract_cols(sys.argv[1])
    except EnvironmentError as err:
        # Unreadable input or unwritable output: say so and fail visibly.
        sys.stderr.write('pairwise_distance.py: %s\n' % err)
        sys.exit(1)