1. yonatanf
  2. SparCC


SparCC / MakeBootstraps.py

#!/usr/bin/env python

'''
Created on Jun 20, 2011

@author: jonathanfriedman

Script for making simulated datasets used to get pseudo p-values.
'''

from lib.SurveyMatrix import Survey_matrix as SM

def kwargs_callback(option, opt, value, parser, **kwargs):
    '''
    optparse callback that records an option's value in a caller-supplied dict.

    The target mapping is taken from the keyword argument 'd' (supplied via
    callback_kwargs); the parsed value is stored in it under the option's
    dest name. The same mapping is returned.
    '''
    target = kwargs['d']
    key = option.dest
    target[key] = value
    return target

def Run(counts_file, n, **kwargs):
    '''
    Make n simulated datasets from a counts file and write them out.

    Inputs:
        counts_file = path of the counts file, loaded with Survey_matrix.from_file.
        n           = number of simulated datasets to create.
    kwargs:
        format   = output format passed to sim_data ('txt' default).
        out_file = file-name prefix passed to sim_data as base_file
                   (counts_file + '_sim_' default).
    '''
    ## read counts data
    counts = SM().from_file(counts_file)
    ## make simulated data
    out_format = kwargs.get('format', 'txt')  # avoid shadowing the builtin format()
    out_file   = kwargs.get('out_file', counts_file + '_sim_')
    counts.sim_data(n, method='permute', base_file=out_file, format=out_format)
    # NOTE(review): the message always reports a '.txt' extension, even when
    # format is not 'txt' -- confirm against the names sim_data actually writes.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('wrote ' + out_file + '_[1...%d].txt' % n)
    print('Done!')

if __name__ == '__main__':
    ## parse input arguments
    from optparse import OptionParser
    # Optional settings forwarded to Run; '-o' is stored here by kwargs_callback.
    kwargs = {}
    # NOTE(review): the description below mentions drawing with replacement,
    # while Run calls sim_data with method='permute' -- confirm the wording.
    usage  = ('Make n simulated datasets used to get pseudo p-values.\n'
              'Simulated datasets are generated by assigning each OTU in each sample an abundance that is randomly drawn (w. replacement) from the abundances of the OTU in all samples.\n'
              'Simulated datasets are either written out as txt files, or pickled. \n'
              'Usage:   python MakeBootstraps.py counts_file [options]\n'
              'Example: python MakeBootstraps.py example/fake_data.txt -n 2 -o MySims')
    parser = OptionParser(usage)
    parser.add_option("-n", dest="n", default=100, type='int',
                      help="Number of simulated datasets to create (100 default).")
    parser.add_option("-f", "--format", dest="format", type='str',
                      help="Format of output files. 'txt' (default) | 'pick'.")
    parser.add_option("-o", "--out_file", dest="out_file", type='str',
                      action="callback", callback=kwargs_callback, callback_kwargs={'d': kwargs},
                      help="Prefix of names of file to which simulated data will be written.\n"
                           "A number and extension will be added to this prefix.\n"
                           "e.g. if the prefix is 'MySims' output files may be named MySims_1.txt, MySims_2.txt, etc.")
    (options, args) = parser.parse_args()
    # Fail with a usage message instead of an IndexError when no file is given.
    if not args:
        parser.error('counts_file argument is required.')
    counts_file = args[0]
    n           = options.n
    # Forward '-f' only when given, so Run keeps its own 'txt' default.
    # (Previously the option was parsed but never passed on.)
    if options.format is not None:
        kwargs['format'] = options.format
    ## make simulated datasets
    Run(counts_file, n, **kwargs)