SparCC / SparCC.py

#!/usr/bin/env python

'''
Created on Jun 20, 2011

@author: jonathanfriedman
'''

from lib.SurveyMatrix import Survey_matrix as SM

    
def kwargs_callback(option, opt, value, parser,**kwargs):
    d = kwargs['d']
    d[option.dest] = value
    return d
  

def RunSparCC(counts_file, algo = 'SparCC', **kwargs):
    ## read counts data
    print 'reading data'
    temp   = SM()
    counts = temp.from_file(counts_file)
    
    ## Calculate correlations between components using SparCC
    print 'computing correlations'
    cor, cov = counts.basis_corr(algo = algo , **kwargs)
    
    ## write out results
    print 'writing results'
    cor_file = kwargs.get('cor_file', 'cor_mat_' + algo + '.out')
    cor.writetxt(cor_file)
    print 'wrote ' + cor_file
    if cov:
        cov_file = kwargs.get('cov_file', 'cov_mat_' + algo + '.out')
        cov.writetxt(cov_file)
        print 'wrote ' + cov_file
    
    print 'Done!'
        

if __name__ == '__main__':
    ## parse input arguments
    from optparse import OptionParser
    kwargs = {}
    usage  = ('Compute the correlation between components (e.g. OTUs).\n' 
              'By default uses the SparCC algorithm to account for compositional effects.\n' 
              'Correlation and covariance (when applies) matrices are written out as txt files. \n'
              'Counts file needs to be a tab delimited text file where columns are samples and rows are components (e.g. OTUS).\n'
              ' See example/fake_data.txt for an example file.\n' 
              '\n'
              'Usage:   python SparCC.py counts_file [options]\n'
              'Example: python SparCC.py example/fake_data.txt -i 20 --cor_file=Cor_mat.out')
    parser = OptionParser(usage)
    parser.add_option("-c", "--cor_file", dest="cor_file", type = 'str',
                      action="callback", callback= kwargs_callback, callback_kwargs = {'d':kwargs}, 
                      help="File to which correlation matrix will be written.")
    parser.add_option("-v", "--cov_file", dest="cov_file", type = 'str',
                      action="callback", callback= kwargs_callback, callback_kwargs = {'d':kwargs},
                      help="File to which covariance matrix will be written.")
    parser.add_option("-a", "--algo", dest="algo", default='SparCC',
                      help="Name of algorithm used to compute correlations (SparCC (default) | pearson | spearman | kendall)")
    parser.add_option("-i", "--iter", dest = 'iter', type ='int', default=20,
                      action="callback", callback= kwargs_callback, callback_kwargs = {'d':kwargs},
                      help="Number of inference iterations to average over (20 default).")
    parser.add_option("-x", "--xiter", dest = 'xiter', type ='int', default=10,
                      action="callback", callback= kwargs_callback, callback_kwargs = {'d':kwargs},
                      help="Number of exclusion iterations to remove strongly correlated pairs (10 default).")
    parser.add_option("-t", "--thershold", dest = 'th', type ='float', default=0.1,
                      action="callback", callback= kwargs_callback, callback_kwargs = {'d':kwargs},
                      help= "Correlation strength exclusion threshold (0.1 default).")
    (options, args) = parser.parse_args()
    counts_file     = args[0]
    algo            = options.algo
    
    ## run SparCC    
    RunSparCC(counts_file, algo = algo, **kwargs)

    
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.