# sleipnir / tools / SeekMiner / SeekMiner.ggo
#
# Full commit
package	"SeekMiner"
version	"1.0"
purpose	"Performs cross-platform microarray query-guided search"

section "Main"
option	"dset"				x	"Input a set of datasets"
								string typestr="filename"	yes
option	"search_dset"		D	"A set of datasets to search. If not specified, search all datasets."
								string typestr="filename" default="NA"
option	"input"				i	"Input gene mapping"
								string	typestr="filename"	yes
option	"query"				q	"Query gene list"
								string typestr="filename"	yes
option	"dir_in"			d	"Database directory"
								string	typestr="directory"	yes
option	"dir_prep_in"		p	"Prep directory (containing .gavg, .gpres files)"
								string	typestr="directory"	yes
option	"dir_platform"		P	"Platform directory (containing .gplatavg, .gplatstdev, .gplatorder files)"
								string	typestr="directory"	yes
option	"dir_sinfo"			u	"Sinfo Directory (containing .sinfo files)"
								string	typestr="directory"	default="NA"
option	"dir_gvar"			U	"Gene variance directory (containing .gexpvar files)"
								string	typestr="directory"	default="NA"
option	"quant"				Q	"quant file (assuming all datasets use the same quantization)"
								string	typestr="filename"	yes								
option	"num_db"			n	"Number of databaselets in database"
								int	default="1000"	yes

section "Dataset weighting"
option	"weighting_method"	V	"Weighting method: query cross-validated weighting (CV), equal weighting (EQUAL), order statistics weighting (ORDER_STAT)"
								values="CV","EQUAL","ORDER_STAT" default="CV"

section "Optional - Functional Network Expansion"
option	"func_db"			w	"Functional network db path"
								string	typestr="directory"
option	"func_n"			f	"Functional network number of databaselets"
								int default="1000"
option	"func_prep"			W	"Functional network prep & platform directory"
								string typestr="directory"
option	"func_quant"		R	"Functional network quant file"
								string typestr="filename"
option	"func_dset"			F	"Functional network dset-list file (1 dataset)"
								string typestr="filename"
option	"func_logit"		l	"Functional network, integrate using logit values"
								flag	off

section "Optional - Random simulations"
option	"random"			S	"Generate random ranking score"
								flag	off
option	"num_random"		t	"Number of repetitions of generating random rankings"
								int	default="10"

section "Optional - Distance matrix transformations"
option	"dist_measure"		z	"Distance measure"
								values="pearson","z_score" default="z_score"
option	"norm_subavg"		m	"If z_score is selected, subtract each result gene's average z-score in the dataset."
								flag	off
option	"norm_subavg_plat"	M	"If z_score is selected, subtract each query gene's average score across platforms and divide by its stdev. Performed after --norm_subavg."
								flag	off
option	"score_cutoff"		c	"Cutoff on the gene-gene score before adding, default: no cutoff"
								float default="-9999"
option	"square_z"			e	"If z_score is selected, take the square the z-scores. Usually used in conjunction with --score-cutoff."							
								flag	off

section "Options for Dataset weighting"
option	"per_q_required"	C	"Fraction (max 1.0) of query required to correlate with a gene, in order to count the gene's query score. A gene may not correlate with a query gene if it is absent, or its correlation with query does not pass cut-off (specified by --score_cutoff). Use this with caution. Be careful if using with --score_cutoff."
								float default="0.0"
option	"CV_partition"		I	"The query partitioning method (for CV weighting): Leave-One-In, Leave-One-Out, X-Fold."
								values="LOI","LOO","XFOLD" default="LOI"
option	"CV_fold"			X	"The number of folds (for X-fold partitioning)."
								int	default="5"
option	"CV_rbp_p"			G	"The parameter p for RBP scoring of each partition for its query gene retrieval (for CV weighting)."
								float	default="0.99"	

section "MISC"								
option	"is_nibble"			N	"Whether the input DB is nibble type"
								flag	off
option	"buffer"			b	"Number of Databaselets to store in memory"
								int default="20"
option	"output_text"		O	"Output results (gene scores and dataset weights) as text"
								flag	off
option	"output_dir"		o	"Output directory"
								string typestr="directory"	yes
option	"output_w_comp"		Y	"Output dataset weight components (generates .dweight_comp file)"
								flag	off
option	"simulate_w"		E	"If equal weighting or order-statistics weighting is selected, output simulated dataset weights"
								flag	off
option	"additional_db"		B	"Utilize a second CDatabase collection. Path to the second CDatabase's setting file."
								string default="NA"