Wiki
Clone wikimetabit / Documentation-makefile
Documentation of makefile parsed by metaBIT
#!yaml # -*- mode: Yaml; -*- # Please respect indentation (with spaces), and pay attention to colons and hyphens. # Hash-commented lines are ignored. # ----------------------------------------------- # # Samples # # ----------------------------------------------- # # The expected format for sequence files, trimmed for adapters, # is fastq (gz and bz2 compressions are allowed). Samples: SamplePE: # User-defined name for the first sample. Sequence reads for this sample are paired-end reads. # Library paths. # Here, sequencing reads were trimmed and collapsed using AdapterRemoval # (Lindgreen S. BMC Res Notes 2012, 5:337) # There are two ways to specify read paths: # 1st way - Provide an explicit list of files using the AdapterRemoval keys # "Collapsed" and "Paired" for paired-end reads, "Singles" for single-end reads: Lib1: # User-defined name for the first library. Collapsed: # Here, we use collapsed overlapping read pairs. # Here, we give a list (must include at least one item): - folder/Lib1/date1PE/lane*/reads.collapsed.gz # Wildcards are allowed - folder/Lib1/date2PE/lane*/reads.collapsed.gz # In cases where only one item is considered, it can be provided as a single # string after the colon, as follows: Collapsed: folder/Lib1/date1PE/lane*/reads.collapsed.gz # Lists or single strings can be used regardless of the type of data considered # (i.e. Collapsed, Singles, Paired); in the following, we use strings. # Non-overlapping paired-end reads can also be analysed. # The string {Pair} equates to 1 and 2. Paired: folder/Lib1/date1/lane*/reads.pair{Pair}.gz # Singletons (reads whose mate has been removed during trimming) can also be analysed. Singles: folder/Lib1/date1/lane*/reads.singleton.gz # For Single-end data, also use the 'Singles' key. 
# 2nd way – Provide a string or a list of folders containing read files named # according to the format described above, corresponding to default outputs of # AdapterRemoval: # reads.collapsed.gz # reads.pair{Pair}.gz Lib2: folder/Lib1/date1 SampleSE: # User-defined name for the second sample. Sequence reads for this sample are single-end reads. Lib1SE: Singles: folder/Lib1SE/date1SE/lane*/reads.singleton.gz # For Single-end data, use the 'Singles' key # In addition to the Sample: and Library: levels, groups can be nested within each # other, creating as many levels as required by the user as long as the name starts # with 'G_'. These groups will be used in the LEfSe analysis (max one level of subclasses). # example: Samples: G_MyGroup1: G_MySubGroup1-1: Sample1: Lib1: path/to/sample1/lib1 Lib2: path/to/sample1/lib2 G_MySubGroup1-2: Sample2: Lib1: path/to/sample2/lib1 Lib2: path/to/sample2/lib2 G_MyGroup2: G_MySubGroup2-1: Sample3: ... G_MySubGroup2-2: Sample4: ... # ----------------------------------------------- # # Taxonomic profiling # # ----------------------------------------------- # # Types of files to keep from PE libraries keepfromPE: # This information is overridden if files are given in the 'Samples' section using the # fields Collapsed, Paired or Singles. # Default: Collapsed: yes Paired: yes Singles: yes Collapsed: yes Paired: yes Singles: yes Bowtie2: # any Bowtie2 option can be specified. # quality scores: --phred33 (default) or --phred64 or --solexa-quals or --int-quals --phred33: yes # Sets the option. Bowtie2 default. #--phred: # works too. Metaphlan: # Before running MetaPhlAn, libraries from a given sample can be pooled. (abundances # are calculated for the pooled data). 
# Uncomment the following, and edit sample names accordingly if all sequence datasets # available for a given sample shall be considered together: Pool: SamplePE # The special value '*' will pool libraries within each sample (quotes required): Pool: '*' # Lists are also allowed: Pool: - SamplePE - SampleSE # Any MetaPhlAn command-line option can be given here: --ignore_eukaryotes: # comment to include eukaryotes (MetaPhlAn2 only) --ignore_viruses: # comment to include viruses (MetaPhlAn2 only) # ----------------------------------------------- # # Statistical Analysis of the taxonomic profiles # # ----------------------------------------------- # # In case you want to recycle taxonomic tables from previous runs and complete # additional analyses, the path to the input table must be specified here under # the 'run_from_table:' level: run_from_table: path/to/relative/abundance/table/all_taxa.tsv # the name of the table can be changed Krona: #run: no # will not produce Krona visualization files when uncommented -a: no # If you uncomment, Krona charts will require an internet connection to use Krona resources. Statax: # Statistical analyses to be run: MyAnalysis1: # name used as a prefix for all output files corresponding to the statistical analyses requested. # taxonomic levels to be considered: "k" kingdom, "p" phylum, "c" class, "o" order, # "f" family, "g" genus, "s" species, "t" strain. Default: pcofgs taxlevels: pcofgs filterout: 1 # removes taxa with relative abundances <1% across all the samples analysed. Default = 1 # Compute diversity index for each sample and taxlevel: doDiv: # Under "doDiv:", you can give any command-line option of the corresponding # R script, which can be found in the metaBIT package in # nodes/tools/statax_Rmodule/doDiv.R (see doDiv.R --help). # The same is valid for "doBarplot", "doHeatmap", "doPcoa", "doClust". 
# default index is shannon; simpson and invsimpson are also allowed --index: shannon # Create barplots of abundances at all taxlevels: doBarplot: # Create heatmaps of abundances at all taxlevels: doHeatmap: # Perform Principal Coordinates Analysis at all taxlevels: doPcoa: # default distance method = bray # Other distances can be selected: "manhattan", "euclidean", "canberra", "bray", "kulczynski", "jaccard", "gower", "altGower", "morisita", "horn", "mountford", "raup", "binomial", "chao", "cao", and "mahalanobis". --distance: bray # Perform Hierarchical clustering at all taxlevels: doClust: # default distance method = bray --method.dist: bray # All distances above are also allowed, as well as "correlation", "uncentered", # "abscor", "maximum", "binary" and "minkowski". # default clustering method = average --method.hclust: average # Other methods are allowed: "average", "ward", "single", "complete", "mcquitty", "median" and "centroid" # default number of pseudoreplicates for approximately unbiased and bootstrap supports = 10000. --nboot: 10000 MyAnalysis2: # Name of another set of statistical analyses, if another set is requested. # Samples can be added for comparison if the path to the corresponding table of # relative abundances generated in a previous metaBIT run is indicated. # Example: MetaPhlAn 1 and 2 microbial profiles of human body sites by the Human Microbiome # Project (HMP_690.tsv and HMPII_689.tsv), or of soils (Soil.tsv and SoilII.tsv). # These comparative tables are provided as part of the metaBIT package. merge: # An indented list of previously obtained tables of taxon relative abundances - Path/to/previous/all_taxa.tsv #doDiv: # Uncomment to compute diversity indices. doBarplot: --order: euclidean # Reorder samples according to the Euclidean distance between samples. doClust: --ncores: 4 # number of cores for parallelization. doPcoa: --makefile: pcoa_symbols_and_color.R # A file that controls point and legend formats. 
doHeatmap: Lefse: # Disabled by default. # A typical use is to run the pipeline first without running LEfSe, and then run # LEfSe using groups established by the PCoA/hierarchical clustering (Statax section). run: no # Optionally add comparative samples. Must be a list. merge: - Comparative_all_taxa.tsv # Change output folder name (lefse): outdir: lefse format: pdf # Format for output plots. Default = pdf. Other values: png, svg. format_input: # options for lefse/format_input.py. # None is required (metaBIT will set -c -s and -u automatically). -o: 1000000 # Rescales abundances to sum to 1,000,000. Default = 1,000,000. run_lefse: # Any option for lefse/run_lefse.py (see https://bitbucket.org/nsegata/lefse/src). plot_res: # Any option for plot_res.py (see https://bitbucket.org/nsegata/lefse/src). plot_cladogram: # Any option for plot_cladogram.py (see https://bitbucket.org/nsegata/lefse/src). plot_features: # Any option for plot_features.py (see https://bitbucket.org/nsegata/lefse/src). # Groups including a minimum of 2 samples each MUST be defined a priori for running LEfSe analyses. Groups: MyGroup1: # e.g. Oceanic water MySubGroup1-1: # Optional level, required for all Groups if declared in one; e.g. surface. # Each element of the list must be a legal column name in the table of relative # abundances. If you are using the default table, column names are: # {samplename}_{libraryname} # If the sample has been pooled, the column name is just the sample name - Sample1_Lib1 - Sample1_Lib2 MySubGroup1-2: # optional level, required for all Groups if declared in one; e.g. mesopelagic. - Sample2_Lib1 - Sample2_Lib2 MyGroup2: # e.g. Soil MySubGroup2-1: # optional level, required for all Groups if declared in one; e.g. tropical. - Sample3_Lib1 - Sample3_Lib2 MySubGroup2-2: # optional level, required for all Groups if declared in one; e.g. polar. - Sample4_Lib1 - Sample4_Lib2
Updated