1. hbc
  2. galaxy-central-hbc

Commits

Cory Spencer  committed fa11ba5

Added PathPrint for Expression Sets.

  • Participants
  • Parent commits 240d89c
  • Branches default

Comments (0)

Files changed (3)

File tools/scde_pathprint/galaxy-pathprint.r

View file
   return(cel.fingerprint)
 }
 
+## Load an expression set from a tab-delimited file and convert it into a
+## PathPrint fingerprint.
+##
+## Arguments:
+##   filename - path of the tab-delimited expression set file to read.
+##   platform - platform identifier (e.g. "GPL72"); it must be accepted by
+##              is.supportedPlatform().
+##   file     - optional output path.  When it is not "", the fingerprint
+##              row names and the first fingerprint column are written to it
+##              as a tab-delimited table with the header Name / Score.
+##
+## Returns the fingerprint as a data frame carrying a 'fingerprintType'
+## attribute: 'GSM' when it has exactly one column, 'GSE' otherwise.
+##
+## Stops with an error when the platform is not supported, when the file
+## cannot be parsed, or when the fingerprint conversion fails.
+loadFingerprintFromExprsFile <- function(filename, platform, file = "") {
+  if (! is.supportedPlatform(platform))
+    stop(sprintf("The '%s' platform is not supported by PathPrint", platform))
+
+  ## Load the data from the provided expression set file.  The handler
+  ## formats conditionMessage(err) rather than err itself, because sprintf()
+  ## cannot format a condition object and would raise an unrelated error.
+  ## NOTE(review): no row.names argument is given, so read.delim() decides by
+  ## itself whether the first column holds the row names -- confirm that the
+  ## input files have the layout exprs2fingerprint() expects.
+  data <- tryCatch(read.delim(filename, sep = "\t"),
+                   error = function(err) {
+                     stop(sprintf("Unable to parse expression set file: %s",
+                                  conditionMessage(err)),
+                          call. = FALSE)
+                   })
+
+  ## Create the fingerprint based on the expression set file data.
+  ## call. = FALSE keeps the handler call out of the reported message, in
+  ## line with the parse error above.
+  exprs.fingerprint <- tryCatch(
+    exprs2fingerprint(as.data.frame(data), platform, getPlatformSpecies(platform)),
+    error = function(err) {
+      stop("Expression set to fingerprint conversion failed.  Please ensure the platform is correct.",
+           call. = FALSE)
+    })
+
+  ## Convert the fingerprint to a data frame.
+  exprs.fingerprint <- as.data.frame(exprs.fingerprint)
+
+  ## If requested, write the fingerprint out to file.
+  ## NOTE(review): only the first fingerprint column is written, even when
+  ## the fingerprint has several columns -- confirm this is intended.
+  if (file != "") {
+    fingerprint.table <- matrix(c(attr(exprs.fingerprint, 'row.names'),
+                                  exprs.fingerprint[,1]),
+                                ncol = 2)
+    write.table(fingerprint.table, col.names = c('Name', "Score"),
+                                   row.names = FALSE,
+                                   file      = file,
+                                   quote     = FALSE,
+                                   sep       = "\t")
+  }
+
+  ## Tag the fingerprint as having been calculated on either a GSM or GSE id.
+  if (ncol(exprs.fingerprint) == 1) {
+    attr(exprs.fingerprint, 'fingerprintType') <- 'GSM'
+  } else {
+    attr(exprs.fingerprint, 'fingerprintType') <- 'GSE'
+  }
+
+  ## Return the fingerprint
+  return(exprs.fingerprint)
+}
+
+## loadFingerprintFromExprsFile("exprs.txt", "GPL72")
 ##
 ## Helper functions for validating if a CEL file is supported by PathPrint
 ## and for accessing data about the platform or species it supports.

File tools/scde_pathprint/pathprintExprsSet.pl

View file
+#! /usr/bin/perl
+
+use List::MoreUtils qw(zip);
+use File::Temp qw(tempfile);
+use Text::CSV::Slurp;
+use Statistics::R;
+use Getopt::Long;
+use Data::Dumper;
+
+require "galaxy-pathprint.pl";
+
+our $VERSION = "0.01";
+
+use strict;
+
+# Print the command line usage summary to STDOUT.
+#
+# Always returns 1, which lets callers chain it as `usage && exit ...`.
+sub usage {
+  my @summary = (
+    "Usage: $0 <exprs> <platform> <consensus> <fingerprint> <histogram> [args]",
+    'PathPrint command line interface.',
+    '  -h,--help          Display usage information and exit.',
+    '  -p,--p-value <N>   Use <N> as a cutoff p-value.  Defaults to 0.05.',
+    '  -v,--verbose       Display verbose output.',
+    '     --version       Display version information and exit.',
+  );
+
+  # One line per entry, each terminated by a newline.
+  print join("\n", @summary), "\n";
+
+  return 1;
+}
+
+# Entry point: parse the command line, run the PathPrint analysis in R and
+# generate the fingerprint and consensus output files.
+#
+# Positional arguments (all five required):
+#   <exprs> <platform> <consensus> <fingerprint> <histogram>
+#
+# Exit status: 0 for --help and --version, 1 for invalid options or a wrong
+# number of arguments.  Dies with an "Error: ..." message when an argument is
+# empty or when running the R code fails.
+MAIN: {
+  my ($help, $pvalue, $verbose, $version);
+
+  # Strip any leading path info from the program name.
+  $0 =~ s/.*\///;
+
+  # Get any options provided to us on the command line.  The short forms
+  # advertised by usage() are declared explicitly rather than left to
+  # Getopt::Long's automatic abbreviation of long option names.
+  my $options_ok = GetOptions("help|h"      => \$help,
+                              "p-value|p=f" => \$pvalue,
+                              "verbose|v"   => \$verbose,
+                              "version"     => \$version);
+
+  # Invalid options are a failure: report it through the exit status so the
+  # caller does not mistake it for a successful run.
+  if (! $options_ok) {
+    usage();
+    exit 1;
+  }
+
+  # Display version information if requested and exit.
+  if ($version) {
+    print "PathPrint version $VERSION\n";
+    exit 0;
+  }
+
+  # An explicit request for help is a success ...
+  if ($help) {
+    usage();
+    exit 0;
+  }
+
+  # ... whereas a wrong number of arguments is an error.
+  if (@ARGV != 5) {
+    usage();
+    exit 1;
+  }
+
+  # Set some default values.
+  $pvalue = 0.05 unless defined $pvalue;
+
+  # Validate the command line arguments.  Only missing or empty values are
+  # rejected, so a legitimate value of "0" is accepted.
+  my @arg_names = qw(exprs platform consensus fingerprint histogram);
+  for my $i (0 .. $#arg_names) {
+    die "Error: <$arg_names[$i]> is a required argument\n"
+      unless defined $ARGV[$i] && length $ARGV[$i];
+  }
+  my ($exprs, $platform, $consensus, $fingerprint, $histogram)
+    = splice(@ARGV, 0, scalar @arg_names);
+
+  # Create a new R process.
+  my $R = Statistics::R->new(shared => 1);
+
+  my $fpfile = File::Temp->new(UNLINK => 1);
+  my $cdfile = File::Temp->new(UNLINK => 1);
+
+  # Escape a value for use inside a single-quoted R string literal, so that a
+  # quote or backslash in a path cannot end the literal early or be read as
+  # an escape sequence.
+  my $r_quote = sub {
+    my ($text) = @_;
+    $text =~ s/([\\'])/\\$1/g;
+    return $text;
+  };
+  my $r_exprs     = $r_quote->($exprs);
+  my $r_platform  = $r_quote->($platform);
+  my $r_histogram = $r_quote->($histogram);
+  my $r_fpfile    = $r_quote->("$fpfile");
+  my $r_cdfile    = $r_quote->("$cdfile");
+
+  # Load the required libraries and data files and perform the analysis.
+  # $pvalue is interpolated bare; GetOptions' "=f" only accepts a number.
+  my $code = <<EOCODE;
+        library(affy)
+        library(GEOquery)
+        library(pathprint)
+        library(galaxypathprint)
+
+        data(GEO.metadata.matrix)
+
+        fingerprint <- loadFingerprintFromExprsFile('$r_exprs', '$r_platform', file = '$r_fpfile')
+        distance    <- calculateDistance(fingerprint, pvalue = $pvalue, file = '$r_cdfile')
+	generateHistogram('$r_histogram', fingerprint, distance)
+EOCODE
+
+  eval { $R->run($code) };
+  if ($@) {
+    # Check to see if there were errors running the R commands.  Fall back
+    # to the raw failure text when it does not have the expected shape, so
+    # the user never gets an empty error message.
+    my $failure = $@;
+    my ($err) = $failure =~ /Got the error:\n ?(.+?)\n/;
+    $err = $failure unless defined $err;
+    chomp $err;
+    die("Error: $err\n");
+  }
+
+  # Generate the fingerprint file.
+  my $fingerprint_rows = Text::CSV::Slurp->load(file => $fpfile, sep_char => "\t");
+  generate_fingerprint_file($fingerprint, @{$fingerprint_rows});
+
+  # Generate the consensus file.
+  my $consensus_rows = Text::CSV::Slurp->load(file => $cdfile, sep_char => "\t");
+  generate_consensus_file($consensus, @{$consensus_rows});
+}

File tools/scde_pathprint/pathprintExprsSet.xml

View file
 
 <description>Performs the analysis for multiple experiments in a data matrix</description>
 
-<command>pathprintExprsSet.r $input $platform $species $exptRecords_consensus $exptGEO2RecordsDist $distHistogram</command>
+<command>pathprintExprsSet.pl $input $platform $consensus $fingerprint $histogram</command>
 
 <inputs>
+  <param name="input" type="data" label="Expression Matrix" help="This tool expects an expression matrix where columns represent experiments and rows represent normalized expression values.">
+  </param>
 
-	<param name="input" type="data" label="Expression Matrix" help="This tool expects an expression matrix where columns represent experiments and rows represent normalized expression values.">
-	</param>
-
-	<param name="platform" type="select" format="text">
-            <label>Array Platform</label>
-            <option value="GPL1261">GPL1261 - [Mouse430_2] Affymetrix Mouse Genome 430 2.0 Array</option>
-            <option value="GPL1319">GPL1319 - [Zebrafish] Affymetrix Zebrafish Genome Array</option>
-            <option value="GPL1322">GPL1322 - [Drosophila_2] Affymetrix Drosophila Genome 2.0 Array</option>
-            <option value="GPL1355">GPL1355 - [Rat230_2] Affymetrix Rat Genome 230 2.0 Array</option>
-            <option value="GPL200">GPL200  - [Celegans] Affymetrix C. elegans Genome Array</option>
-            <option value="GPL2700">GPL2700 - Sentrix HumanRef-8 Expression BeadChip</option>
-            <option value="GPL2986">GPL2986 - ABI Human Genome Survey Microarray Version 2</option>
-            <option value="GPL2995">GPL2995 - ABI Mouse Genome Survey Microarray</option>
-            <option value="GPL339">GPL339  - [MOE430A] Affymetrix Mouse Expression 430A Array</option>
-            <option value="GPL341">GPL341  - [RAE230A] Affymetrix Rat Expression 230A Array</option>
-            <option value="GPL3921">GPL3921 - [HT_HG-U133A] Affymetrix HT Human Genome U133A Array</option>
-            <option value="GPL4685">GPL4685 - [U133AAofAv2] Affymetrix GeneChip HT-HG_U133A Early Access Array</option>
-            <option value="GPL570">GPL570  - [HG-U133_Plus_2] Affymetrix Human Genome U133 Plus 2.0 Array</option>
-            <option value="GPL571">GPL571  - [HG-U133A_2] Affymetrix Human Genome U133A 2.0 Array</option>
-            <option value="GPL6102">GPL6102 - Illumina human-6 v2.0 expression beadchip</option>
-            <option value="GPL6103">GPL6103 - Illumina mouseRef-8 v1.1 expression beadchip</option>
-            <option value="GPL6104">GPL6104 - Illumina humanRef-8 v2.0 expression beadchip</option>
-            <option value="GPL6105">GPL6105 - Illumina mouse-6 v1.1 expression beadchip</option>
-            <option value="GPL6333">GPL6333 - Illumina Mouse Ref-6 V1</option>
-            <option value="GPL6883">GPL6883 - Illumina HumanRef-8 v3.0 expression beadchip</option>
-            <option value="GPL6884">GPL6884 - Illumina HumanWG-6 v3.0 expression beadchip</option>
-            <option value="GPL6885">GPL6885 - Illumina MouseRef-8 v2.0 expression beadchip</option>
-            <option value="GPL6887">GPL6887 - Illumina MouseWG-6 v2.0 expression beadchip</option>
-            <option value="GPL6947">GPL6947 - Illumina HumanHT-12 V3.0 expression beadchip</option>
-            <option value="GPL72">GPL72   - [DrosGenome1] Affymetrix Drosophila Genome Array</option>
-            <option value="GPL81">GPL81   - [MG_U74Av2] Affymetrix Murine Genome U74 Version 2 Array</option>
-            <option value="GPL8300">GPL8300 - [HG_U95Av2] Affymetrix Human Genome U95 Version 2 Array</option>
-            <option value="GPL8321">GPL8321 - [Mouse430A_2] Affymetrix Mouse Genome 430A 2.0 Array</option>
-            <option value="GPL85">GPL85   - [RG_U34A] Affymetrix Rat Genome U34 Array</option>
-            <option value="GPL91">GPL91   - [HG_U95A] Affymetrix Human Genome U95A Array</option>
-            <option value="GPL96">GPL96   - [HG-U133A] Affymetrix Human Genome U133A Array</option>
-	</param>
-
-	<param name="species" type="select" format="text"><label>Organism</label>
-	  <option value="human">Human</option> 
-          <option value="mouse">Mouse</option> 
-	</param>
-
+  <param name="platform" type="select" label="Platform" help="Select the platform">
+    <option value="GPL72">GPL72 - Affymetrix Drosophila Genome Array</option>
+    <option value="GPL85">GPL85 - Affymetrix Rat Genome U34 Array</option>
+    <option value="GPL91">GPL91 - Affymetrix Human Genome U95A Array</option>
+    <option value="GPL96">GPL96 - Affymetrix Human Genome U133A Array</option>
+    <option value="GPL200">GPL200 - Affymetrix C. elegans Genome Array</option>
+    <option value="GPL339">GPL339 - Affymetrix Mouse Expression 430A Array</option>
+    <option value="GPL341">GPL341 - Affymetrix Rat Expression 230A Array</option>
+    <option value="GPL570">GPL570 - Affymetrix Human Genome U133 Plus 2.0 Array</option>
+    <option value="GPL571">GPL571 - Affymetrix Human Genome U133A 2.0 Array</option>
+    <option value="GPL1261">GPL1261 - Affymetrix Mouse Genome 430 2.0 Array</option>
+    <option value="GPL1319">GPL1319 - Affymetrix Zebrafish Genome Array</option>
+    <option value="GPL1322">GPL1322 - Affymetrix Drosophila Genome 2.0 Array</option>
+    <option value="GPL1355">GPL1355 - Affymetrix Rat Genome 230 2.0 Array</option>
+    <option value="GPL2700">GPL2700 - Sentrix HumanRef-8 Expression BeadChip</option>
+    <option value="GPL2986">GPL2986 - ABI Human Genome Survey Microarray Version 2</option>
+    <option value="GPL2995">GPL2995 - ABI Mouse Genome Survey Microarray</option>
+    <option value="GPL3921">GPL3921 - Affymetrix HT Human Genome U133A Array</option>
+    <option value="GPL4685">GPL4685 - Affymetrix GeneChip HT-HG_U133A Early Access Array</option>
+    <option value="GPL6102">GPL6102 - Illumina human-6 v2.0 expression beadchip</option>
+    <option value="GPL6103">GPL6103 - Illumina mouseRef-8 v1.1 expression beadchip</option>
+    <option value="GPL6104">GPL6104 - Illumina humanRef-8 v2.0 expression beadchip</option>
+    <option value="GPL6105">GPL6105 - Illumina mouse-6 v1.1 expression beadchip</option>
+    <option value="GPL6333">GPL6333 - Illumina Mouse Ref-6 V1</option>
+    <option value="GPL6883">GPL6883 - Illumina HumanRef-8 v3.0 expression beadchip</option>
+    <option value="GPL6884">GPL6884 - Illumina HumanWG-6 v3.0 expression beadchip</option>
+    <option value="GPL6885">GPL6885 - Illumina MouseRef-8 v2.0 expression beadchip</option>
+    <option value="GPL6887">GPL6887 - Illumina MouseWG-6 v2.0 expression beadchip</option>
+    <option value="GPL6947">GPL6947 - Illumina HumanHT-12 V3.0 expression beadchip</option>
+    <option value="GPL8300">GPL8300 - Affymetrix Human Genome U95 Version 2 Array</option>
+    <option value="GPL8321">GPL8321 - Affymetrix Mouse Genome 430A 2.0 Array</option>
+  </param>
 </inputs>
 
 <outputs> 
-
-<data format="tabular" name="exptRecords_consensus" label="PathPrint Fingerprint"/> 
-<data format="tabular" name="exptGEO2RecordsDist" label="Distances for experiment versus GEO corpus"/>
-<data format="pdf" name="distHistogram" label="Histogram showing distances to the GEO fingerprint
-consensus"/>
-
+  <data format="html" name="consensus" label="Similar experiments in GEO"/>
+  <data format="tabular" name="fingerprint" label="PathPrint Fingerprint"/> 
+  <data format="pdf" name="histogram" label="Histogram showing distances for GEO experiments to the PathPrint fingerprint"/> 
 </outputs>