galaxy-central / tools / phenotype_association / pgSnp2gd_snp.xml

Full commit
<tool id="pgSnp2gd_snp" name="pgSnp to gd_snp" hidden="false">
  <description>Convert from pgSnp to gd_snp</description>
  <!--
    Command line for the Perl converter.  Three cases are generated:
      1. append to an existing gd_snp table, adding columns only
         (-addColsOnly; no reference-allele column is needed);
      2. append to an existing gd_snp table and keep new SNPs
         (reference-allele column required);
      3. start a new gd_snp table (reference-allele column required).
    Parameters declared inside a <conditional> are addressed through the
    conditional's name, hence $snptab.needRef.colsOnly, $snptab.needRef.ref
    and $snptab.ref below.
    Each Cheetah directive sits on its own line so that the directive's
    expression cannot swallow the command text that follows it.
    NOTE(review): the script name pgSnp2gd_snp.pl is assumed to match the
    tool id / file name; confirm against the tool directory.
  -->
  <command interpreter="perl">
    #if $snptab.tab2 == "yes"
      #if $snptab.needRef.colsOnly == "addColsOnly"
        pgSnp2gd_snp.pl $input1 -tab=$snptab.input2 -name=$indName -build=${input1.metadata.dbkey} -addColsOnly -chr=${input1.metadata.chromCol} > $out_file1
      #else
        pgSnp2gd_snp.pl $input1 -tab=$snptab.input2 -name=$indName -build=${input1.metadata.dbkey} -ref=${snptab.needRef.ref} -chr=${input1.metadata.chromCol} > $out_file1
      #end if
    #else
      pgSnp2gd_snp.pl $input1 -name=$indName -build=${input1.metadata.dbkey} -ref=${snptab.ref} -chr=${input1.metadata.chromCol} > $out_file1
    #end if
  </command>
  <inputs>
    <!-- The command reads input1.metadata.chromCol, and both the help text and
         the test declare the pgSnp input as an interval dataset. -->
    <param format="interval" name="input1" type="data" label="pgSnp dataset" />
    <!-- snptab: start a new gd_snp table ("no") or append to one in the history ("yes"). -->
    <conditional name="snptab">
      <param name="tab2" type="select" label="Append to gd_snp dataset in history">
        <option value="yes">yes</option>
        <option value="no" selected="true">no</option>
      </param>
      <when value="yes">
        <param format="gd_snp" name="input2" type="data" label="gd_snp dataset" />
        <!-- needRef: a reference-allele column is only required when SNPs that
             are new to the gd_snp table are kept rather than skipped. -->
        <conditional name="needRef">
          <param name="colsOnly" type="select" label="Skip new SNPs">
            <option value="no" selected="true">no</option>
            <option value="addColsOnly">yes</option>
          </param>
          <when value="no">
            <param name="ref" type="data_column" data_ref="input1" label="Column with reference allele" />
          </when>
          <when value="addColsOnly">
            <!-- do nothing -->
          </when>
        </conditional>
      </when>
      <when value="no">
        <param name="ref" type="data_column" data_ref="input1" label="Column with reference allele" />
      </when>
    </conditional>
    <param name="indName" type="text" size="20" label="Label for new individual/group" value="na" />
  </inputs>
  <outputs>
    <data format="gd_snp" name="out_file1" />
  </outputs>
  <tests>
    <!-- New-table case: reference allele taken from column 8 of the test input. -->
    <test>
      <param name='input1' value='pgSnpTest.ref.txt' ftype='interval' />
      <param name='tab2' value='no' />
      <param name='ref' value='8' />
      <param name='indName' value='na' />
      <!-- The output is referenced by the name declared in <outputs>. -->
      <output name="out_file1" file="pgSnp2snp_output.txt" />
    </test>
  </tests>
  <!-- The reStructuredText help body follows this opening tag. -->
  <help>


**Dataset formats**

The input dataset is of Galaxy datatype interval_, with the additional columns
required for pgSnp_ format.
Any further columns beyond those defined for pgSnp will be ignored, except for
the reference-allele column selected in the tool form (when one is required).
The output dataset is a gd_snp_ table.  (`Dataset missing?`_)

.. _interval: ./static/formatHelp.html#interval
.. _pgSnp: ./static/formatHelp.html#pgSnp
.. _gd_snp: ./static/formatHelp.html#gd_snp
.. _Dataset missing?: ./static/formatHelp.html


**What it does**

This tool converts a pgSnp dataset to gd_snp format, either starting a new
dataset or appending to an existing one.  When appending, any SNPs that appear
only in the pgSnp file can either be skipped entirely or added, with the columns
for the previous individuals/groups in the input gd_snp dataset backfilled with
"-1" (meaning "unknown").
If any new SNPs are being added (either by creating a new table or by backfilling),
then an extra column with the reference allele must be supplied in the pgSnp dataset,
as shown in the example below.



- input pgSnp file, with reference allele added::

   chr1    1888681  1888682  C/T     2       4,3     0.8893,0.8453   T
   chr1    3118325  3118326  T       1       8       0.8796          C
   chr1    3211457  3211458  A/C     2       17,10   0.8610,0.8576   A

- gd_snp output::

   chr1    1888681   T       C      -1      3       4       1     0.8893
   chr1    3118325   C       T      -1      0       8       0     0.8796
   chr1    3211457   A       C      -1      17      10      1     0.8576