view tools/smalt_wrapper.xml @ 1:b857363cc1db draft

Uploaded
author cjav
date Wed, 13 Feb 2013 13:23:09 -0500
parents 87f4f05b44bb
children
line wrap: on
line source

<tool id="smalt_wrapper" name="SMALT" version="0.0.1">
  <!-- External dependency: the "smalt" package, version 0.7.1. -->
  <requirements>
    <requirement version="0.7.1" type="package">smalt</requirement>
  </requirements>
  <!-- One-line summary of the tool. -->
  <description>maps query reads onto the reference sequences</description>
  <command interpreter="python">
    smalt_wrapper.py
      ## Cheetah template for the smalt_wrapper.py command line.
      ## Every substituted value is wrapped in double quotes so the shell
      ## always hands it to the wrapper as a single argument.

      ## thread count: use the slots Galaxy allocated to the job and fall back
      ## to the previously hard-coded value of 4 when GALAXY_SLOTS is not set
      --threads="\${GALAXY_SLOTS:-4}"

      ## reference source
      --fileSource="${genomeSource.refGenomeSource}"
      #if $genomeSource.refGenomeSource == "history":
        ##build index on the fly
        --ref="${genomeSource.ownFile}"
        --dbkey="${dbkey}"
      #else:
        ##use precomputed indexes
        --ref="${genomeSource.indices.fields.path}"
        --do_not_build_index
      #end if

      ## input file(s); the second file is passed for paired libraries only
      --input1="${paired.input1}"
      #if $paired.sPaired == "paired":
        --input2="${paired.input2}"
      #end if

      ## output file
      --output="${output}"

      ## run parameters; the individual options are passed only when the
      ## full parameter list was selected
      --genAlignType="${paired.sPaired}"
      --params="${params.source_select}"
      #if $params.source_select != "pre_set":
        --scorDiff="${params.scorDiff}"
        #if $paired.sPaired == "paired":
          ## insert size limits and library type are paired-end options
          --insertMax="${params.insertMax}"
          --insertMin="${params.insertMin}"
          --pairTyp="${params.pairTyp}"
        #end if
        --minScor="${params.minScor}"
        --partialAlignments="${params.partialAlignments}"
        --minBasq="${params.minBasq}"
        --seed="${params.seed}"
        --complexityWeighted="${params.complexityWeighted}"
        --exhaustiveSearch="${params.cExhaustiveSearch.exhaustiveSearch}"
        #if $params.cExhaustiveSearch.exhaustiveSearch == "true":
          ## minCover is defined only in the exhaustive-search case
          --minCover="${params.cExhaustiveSearch.minCover}"
        #end if
        --minId="${params.minId}"
      #end if

      ## suppress output SAM header
      --suppressHeader="${suppressHeader}"
  </command>
  <inputs>
    <!-- Reference selection: a built-in index from the smalt_indexes data table, or a FASTA dataset from the history. -->
    <conditional name="genomeSource">
      <param name="refGenomeSource"
             type="select"
             label="Will you select a reference genome from your history or use a built-in index?">
        <option value="indexed">Use a built-in index</option>
        <option value="history">Use one from the history</option>
      </param>
      <when value="indexed">
        <param name="indices"
               type="select"
               label="Select a reference genome">
          <options from_data_table="smalt_indexes">
            <filter column="2" type="sort_by"/>
            <validator message="No indexes are available" type="no_options"/>
          </options>
        </param>
      </when>
      <when value="history">
        <param name="ownFile"
               type="data"
               format="fasta"
               metadata_name="dbkey"
               label="Select a reference from history"/>
      </when>
    </conditional>
    <!-- Library layout: the single case asks for one FASTQ dataset, the paired case for a forward and a reverse one. -->
    <conditional name="paired">
      <param name="sPaired"
             type="select"
             label="Is this library mate-paired?">
        <option value="single">Single-end</option>
        <option value="paired">Paired-end</option>
      </param>
      <when value="single">
        <param name="input1"
               type="data"
               format="fastqsanger"
               label="FASTQ file"
               help="FASTQ with Sanger-scaled quality values (fastqsanger)"/>
      </when>
      <when value="paired">
        <param name="input1"
               type="data"
               format="fastqsanger"
               label="Forward FASTQ file"
               help="FASTQ with Sanger-scaled quality values (fastqsanger)"/>
        <param name="input2"
               type="data"
               format="fastqsanger"
               label="Reverse FASTQ file"
               help="FASTQ with Sanger-scaled quality values (fastqsanger)"/>
      </when>
    </conditional>
    <!-- Alignment settings: "pre_set" adds no extra inputs, "full" exposes the individual smalt map options. -->
    <conditional name="params">
      <param name="source_select" type="select" label="Smalt settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
        <option value="pre_set">Commonly Used</option>
        <option value="full">Full Parameter List</option>
      </param>
      <when value="pre_set" />
      <when value="full">
        <!-- minCover is offered only together with the exhaustive search flag. -->
        <conditional name="cExhaustiveSearch">
          <param name="exhaustiveSearch" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Do exhaustive search? (map -x)" help="This flag triggers a more exhaustive search for alignments at the cost of decreased speed." />
          <when value="true">
            <param name="minCover" type="float" value="0" label="Minimum cover (map -c)" help="Only consider mappings where the k-mer word seeds cover the query read to a minimum extent." />
          </when>
          <!-- The case value must equal the falsevalue of the boolean above, otherwise the unchecked state matches no case. -->
          <when value="false" />
        </conditional>
        <param name="scorDiff" type="integer" value="0" label="Score diff (map -d)" help="Set a threshold of the Smith-Waterman alignment score relative to the maximum score." />
        <param name="insertMax" type="integer" value="500" label="Maximum insert size (map -i)" help="Only in paired-end mode." />
        <param name="insertMin" type="integer" value="0" label="Minimum insert size (map -j)" help="Only in paired-end mode." />
        <!-- Restricted to the three documented library types; "pe" stays the default. -->
        <param name="pairTyp" type="select" label="Type of read pair library (map -l)" help="Can be either 'pe', 'mp' or 'pp'.">
          <option value="pe" selected="true">pe (paired-end library, short inserts)</option>
          <option value="mp">mp (mate-pair library, long inserts)</option>
          <option value="pp">pp (mates sequenced on the same strand)</option>
        </param>
        <param name="minScor" type="integer" value="0" label="Minimum score (map -m)" help="Sets an absolute threshold of the Smith-Waterman scores." />
        <param name="partialAlignments" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Partial alignments (map -p)" help="Report partial alignments if they are complementary on the read (split reads)." />
        <!-- min/max enforce the 0..10 range stated in the help text. -->
        <param name="minBasq" type="integer" value="0" min="0" max="10" label="Base quality threshold (map -q)" help="Sets a base quality threshold (0 &lt;= minbasq &lt;= 10, default 0)." />
        <param name="seed" type="integer" value="0" label="Seed (map -r)" help="See below." />
        <param name="complexityWeighted" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Complexity weighted (map -w)" help="Smith-Waterman scores are complexity weighted." />
        <param name="minId" type="float" value="0" label="Identity threshold (map -y)" help="Sets an identity threshold for a mapping to be reported." />
      </when>
    </conditional>
    <!-- Unchecked by default; the chosen value is passed to the wrapper as "true" or "false". -->
    <param name="suppressHeader"
           type="boolean"
           truevalue="true"
           falsevalue="false"
           checked="False"
           label="Suppress the header in the output SAM file"
           help="Smalt produces SAM with several lines of header information"/>
  </inputs>
  <!-- Single SAM output. Its dbkey metadata comes from column 1 of the smalt_indexes table for a built-in index, or from the dbkey of the history reference. -->
  <outputs>
    <data name="output" format="sam" label="${tool.name} on ${on_string}: mapped reads">
      <actions>
        <conditional name="genomeSource.refGenomeSource">
          <when value="indexed">
            <action name="dbkey" type="metadata">
              <option name="smalt_indexes" type="from_data_table" column="1">
                <filter type="param_value" compare="startswith" column="0" value="#" keep="False"/>
                <filter type="param_value" column="0" ref="genomeSource.indices"/>
              </option>
            </action>
          </when>
          <when value="history">
            <action name="dbkey" type="metadata">
              <option name="genomeSource.ownFile" type="from_param" param_attribute="dbkey"/>
            </action>
          </when>
        </conditional>
      </actions>
    </data>
  </outputs>
  <help>

**What it does**

SMALT is a pairwise sequence alignment program for the efficient mapping of DNA sequencing reads onto genomic reference sequences. It uses a combination of short-word hashing and dynamic programming. Most types of sequencing platforms are supported including paired-end sequencing reads.

------

Please cite the website "http://www.sanger.ac.uk/resources/software/smalt/".

------

**Know what you are doing**

.. class:: warningmark

There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.

 .. __: http://www.sanger.ac.uk/resources/software/smalt/

------

**Input formats**

SMALT accepts files in Sanger FASTQ format (galaxy type *fastqsanger*). Use the FASTQ Groomer to prepare your files.

------

**A Note on Built-in Reference Genomes**

The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY.

------

**Outputs**

The output is in SAM format.

-------

**SMALT parameter list**

This is an exhaustive list of SMALT options:

For **map**::

  -a
     Output explicit alignments along with the mappings.

  -c &lt;mincover&gt;
     Only consider mappings where the k-mer word seeds cover the query read to
     a minimum extent. If &lt;mincover&gt; is an integer or floating point &gt; 1.0, at
     least this many bases of the read must be covered by k-mer word seeds. If
     &lt;mincover&gt; is a floating point &lt;= 1.0, it specifies the fraction of the
     query read length that must be covered by k-mer word seeds. This option
     is only valid in conjunction with the '-x' flag.

  -d &lt;scordiff&gt;
     Set a threshold of the Smith-Waterman alignment score relative to the
     maximum score. When mapping single reads, all alignments are reported
     that have Smith-Waterman scores within &lt;scordiff&gt; of the maximum.
     Mappings with lower scores are skipped. If &lt;scordiff&gt; is set to a
     value &lt; 0, all alignments are printed that have scores above the
     threshold specified with the '-m &lt;minscor&gt;' option.
     For paired reads, only a value of 0 is supported. With the option '-d 0'
     all alignments (pairings) with the best score are output. By default
     (without the option '-d 0') single reads/mates with multiple best mappings
     are reported as 'not mapped'.

  -f &lt;format&gt;
     Specifies the output format. &lt;format&gt; can be either 'bam', 'cigar', 'gff',
     'sam' (default), 'samsoft' or 'ssaha'. Optional extension 'sam:nohead,clip'
     (see manual)

  -F &lt;inform&gt;
     Specifies the input format. &lt;inform&gt; can be either 'fastq' (default),
     'sam' or 'bam' (see: samtools.sourceforge.net). SAM and BAM formats
     require additional libraries to be installed.

  -g &lt;insfil&gt;
     Use the distribution of insert sizes stored in the file &lt;insfil&gt;. This
     file is in ASCII format and can be generated using the 'sample' task (see
     'smalt sample -H' for help).

  -H
     Print these instructions.

  -i &lt;insertmax&gt;
     Maximum insert size (only in paired-end mode). The default is 500.

  -j &lt;insertmin&gt;
     Minimum insert size (only in paired-end mode). The default is 0.

  -l &lt;pairtyp&gt;
     Type of read pair library. &lt;pairtyp&gt; can be either 'pe', i.e. for
     the Illumina paired-end library for short inserts (|--&gt; &lt;--|). 'mp'
     for the Illumina mate-pair library for long inserts (&lt;--| |--&gt;) or
     'pp' for mates sequenced on the same strand (|--&gt; |--&gt;). 'pe' is the
     default.

  -m &lt;minscor&gt;
     Sets an absolute threshold of the Smith-Waterman scores. Mappings with
     scores below that threshold will not be reported. The default is
     &lt;minscor&gt; = &lt;wordlen&gt; + &lt;stepsiz&gt; - 1

  -n &lt;nthreads&gt;
     Run smalt using multiple threads. &lt;nthreads&gt; is the number of additional
     threads forked from the main thread. The order of the reads in the
     input files is not preserved for the output unless '-O' is also specified.

  -o &lt;oufilnam&gt;
     Write mapping output (e.g. SAM lines) to a separate file. If this option
     is not specified, mappings are written to standard output together with
     other messages.

  -O
     Output mappings in the order of the reads in the input files when using
     multiple threads (option '-n &lt;nthreads&gt;').

  -p
     Report partial alignments if they are complementary on the read (split
     reads).

  -q &lt;minbasq&gt;
     Sets a base quality threshold (0 &lt;= minbasq &lt;= 10, default 0).
     K-mer words of the read with nucleotides that have a base quality below
     this threshold are not looked up in the hash index.

  -r &lt;seed&gt;
     If &lt;seed&gt; &gt;= 0 report an alignment selected at random where there are
     multiple mappings with the same best alignment score. With &lt;seed&gt; = 0
     (default) a seed is derived from the current calendar time. If &lt;seed&gt;
     &lt; 0 reads with multiple best mappings are reported as 'not mapped'.

  -T &lt;tmp_dir&gt;
     Write temporary files to directory &lt;tmp_dir&gt; (used with input files in
     SAM/BAM format).

  -w
     Smith-Waterman scores are complexity weighted.

  -x
     This flag triggers a more exhaustive search for alignments at the cost
     of decreased speed. In paired-end mode each mate is mapped independently.
     (By default the mate with fewer hits in the hash index is mapped first
     and the vicinity is searched for mappings of its mate.)

  -y &lt;minid&gt;
     Sets an identity threshold for a mapping to be reported (default: 0).
     &lt;minid&gt; specifies the number of exactly matching nucleotides either as
     a positive integer or as a fraction of the read length (&lt;= 1.0).

  </help>
</tool>