diff gsnap.xml @ 3:488e9d642566 draft

GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
author peterjc
date Wed, 28 Sep 2016 10:47:28 -0400
parents f6ba0f12cca2
children 14561eb803a5
line wrap: on
line diff
--- a/gsnap.xml	Wed Sep 28 10:43:44 2016 -0400
+++ b/gsnap.xml	Wed Sep 28 10:47:28 2016 -0400
@@ -1,9 +1,9 @@
-<tool id="gsnap" name="GSNAP" version="3.0.0">
+<tool id="gsnap" name="GSNAP" version="3.0.1">
   <description>Genomic Short-read Nucleotide Alignment Program</description>
   <requirements>
       <requirement type="package" version="2013-05-09">gmap</requirement>
   </requirements>
-  <version_string>gsnap --version</version_string>
+  <version_command>gsnap --version</version_command>
   <command>
     #import os.path, re
     gsnap
@@ -140,7 +140,7 @@
         --npath=$output.npath
       #end if
       #if $output.maxsearch.__str__ != '':
-        --maxsearch=$output.maxsearch 
+        --maxsearch=$output.maxsearch
       #end if
       $output.quiet_if_excessive
       $output.show_refdiff
@@ -266,15 +266,15 @@
             </param>
             <param name="pairmax_dna"  type="integer" value="" optional="true" label="Max total genomic length for DNA-Seq paired reads, or other reads without splicing (default 1000)." help="Used if no splice file is provided and novelsplicing is off."/>
             <param name="pairmax_rna"  type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used when novel splicing is specified or a splice file is provided.  Should probably match the value for localsplicedist."/>
-            <param name="pairexpect"  type="integer" value="" optional="true" label="Expected paired-end length" 
+            <param name="pairexpect"  type="integer" value="" optional="true" label="Expected paired-end length"
                    help="Used for calling splices in medial part of paired-end reads (default 200)"/>
-            <param name="pairdev"  type="integer" value="" optional="true" label="Allowable deviation from expected paired-end length" 
+            <param name="pairdev"  type="integer" value="" optional="true" label="Allowable deviation from expected paired-end length"
                    help="Used for calling splices in medial part of paired-end reads (default 25)"/>
           </when>
         </conditional>
         <param name="barcode_length" type="integer" value="" optional="true"  label="Amount of barcode to remove from start of read (default 0)" />
         <param name="fastq_id_start" type="integer" value="" optional="true"  label="Starting field  of identifier in FASTQ header, whitespace-delimited, starting from 1" />
-        <param name="fastq_id_end" type="integer" value="" optional="true"  label="Ending field  of identifier in FASTQ header, whitespace-delimited, starting from 1" 
+        <param name="fastq_id_end" type="integer" value="" optional="true"  label="Ending field  of identifier in FASTQ header, whitespace-delimited, starting from 1"
              help="Examples:
                   &lt;br&gt;@HWUSI-EAS100R:6:73:941:1973#0/1
                   &lt;br&gt; . start=1, end=1 (default)  => identifier is HWUSI-EAS100R:6:73:941:1973#0/1
@@ -283,8 +283,8 @@
                   &lt;br&gt; . start=2, end=2  => identifier is 071112_SLXA-EAS1_s_7:5:1:817:345
                   &lt;br&gt; . start=1, end=2  => identifier is SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345"
         />
-        <param name="filter_chastity" type="select" label="Skip reads marked by the Illumina chastity program" 
-               help="String after the accession having a  'Y'  after the first colon, like this:  
+        <param name="filter_chastity" type="select" label="Skip reads marked by the Illumina chastity program"
+               help="String after the accession having a  'Y'  after the first colon, like this:
                     &lt;br&gt;@accession 1:Y:0:CTTGTA
                     &lt;br&gt;where the  'Y'  signifies filtering by chastity.
                     &lt;br&gt; For 'either', a  'Y'  on either end of a paired-end read will be filtered.
@@ -303,10 +303,10 @@
         <param name="gsnap" type="data" format="fasta" label="Select a single-end dataset" help="GSNAP fasta must have the sequence entirely on one line, a second line is interpreted as the paired-end sequence"/>
         <param name="circularinput" type="boolean" checked="false" truevalue="--circular-input=true" falsevalue="" label="Circular-end data (paired reads are on same strand)"/>
       </when>
-      
+
     </conditional>
     <!-- No longer in options as of version 2011-11-30
-    <param name="mapq_unique_score"  type="integer" value="" optional="true" label="MAPQ score threshold" 
+    <param name="mapq_unique_score"  type="integer" value="" optional="true" label="MAPQ score threshold"
                 help="For multiple results, consider as a unique result if only one of the results has a MAPQ score equal or greater than this
                       (if not selected, then reports all multiple results, up to npaths)" />
     -->
@@ -350,8 +350,8 @@
         </param>
 
         <conditional name="use_splicing">
-          <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns" 
-                 help="Look for splicing involving known sites or known introns at short or long distances 
+          <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns"
+                 help="Look for splicing involving known sites or known introns at short or long distances
                   See README instructions for the distinction between known sites and known introns">
             <option value="none" selected="true">None</option>
             <option value="gmapdb">From the GMAP Database</option>
@@ -359,7 +359,7 @@
           </param>
           <when value="none"/>
           <when value="history">
-            <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map" 
+            <param name="splicemap" type="data" format="splicesites.iit,introns.iit" label="Select a splicesite map"
               help="built with GMAP IIT"/>
           </when>
           <when value="gmapdb">
@@ -384,7 +384,7 @@
           </param>
           <when value="none"/>
           <when value="history">
-            <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex" 
+            <param name="snpindex" type="data" format="gmapsnpindex" label="Select a snpindex"
               help="built with GMAP SNP Index"/>
           </when>
           <when value="gmapdb">
@@ -403,7 +403,7 @@
 
       </when>
       <when value="gmapdb">
-        <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb" 
+        <param name="gmapdb" type="data" format="gmapdb" label="Select a gmapdb"
               help="A GMAP database built with GMAP Build"/>
         <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
           <options>
@@ -420,8 +420,8 @@
         </param>
 
         <conditional name="use_splicing">
-          <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns" 
-                 help="Look for splicing involving known sites or known introns at short or long distances 
+          <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns"
+                 help="Look for splicing involving known sites or known introns at short or long distances
                   See README instructions for the distinction between known sites and known introns">
             <option value="none" selected="true">None</option>
             <option value="gmapdb">From the GMAP Database</option>
@@ -429,10 +429,10 @@
           </param>
           <when value="none"/>
           <when value="history">
-            <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map" 
+            <param name="splicemap" type="data" format="splicesites.iit,introns.iit" label="Select a splicesite map"
               help="built with GMAP IIT"/>
             <param name="ambig_splice_noclip"  type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites"
-              help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.  
+              help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.
                     This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/>
           </when>
           <when value="gmapdb">
@@ -442,7 +442,7 @@
               </options>
             </param>
             <param name="ambig_splice_noclip"  type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites"
-              help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.  
+              help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.
                     This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/>
           </when>
         </conditional>
@@ -455,7 +455,7 @@
           </param>
           <when value="none"/>
           <when value="history">
-            <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex" 
+            <param name="snpindex" type="data" format="gmapsnpindex" label="Select a snpindex"
               help="built with GMAP SNP Index"/>
           </when>
           <when value="gmapdb">
@@ -478,7 +478,7 @@
       </param>
       <when value="default"/>
       <when value="advanced">
-         <param name="max_mismatches" type="float" value="" optional="true" label="Maximum number of mismatches allowed (uses default when negative)" 
+         <param name="max_mismatches" type="float" value="" optional="true" label="Maximum number of mismatches allowed (uses default when negative)"
               help="Maximum number of mismatches allowed (if not specified, then
                     defaults to the ultrafast level of ((readlength+index_interval-1)/kmer - 2))
                     (By default, the genome index interval is 3, but this can be changed
@@ -497,7 +497,7 @@
                       Keeping this number large will allow for random selection among multiple alignments.
                       Reducing this number can speed up the program. "/>
 
-         <param name="terminal_threshold"  type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment" 
+         <param name="terminal_threshold"  type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment"
                 help="Threshold for searching for a terminal alignment (from one end of the
                                    read to the best possible position at the other end) (default 2
                                    for standard, atoi-stranded, and atoi-nonstranded mode; default 100
@@ -510,7 +510,7 @@
                                    To turn off terminal alignments, set this to a high value, greater
                                    than the value for max-mismatches.
                                    "/>
-         <param name="indel_penalty"  type="integer" value="" optional="true" label="Penalty for an indel (default 2)" 
+         <param name="indel_penalty"  type="integer" value="" optional="true" label="Penalty for an indel (default 2)"
                 help="Counts against mismatches allowed.  To find indels, make indel-penalty less than or equal to max-mismatches.  A value &lt; 2 can lead to false positives at read ends" />
          <param name="indel_endlength"  type="integer" value="" optional="true" label="Minimum length at end required for indel alignments (default 4)" />
          <param name="max_middle_insertions"  type="integer" value="" optional="true" label="Maximum number of middle insertions allowed (default 9)" />
@@ -519,17 +519,17 @@
          <param name="max_end_deletions"  type="integer" value="" optional="true" label="Maximum number of end deletions allowed (default 6)" />
          <param name="suboptimal_levels"  type="integer" value="" optional="true" label="Report suboptimal hits beyond best hit (default 0)"
                 help="All hits with best score plus suboptimal-levels are reported" />
-         <param name="adapter_strip"  type="select" label="Method for removing adapters from reads" 
+         <param name="adapter_strip"  type="select" label="Method for removing adapters from reads"
                 help="Default is 'off'.  To turn on, specify 'paired', which removes adapters
                                    from paired-end reads if they appear to be present.">
            <option value="paired">paired</option>
            <option value="off" selected="true">off</option>
          </param>
-         <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)" 
+         <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)"
                 help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive mismatches at the ends of reads)"/>
-         <param name="trim_indel_score" type="integer" value="" optional="true" label="Score to use for indels when trimming at ends (default is -4)" 
+         <param name="trim_indel_score" type="integer" value="" optional="true" label="Score to use for indels when trimming at ends (default is -4)"
                 help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive indels at the ends of reads)"/>
-         <param name="use_tally" type="data" format="tally.iit" optional="true" metadata_name="dbkey" label="Select a tally IIT file to resolve concordant multiple results" 
+         <param name="use_tally" type="data" format="tally.iit" optional="true" label="Select a tally IIT file to resolve concordant multiple results"
               help="generated by gsnap_tally and iit_store"/>
 
          <!--
@@ -542,24 +542,24 @@
                                               just give full path name to use-runlength instead.
            use-runlength=STRING         Use this runlength IIT file to resolve concordant multiple results
          -->
-         
+
          <!-- Options for GMAP alignment within GSNAP -->
-          <param name="gmap_mode" type="select" multiple="true" optional="true" display="checkboxes" label="Cases to use GMAP for complex alignments containing multiple splices or indels" 
+          <param name="gmap_mode" type="select" multiple="true" optional="true" display="checkboxes" label="Cases to use GMAP for complex alignments containing multiple splices or indels"
                  help="Default: pairsearch,terminal,improve">
             <option value="pairsearch" selected="true">pairsearch</option>
             <option value="indel_knownsplice" selected="true">indel_knownsplice</option>
             <option value="terminal" selected="true">terminal</option>
             <option value="improve" selected="true">improve</option>
           </param>
-          <param name="trigger_score_for_gmap" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 5)" 
+          <param name="trigger_score_for_gmap" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 5)"
                  help="Try GMAP pairsearch on nearby genomic regions if best score (the total of both ends if paired-end) exceeds this value (default 5)" />
-          <param name="max_gmap_pairsearch" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 3)" 
+          <param name="max_gmap_pairsearch" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 3)"
                  help="Perform GMAP pairsearch on nearby genomic regions up to this many candidate ends (default 3)." />
-          <param name="max_gmap_terminal" type="integer" value="" optional="true" label="GMAP terminal threshold (default 3)" 
+          <param name="max_gmap_terminal" type="integer" value="" optional="true" label="GMAP terminal threshold (default 3)"
                  help="Perform GMAP terminal on nearby genomic regions up to this many candidate ends (default 3)." />
-          <param name="max_gmap_improvement" type="integer" value="" optional="true" label="GMAP improvement threshold (default 3)" 
+          <param name="max_gmap_improvement" type="integer" value="" optional="true" label="GMAP improvement threshold (default 3)"
                  help="Perform GMAP improvement on nearby genomic regions up to this many candidate ends (default 3)." />
-          <param name="microexon_spliceprob"  type="float" value="" optional="true" label="GMAP microexons threshold (default .90)" 
+          <param name="microexon_spliceprob"  type="float" value="" optional="true" label="GMAP microexons threshold (default .90)"
                  help="Allow microexons only if one of the splice site probabilities is greater than this value." >
             <validator type="in_range" message="The microexons  probability must be between 0. and 1." min="0." max="1."/>
           </param>
@@ -574,7 +574,7 @@
       <when value="default"/>
       <when value="advanced">
          <!-- Splicing options for RNA-Seq -->
-         <!-- use-splicing This should be either a select list from the gmapdb maps or a data type using splicesdir and use-splicing --> 
+         <!-- use-splicing This should be either a select list from the gmapdb maps or a data type using splicesdir and use-splicing -->
          <!-- Neither novel splicing (-N) nor known splicing (-s) turned on => assume reads are DNA-Seq (genomic) -->
          <param name="novelsplicing" type="boolean" checked="false" truevalue="--novelsplicing=1" falsevalue="" label="Look for novel splicing "/>
          <param name="localsplicedist"  type="integer" value="" optional="true" label="Definition of local novel splicing event (default 200000)"/>
@@ -587,8 +587,8 @@
          <param name="shortend_splice_endlength"  type="integer" value="" optional="true" label="Minimum length at end required for short-end spliced alignments"
                 help="(default 2, but unless known splice sites are provided,  GSNAP may still need the end length to be the value of kmer size to find a given splice)"/>
          <param name="distant_splice_identity"  type="float" value="" optional="true" label="Minimum identity at end required for distant spliced alignments (default 0.95)"/>
-         <param name="antistranded_penalty"  type="integer" value="" optional="true" label="Penalty for antistranded splicing when using stranded RNA-Seq protocols" 
-                help="A positive value, such as 1, expects antisense on the first read and sense on the second read.  
+         <param name="antistranded_penalty"  type="integer" value="" optional="true" label="Penalty for antistranded splicing when using stranded RNA-Seq protocols"
+                help="A positive value, such as 1, expects antisense on the first read and sense on the second read.
                       Default is 0, which treats sense and antisense equally well"/>
       </when>
     </conditional>
@@ -602,11 +602,11 @@
       <when value="default"/>
       <when value="advanced">
         <param name="npath"  type="integer" value="" optional="true" label="Maximum number of paths to print (default 100)"/>
-        <param name="quiet_if_excessive" type="boolean" checked="false" truevalue="--quiet-if-excessive" falsevalue="" label="Quiet if Excessive" 
+        <param name="quiet_if_excessive" type="boolean" checked="false" truevalue="--quiet-if-excessive" falsevalue="" label="Quiet if Excessive"
                help="If more than maximum number of paths are found, then nothing is printed."/>
-        <param name="show_refdiff" type="boolean" checked="false" truevalue="--show-refdiff" falsevalue="" label="Show SNP-tolerant alignment" 
+        <param name="show_refdiff" type="boolean" checked="false" truevalue="--show-refdiff" falsevalue="" label="Show SNP-tolerant alignment"
                help="For GSNAP output in SNP-tolerant alignment, shows all differences relative to the reference genome as lower case (otherwise, it shows all differences relative to both the reference and alternate genome)"/>
-        <param name="clip_overlap" type="boolean" checked="false" truevalue="--clip-overlap" falsevalue="" label="Clip Overlap" 
+        <param name="clip_overlap" type="boolean" checked="false" truevalue="--clip-overlap" falsevalue="" label="Clip Overlap"
                help="For paired-end reads whose alignments overlap, clip the overlapping region."/>
       </when>
     </conditional>
@@ -640,8 +640,8 @@
     <!-- TODO combine fails and split_output -->
 
     <conditional name="results">
-      <param name="split_output" type="select" label="&lt;HR&gt;Split outputs" 
-       help="Separate outputs for: nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and concordant_mult results"> 
+      <param name="split_output" type="select" label="&lt;HR&gt;Split outputs"
+       help="Separate outputs for: nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and concordant_mult results">
         <option value="no">no</option>
         <option value="yes">yes</option>
       </param>
@@ -655,8 +655,8 @@
           <when value="default"/>
           <when value="nofails"/>
           <when value="failsonly">
-            <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format" 
-              help=""/> 
+            <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format"
+              help=""/>
           </when>
         </conditional>
       </when>
@@ -671,8 +671,8 @@
           <when value="nofails"/>
           <when value="failsonly"/>
         </conditional>
-        <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format" 
-              help=""/> 
+        <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format"
+              help=""/>
       </when>
     </conditional>
 
@@ -808,18 +808,18 @@
 
   </outputs>
   <tests>
-  </tests> 
+  </tests>
 
   <help>
 
 **What it does**
 
-GSNAP_ (Genomic Short-read Nucleotide Alignment Program) is a short read aligner which can align both single- and paired-end reads as short as 14nt and of arbitrarily long length. It can detect short- and long-distance splicing, including interchromosomal splicing, in individual reads, using probabilistic models or a database of known splice sites. Our program also permits SNP-tolerant alignment to a reference space of all possible combinations of major and minor alleles, and can align reads from bisulfite-treated DNA for the study of methylation state. It is developed by Thomas D. Wu of Genentech, Inc.  
+GSNAP_ (Genomic Short-read Nucleotide Alignment Program) is a short read aligner which can align both single- and paired-end reads as short as 14nt and of arbitrarily long length. It can detect short- and long-distance splicing, including interchromosomal splicing, in individual reads, using probabilistic models or a database of known splice sites. Our program also permits SNP-tolerant alignment to a reference space of all possible combinations of major and minor alleles, and can align reads from bisulfite-treated DNA for the study of methylation state. It is developed by Thomas D. Wu of Genentech, Inc.
 Publication_ citation: Thomas D. Wu, Serban Nacu. "Fast and SNP-tolerant detection of complex variants and splicing in short reads." Bioinformatics. 2010 Apr 1;26(7):873-81. Epub 2010 Feb 10.
 
 .. _GSNAP: http://research-pub.gene.com/gmap/
 .. _Publication: http://bioinformatics.oupjournals.org/cgi/content/full/26/7/873
-http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2844994/?tool=pubmed
+https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2844994/?tool=pubmed
 
 ------
 
@@ -835,10 +835,10 @@
 
 **Input formats**
 
-Input to GSNAP should be either in FASTQ or FASTA format.  
+Input to GSNAP should be either in FASTQ or FASTA format.
 
 The FASTQ input may include quality scores, which will then be included in SAM
-output, if that output format is selected. 
+output, if that output format is selected.
 
 For FASTA format, you should include one line per read (or end of a
 paired-end read).  The same FASTA file can have a mixture of
@@ -880,10 +880,9 @@
 
 Default GSNAP format
   See the README_
-
-
-
-
   </help>
+  <citations>
+    <citation type="doi">10.1093/bioinformatics/btq057</citation>
+  </citations>
 </tool>