diff gmap.xml @ 3:488e9d642566 draft

GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
author peterjc
date Wed, 28 Sep 2016 10:47:28 -0400
parents f6ba0f12cca2
children 14561eb803a5
line wrap: on
line diff
--- a/gmap.xml	Wed Sep 28 10:43:44 2016 -0400
+++ b/gmap.xml	Wed Sep 28 10:47:28 2016 -0400
@@ -1,9 +1,9 @@
-<tool id="gmap" name="GMAP" version="3.0.0">
+<tool id="gmap" name="GMAP" version="3.0.1">
   <description>Genomic Mapping and Alignment Program for mRNA and EST sequences</description>
   <requirements>
     <requirement type="package" version="2013-05-09">gmap</requirement>
   </requirements>
-  <version_string>gmap --version</version_string>
+  <version_command>gmap --version</version_command>
   <command>
     #import os,os.path
     gmap
@@ -41,7 +41,7 @@
       --protein_gen
     #elif $result.format == "sam":
       --format=$result.sam_paired_read
-      $result.no_sam_headers 
+      $result.no_sam_headers
       $result.sam_use_0M
       $result.force_xs_dir
       $result.md_lowercase_snp
@@ -127,7 +127,7 @@
       ${i.added_input}
     #end for
     #if $split_output == True
-      2> $gmap_stderr 
+      2> $gmap_stderr
     #else
       2> $gmap_stderr > $output
     #end if
@@ -194,7 +194,7 @@
         </param>
       </when>
       <when value="gmapdb">
-        <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb" 
+        <param name="gmapdb" type="data" format="gmapdb" label="Select a gmapdb"
               help="A GMAP database built with GMAP Build"/>
         <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
           <options>
@@ -208,12 +208,12 @@
         </param>
       </when>
       <when value="history">
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" 
+        <param name="ownFile" type="data" format="fasta" label="Select the reference genome"
               help="Fasta containing genomic DNA sequence"/>
       </when>
     </conditional>
 
-    
+
     <!-- Computation options -->
     <conditional name="computation">
       <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help="">
@@ -223,56 +223,56 @@
       <when value="default"/>
       <when value="advanced">
        <param name="nosplicing" type="boolean" truevalue="--nosplicing" falsevalue="" checked="false" label="Turn off splicing" help="(useful for aligning genomic sequences onto a genome)"/>
-       <param name="min_intronlength" type="integer" value="" optional="true" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." >	
+       <param name="min_intronlength" type="integer" value="" optional="true" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." >
          <validator type="in_range" message="min_intronlength must be positive" min="0" />
        </param>
-       <param name="intronlength" type="integer" value="" optional="true" label="Max length for one intron (default 1000000)" >	
+       <param name="intronlength" type="integer" value="" optional="true" label="Max length for one intron (default 1000000)" >
          <validator type="in_range" message="intronlength must be positive" min="0" />
        </param>
-       <param name="localsplicedist" type="integer" value="" optional="true" label="Max length for known splice sites at ends of sequence (default 200000)" >	
+       <param name="localsplicedist" type="integer" value="" optional="true" label="Max length for known splice sites at ends of sequence (default 200000)" >
          <validator type="in_range" message="localsplicedist must be positive" min="0" />
        </param>
-       <param name="totallength"  type="integer" value="" optional="true" label="Max total intron length (default 2400000)" >	
+       <param name="totallength"  type="integer" value="" optional="true" label="Max total intron length (default 2400000)" >
          <validator type="in_range" message="totallength must be positive" min="0" />
        </param>
-       <param name="chimera_margin" type="integer" value="" optional="true" label="Amount of unaligned sequence that triggers search for a chimera" 
-              help=" default is 40, To turn off, set to 0" >	
+       <param name="chimera_margin" type="integer" value="" optional="true" label="Amount of unaligned sequence that triggers search for a chimera"
+              help="Default is 40. To turn off, set to 0" >
          <validator type="in_range" message="chimera_margin must be positive" min="0" />
        </param>
-       <param name="direction"  type="select" label="cDNA direction">	
+       <param name="direction"  type="select" label="cDNA direction">
          <option value="auto">auto</option>
          <option value="sense_force">sense_force</option>
          <option value="antisense_force">antisense_force</option>
          <option value="sense_filter">sense_filter</option>
          <option value="antisense_filter">antisense_filter</option>
        </param>
-       <param name="trimendexons"  type="integer" value="" optional="true" label="Trim end exons with fewer than given number of matches (in nt, default 12)" >	
+       <param name="trimendexons"  type="integer" value="" optional="true" label="Trim end exons with fewer than given number of matches (in nt, default 12)" >
          <validator type="in_range" message="trimendexons must be positive" min="1" />
        </param>
        <param name="find_shifted_canonical" type="boolean" truevalue="--find-shifted-canonical-species" falsevalue="" checked="false" label="find-shifted-canonical Use a more sensitive search for canonical splicing" help=""/>
        <param name="cross_species" type="boolean" truevalue="--cross-species" falsevalue="" checked="false" label="Cross-species alignment" help="For cross-species alignments, use a more sensitive search for canonical splicing"/>
-       
-       <param name="canonical"  type="select" label="Reward for canonical and semi-canonical introns">	
+
+       <param name="canonical"  type="select" label="Reward for canonical and semi-canonical introns">
          <option value="1">high reward (default)</option>
          <option value="0">low reward</option>
          <option value="2">low reward for high-identity sequences</option>
        </param>
-       <param name="allow_close_indels"  type="select" label="Allow an insertion and deletion close to each other">	
+       <param name="allow_close_indels"  type="select" label="Allow an insertion and deletion close to each other">
          <option value="1" selected="true">yes (default)</option>
          <option value="0">no</option>
          <option value="2">only for high-quality alignments</option>
        </param>
-       <param name="microexon_spliceprob" type="float" value="" optional="true" label="Micro Exon splice probablility threshold" 
-              help="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" >	
-         <validator type="in_range" message="slice probability between 0.00 and 1.00" min="0" max="1"/> 
+       <param name="microexon_spliceprob" type="float" value="" optional="true" label="Micro Exon splice probability threshold"
+              help="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" >
+         <validator type="in_range" message="splice probability between 0.00 and 1.00" min="0" max="1"/>
        </param>
-       <param name="prunelevel"  type="select" label="Pruning level">	
+       <param name="prunelevel"  type="select" label="Pruning level">
          <option value="0">no pruning (default)</option>
          <option value="1">poor sequences</option>
          <option value="2">repetitive sequences</option>
          <option value="3">poor and repetitive sequences</option>
        </param>
-       <!--  could do this as a config file 
+       <!--  could do this as a config file
        <param name="chrsubsetfile" type="data" format="fasta" label="User-supplied chromosome subset file" />
        <param name="chrsubset" type="text" label="Chromosome subset to search" />
        -->
@@ -293,25 +293,25 @@
         <option value="--invertmode=1">Invert cDNA and print genomic (-) strand</option>
         <option value="--invertmode=2">Invert cDNA and print genomic (+) strand</option>
        </param>
-       <param name="introngap" type="integer" value="" optional="true" label="Nucleotides to show on each end of intron (default=3)">	
+       <param name="introngap" type="integer" value="" optional="true" label="Nucleotides to show on each end of intron (default=3)">
          <validator type="in_range" message="introngap must be positive" min="0" />
        </param>
-       <param name="wraplength" type="integer" value="" optional="true" label="Line Wrap length for alignment (default=50)">	
+       <param name="wraplength" type="integer" value="" optional="true" label="Line Wrap length for alignment (default=50)">
          <validator type="in_range" message="wraplength must be positive" min="1" />
        </param>
        <param name="npaths" type="integer" value="" optional="true"
-              label="Maximum number of paths to show.  Ignored if negative.  If 0, prints two paths if chimera detected, else one." >	
+              label="Maximum number of paths to show.  Ignored if negative.  If 0, prints two paths if chimera detected, else one." >
          <validator type="in_range" message="npaths must be positive" min="0" />
        </param>
        <param name="suboptimal_score" type="integer" value="" optional="true"
               label="Report only paths whose score is within this value of the best path"
-              help="By default the program prints all paths found." >	
+              help="By default the program prints all paths found." >
          <validator type="in_range" message="suboptimal_score must be positive" min="0" />
        </param>
-       <param name="chimera_overlap" type="integer" value="" optional="true" label="Overlap to show, if any, at chimera breakpoint (default 0)" >	
+       <param name="chimera_overlap" type="integer" value="" optional="true" label="Overlap to show, if any, at chimera breakpoint (default 0)" >
                 <validator type="in_range" message="chimera_overlap must be positive" min="0" />
        </param>
-       <param name="tolerant" type="boolean" checked="false" truevalue="--tolerant=true" falsevalue="" 
+       <param name="tolerant" type="boolean" checked="false" truevalue="--tolerant=true" falsevalue=""
               label="Translates cDNA with corrections for frameshifts"/>
        <param name="protein" type="select" label="Protein alignment" help="">
         <option value="">default</option>
@@ -383,9 +383,9 @@
         <param name="read_group_library" type="text" value="" label="Value to put into read-group library (RG-LB) field"/>
         <param name="read_group_platform" type="text" value="" label="Value to put into read-group library platform (RG-PL) field"/>
         <param name="sam_use_0M" type="boolean" truevalue="--sam-use-0M" falsevalue="" checked="false" label="Insert 0M in CIGAR between adjacent insertions and deletions" help="Required by Picard, but can cause errors in other tools"/>
-        <param name="force_xs_dir" type="boolean" truevalue="--force-xs-dir" falsevalue="" checked="false" label="Force direction (disallow XS:A:?)" 
+        <param name="force_xs_dir" type="boolean" truevalue="--force-xs-dir" falsevalue="" checked="false" label="Force direction (disallow XS:A:?)"
                help="For RNA-Seq alignments, disallows XS:A:? when the sense direction is unclear, and replaces this value arbitrarily with XS:A:+. May be useful for some programs, such as Cufflinks, that cannot handle XS:A:?.  However, if you use this flag, the reported value of XS:A:+ in these cases will not be meaningful."/>
-        <param name="md_lowercase_snp" type="boolean" truevalue="--md-lowercase-snp" falsevalue="" checked="false" label="MD lowercase SNP" 
+        <param name="md_lowercase_snp" type="boolean" truevalue="--md-lowercase-snp" falsevalue="" checked="false" label="MD lowercase SNP"
                help="In MD string, when known SNPs are given by the -v flag, prints difference nucleotides as lower-case when they, differ from reference but match a known alternate allele"/>
       </when>
     </conditional> <!-- name="result" -->
@@ -393,7 +393,7 @@
     <param name="split_output" type="boolean" truevalue="--split-output=gmap_out" falsevalue="" checked="false" label="Separate outputs for nomapping, uniq, mult, and chimera" help="(chimera only when chimera-margin is selected)"/>
 
 
-    <!-- 
+    <!--
       map=iitfile      Map file.  If argument is '?' (with the quotes), this lists available map files.
       mapexons         Map each exon separately
       mapboth          Report hits from both strands of genome
@@ -401,7 +401,7 @@
       print-comment    Show comment line for each hit
     -->
 
-    <!-- 
+    <!--
   min-trimmed-coverage=FLOAT   Do not print alignments with trimmed coverage less
                                    this value (default=0.0, which means no filtering)
                                    Note that chimeric alignments will be output regardless
@@ -484,13 +484,13 @@
     </data>
   </outputs>
   <tests>
-  </tests> 
+  </tests>
 
   <help>
 
 **What it does**
 
-GMAP_ (Genomic Mapping and Alignment Program)  The functionality provided by gmap allows a user to: (1) map and align a single cDNA interactively against a large genome in about a second, without the startup time of several minutes typically needed by existing mapping programs; (2) switch arbitrarily among different genomes, without the need for a preloaded server dedicated to each genome; (3) run the program on computers with as little as 128 MB of RAM (random access memory); (4) perform high-throughput batch processing of cDNAs by using memory mapping and multithreading when appropriate memory and hardware are available; (5) generate accurate gene models, even in the presence of substantial polymorphisms and sequence errors; (6) locate splice sites accurately without the use of probabilistic splice site models, allowing generalized use of the program across species; (7) detect statistically significant microexons and incorporate them into the alignment; and (8) handle mapping and alignment tasks on genomes having alternate assemblies, linkage groups or strains.  It is developed by Thomas D. Wu of Genentech, Inc.  
+GMAP_ is the Genomic Mapping and Alignment Program.  The functionality provided by gmap allows a user to: (1) map and align a single cDNA interactively against a large genome in about a second, without the startup time of several minutes typically needed by existing mapping programs; (2) switch arbitrarily among different genomes, without the need for a preloaded server dedicated to each genome; (3) run the program on computers with as little as 128 MB of RAM (random access memory); (4) perform high-throughput batch processing of cDNAs by using memory mapping and multithreading when appropriate memory and hardware are available; (5) generate accurate gene models, even in the presence of substantial polymorphisms and sequence errors; (6) locate splice sites accurately without the use of probabilistic splice site models, allowing generalized use of the program across species; (7) detect statistically significant microexons and incorporate them into the alignment; and (8) handle mapping and alignment tasks on genomes having alternate assemblies, linkage groups or strains.  It is developed by Thomas D. Wu of Genentech, Inc.
 
 Publication_ citation: Thomas D. Wu, Colin K. Watanabe  Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
 
@@ -506,7 +506,9 @@
 You will want to read the README_
 
 .. _README: http://research-pub.gene.com/gmap/src/README
-
   </help>
+  <citations>
+    <citation type="doi">10.1093/bioinformatics/bti310</citation>
+  </citations>
 </tool>