changeset 0:7da2c9654a83 draft default tip

Uploaded
author wolma
date Tue, 12 Aug 2014 11:26:15 -0400
parents
children
files annotate_variants.xml bamsort.xml convert.xml deletion_predictor.xml reheader.xml sam_header.xml sampleinfo.xml seqdict.xml snap_caller.xml snp_caller_caller.xml snpeff_genomes.xml vcf_filter.xml
diffstat 12 files changed, 1081 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/annotate_variants.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,165 @@
+<tool id="annotate_variants" name="Variant Annotation">
+  <description>Predict the effects of SNPs and indels on known genes in the reference genome using SnpEff</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+</requirements>
+  <command>
+    mimodd annotate
+    ## the VCF dataset to annotate
+    $inputfile
+    ## SnpEff-specific options are only emitted when SnpEff is the selected annotation tool
+    #if $str($annotool.name)=='snpeff':
+      --genome ${annotool.genomeVersion}
+      #if $annotool.ori_output:
+        --snpeff_out $snpeff_file
+      #end if
+      #if $annotool.stats:
+        --stats $summary_file
+      #end if
+      ${annotool.snpeff_settings.chr} ${annotool.snpeff_settings.no_us} ${annotool.snpeff_settings.no_ds} ${annotool.snpeff_settings.no_intron} ${annotool.snpeff_settings.no_intergenic} ${annotool.snpeff_settings.no_utr}
+      #if $annotool.snpeff_settings.min_cov:
+        --minC ${annotool.snpeff_settings.min_cov}
+      #end if
+      #if $annotool.snpeff_settings.min_qual:
+        --minQ ${annotool.snpeff_settings.min_qual}
+      #end if
+      #if $annotool.snpeff_settings.ud:
+        --ud ${annotool.snpeff_settings.ud}
+      #end if
+    #end if
+    ## output options; the species is free text and may contain spaces, so it is passed quoted
+    --ofile $outputfile
+    #if $str($formatting.oformat) == "text":
+      --oformat text
+    #end if
+    #if $str($formatting.oformat) == "html":
+      #if $formatting.formatter_file:
+        --link ${formatting.formatter_file}
+      #end if
+      #if $formatting.species:
+        --species "${formatting.species}"
+      #end if
+    #end if
+    ## an empty grouping value keeps the order of the input file
+    #if $str($grouping):
+      --grouping $grouping
+    #end if
+    --verbose
+  </command>
+
+  <inputs>
+    <param name="inputfile" format="vcf" type="data" label="vcf inputfile to be annotated"/>
+    <param name="grouping" type="select" label="Group variants by"> <!-- the empty value means no grouping option is passed on -->
+        <option value="">order in the input file</option>
+        <option value="by_sample">sample</option>
+        <option value="by_genes">most affected genes</option>
+    </param>
+    <conditional name="formatting"> <!-- only the html case carries extra hyperlink-related inputs -->
+        <param name="oformat" type="select" label="Format of the annotation output file">
+            <option value="html">HTML</option>
+            <option value="text">Tab-separated plain text</option>
+        </param>
+        <when value="html">
+            <param name="formatter_file" format="txt" type="data" optional="true" label="Optional file with hyperlink formatting instructions"/>
+            <param name="species" type="text" label="Species" help="Overwrite the species guess from the SnpEff genome, often not necessary"/>
+        </when>
+    </conditional>
+    <conditional name="annotool">
+        <param name="name" type="select" label="Use this tool to annotate the input file" help="Select SnpEff here, if you want to have the vcf input annotated with genomic feature information. Select None if you do not want additional annotation, if you do not have SnpEff installed, or if you have no appropriate SnpEff annotation file for the input.">
+            <option value="snpeff">SnpEff</option>
+            <option value="None">None</option>
+        </param>
+        <when value="snpeff">
+            <param name="genome_list" type="data" format="tabular" label="genome list" />
+            <param name="genomeVersion" type="select" label="Genome">
+                <options from_dataset="genome_list">
+                    <column name="name" index="0"/>
+                    <column name="value" index="1"/>
+                </options>
+            </param>
+            <param name="ori_output" type="boolean" checked="true" label="Keep the original SnpEff output" />
+            <param name="stats" type="boolean" checked="true" label="Produce a summary file of results" />
+            <!-- both cases below define the same parameter names, so the command template can reference them unconditionally -->
+            <conditional name="snpeff_settings">
+                <param name="detail_level" type="select" label="SnpEff-specific parameter settings" help="This section lets you specify the detailed parameter settings for the SnpEff tool.">
+                    <option value="default">default settings</option>
+                    <option value="change">change settings</option>
+                </param>
+                <when value="default">
+                    <!-- default settings for SnpEff: every option is supplied as an empty hidden value -->
+                    <param name="chr" type="hidden" value=""/>
+                    <param name="min_cov" type="hidden" value=""/>
+                    <param name="min_qual" type="hidden" value=""/>
+                    <param name="no_ds" type="hidden" value=""/>
+                    <param name="no_us" type="hidden" value=""/>
+                    <param name="no_intron" type="hidden" value=""/>
+                    <param name="no_intergenic" type="hidden" value=""/>
+                    <param name="no_utr" type="hidden" value=""/>
+                    <param name="ud" type="hidden" value=""/>
+                </when>
+                <when value="change">
+                    <param name="chr" type="boolean" truevalue="-chr" falsevalue="" checked="false" label="prepend 'chr' to chromosome names, e.g., 'chr7' instead of '7'" /> <!-- NOTE(review): single-dash flag, unlike the sibling options; confirm against the mimodd annotate command line -->
+                    <param name="min_cov" type="integer" optional="true" label="minimum coverage (default = not used)" help="do not include variants with a coverage lower than this value"/>
+                    <param name="min_qual" type="integer" optional="true" label="minimum quality (default = not used)" help="do not include variants with a quality lower than this value"/>
+                    <param name="no_ds" type="boolean" label="do not show downstream changes" truevalue="--no_downstream" falsevalue="" checked="false" help="annotation of effects on the downstream region of genes can be suppressed"/>
+                    <param name="no_us" type="boolean" label="do not show upstream changes" truevalue="--no_upstream" falsevalue="" checked="false" help="annotation of effects on the upstream region of genes can be suppressed"/>
+                    <param name="no_intron" type="boolean" label="do not show intron changes" truevalue="--no_intron" falsevalue="" checked="false" help="annotation of effects on introns of genes can be suppressed"/>
+                    <param name="no_intergenic" type="boolean" label="do not show intergenic changes" truevalue="--no_intergenic" falsevalue="" checked="false" help="annotation of effects on intergenic regions can be suppressed"/>
+                    <param name="no_utr" type="boolean" label="do not show UTR changes" truevalue="--no_utr" falsevalue="" checked="false" help="annotation of effects on the untranslated regions of genes can be suppressed"/>
+                    <param name="ud" type="integer" optional="true" label="upstream downstream interval length (default = 5000 bases)" help="specify the upstream/downstream interval length, i.e., variants more than INTERVAL nts from the next annotated gene are considered to be intergenic"/>
+                </when>
+            </conditional>
+        </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="outputfile" format="html"> <!-- html unless plain text output was requested -->
+        <change_format>
+            <when input="formatting.oformat" value="text" format="tabular" />
+        </change_format>
+    </data>
+    <data name="snpeff_file" format="vcf"> <!-- kept only when SnpEff is used and its original output is requested -->
+        <filter>(annotool['name']=="snpeff" and annotool['ori_output'])</filter>
+    </data>
+    <data name="summary_file" format="html"> <!-- kept only when SnpEff is used and a summary is requested -->
+        <filter>(annotool['name']=="snpeff" and annotool['stats'])</filter>
+    </data>
+  </outputs>
+
+  <help>
+.. class:: infomark
+
+   **What it does**
+
+The tool turns a variant list in VCF format into a more readable summary table listing variant sites and effects.
+
+If installed, the variant annotation tool SnpEff can be used transparently to determine the genomic features, e.g., genes or transcripts, affected by the variants.
+
+Use of this feature requires that you have an appropriate SnpEff genome file installed on the host machine. You can use the *List installed SnpEff genomes* tool to generate a list of all available SnpEff genomes.
+This list can then be used (by selecting the dataset as the *genome list*) to populate the *genome* dropdown menu, from which you can select the SnpEff genome file to be used for the annotation.
+
+As output file formats, HTML and plain text are supported.
+In HTML mode, variant positions and/or affected genomic features can be turned into hyperlinks to corresponding views in web-based genome browsers.
+
+The behavior of this feature depends on:
+
+1) Recognition of the species that is analyzed
+
+   You can declare the species you are working with using the *Species* text field.
+   If you are not declaring the species explicitly, but are choosing SnpEff for effect annotation, the tool will usually be able to auto-detect the species from the SnpEff genome you are using.
+   If no species gets assigned in either way, no hyperlinks will be generated and the html output will look essentially like plain text.
+   
+2) Available hyperlink formatting rules for this species
+   
+   When the species has been recognized, the tool checks if you have selected an *optional file with hyperlink formatting instructions*.
+   If you did and that file contains an entry matching the recognized species, that entry will be used as a template to construct the hyperlinks.
+   If no matching entry is found in the file, an error will be raised.
+   
+   If you did not supply a hyperlink formatting instruction file, the tool will consult an internal lookup table to see if it finds default rules for the construction of the hyperlinks for the species.
+   If not, no hyperlinks will be generated and the html output will look essentially like plain text.
+   
+  </help>
+</tool>
+	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bamsort.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,40 @@
+<tool id="bamsort" name="Sort BAM file">
+  <description>Sort a BAM file by coordinates (or names) of the mapped reads</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+</requirements>
+  <command>
+    mimodd sort $inputfile -o $output --oformat $oformat $by_name
+  </command>
+  <!-- by_name and oformat are booleans whose truevalue/falsevalue strings are inserted verbatim into the command line -->
+  <inputs>
+    <param name="inputfile" type="data" format="bam" label="Input file to sort" />
+    <param name="by_name" type="boolean" truevalue="-n" falsevalue="" checked="false" label="Sort by read names instead of coordinates" help="A less common option, but necessary, e.g., if you want to re-align sorted output from a previous run of the Snap Align Tool." />
+    <param name="oformat" type="boolean" truevalue="sam" falsevalue="bam" checked="false" label="Output in uncompressed SAM format" />
+  </inputs>
+  <!-- the output datatype follows the requested format; the label spells the package name as MiModD -->
+  <outputs>
+    <data name="output" format="bam" label="Sorted output from MiModD ${tool.name} on ${on_string}">
+      <change_format>
+        <when input="oformat" value="sam" format="sam" />
+      </change_format>
+    </data>
+  </outputs>
+
+<help>
+.. class:: infomark
+
+   **What it does**
+
+The tool sorts a BAM file of aligned reads, typically by the reference genome coordinates that the reads have been mapped to.
+
+Coordinate-sorted input files are expected by the downstream MiModD tools *Variant Calling and Coverage Analysis* and *Deletion prediction*.
+
+Note, however, that the *SNAP Read Alignment* produces coordinate-sorted output by default and it is only necessary to sort files that come from other sources or from *SNAP Read Alignment* jobs with a custom sort order.
+
+The option *Sort by read names instead of coordinates* is useful if you want to re-align coordinate-sorted paired-end data. In *paired-end mode*, the *SNAP Read Alignment* tool expects the reads in the input file to be arranged in read pairs, i.e., the forward read information of a pair must be followed immediately by its reverse mate information, which is typically not the case in coordinate-sorted files. Resorting such files by read names fixes this problem.
+
+</help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convert.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,124 @@
+<tool id="convert" name="Convert">
+  <description>between different sequence data formats</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+  </requirements>
+  <command>
+    mimodd convert
+    ## list every input dataset; paired-end formats contribute two files per repeat entry
+    #for $dataset in $mode.input_list
+      ${dataset.file1}
+      #if $str($mode.iformat) in ("fastq_pe", "gz_pe"):
+        ${dataset.file2}
+      #end if
+    #end for
+    #if $str($header) != "None":
+      --header $header
+    #end if
+    --output $outputname
+    --iformat ${mode.iformat}
+    --oformat ${mode.oformat}
+  </command>
+  
+  <inputs>
+    <conditional name="mode"> <!-- every input format brings its own output format choices and input dataset list -->
+      <param name="iformat" type="select" label="choose the input file format" help="Your data will have to be in a single file if it comes from single-end sequencing, but will have to be arranged in two files if it comes from paired-end sequencing.">
+        <option value="fastq">fastq: single-end (one file)</option>
+        <option value="fastq_pe">fastq: paired-end (two files)</option>
+        <option value="gz">gzip compressed fastq: single-end (one file)</option>
+        <option value="gz_pe">gzip compressed fastq: paired-end (two files)</option>
+        <option value="sam">sam</option>
+        <option value="bam">bam</option>
+      </param>
+      <when value="fastq">
+        <param name="oformat" type="select" label="choose the output file format">
+          <option value="sam">sam</option>
+          <option value="bam">bam</option>
+        </param>
+        <repeat name="input_list" title="fastq input dataset" default="1" min="1">
+          <param name="file1" type="data" format="fastq" label="inputfile"/>
+        </repeat>
+      </when>
+      <when value="fastq_pe">
+        <param name="oformat" type="select" label="choose the output file format">
+          <option value="sam">sam</option>
+          <option value="bam">bam</option>
+        </param>
+        <repeat name="input_list" title="fastq input datasets" default="1" min="1">
+          <param name="file1" type="data" format="fastq" label="inputfile 1"/>
+          <param name="file2" type="data" format="fastq" label="inputfile 2"/>
+        </repeat>
+      </when>
+      <when value="gz">
+        <param name="oformat" type="select" label="choose the output file format">
+          <option value="sam">sam</option>
+          <option value="bam">bam</option>
+        </param>
+        <repeat name="input_list" title="fastq.gz input dataset" default="1" min="1">
+          <param name="file1" type="data" format="data" label="inputfile"/>
+        </repeat>
+      </when>
+      <when value="gz_pe">
+        <param name="oformat" type="select" label="choose the output file format">
+          <option value="sam">sam</option>
+          <option value="bam">bam</option>
+        </param>
+        <repeat name="input_list" title="fastq.gz input datasets" default="1" min="1">
+          <param name="file1" type="data" format="data" label="inputfile 1"/>
+          <param name="file2" type="data" format="data" label="inputfile 2"/>
+        </repeat>
+      </when>
+      <when value="sam">
+        <param name="oformat" type="select" label="choose the output file format">
+          <option value="bam">bam</option>
+        </param>
+        <repeat name="input_list" title="sam input dataset" default="1" min="1" max="1">
+          <param name="file1" type="data" format="sam" label="inputfile"/>
+        </repeat>
+      </when>
+      <when value="bam">
+        <param name="oformat" type="select" label="choose the output file format">
+          <option value="sam">sam</option>
+        </param>
+        <repeat name="input_list" title="bam input dataset" default="1" min="1" max="1">
+          <param name="file1" type="data" format="bam" label="inputfile"/>
+        </repeat>
+      </when>
+    </conditional>
+    <param name="header" type="data" format="sam" optional="True" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file. Selection is highly recommended."/>
+  </inputs>
+  
+  <outputs> <!-- bam unless sam output was selected; the label spells the package name as MiModD -->
+    <data name="outputname" format="bam" label="Converted reads from MiModD ${tool.name} on ${on_string}">
+      <change_format>
+        <when input="mode.oformat" value="sam" format="sam" />
+      </change_format>
+    </data>
+  </outputs>
+
+<help>
+.. class:: infomark
+
+   **What it does**
+
+The tool converts between different file formats used for storing next-generation sequencing data.
+
+As input file types, it can handle uncompressed or gzipped fastq, SAM or BAM format, which it can convert to SAM or BAM format.
+
+**Notes:**
+
+1) In its standard configuration Galaxy will decompress any .gz files during their upload, effectively preventing the use of gzipped fastq files.
+
+2) The tool can convert fastq files representing data from paired-end sequencing runs to appropriate SAM/BAM format provided that the mate information is split over two fastq files in corresponding order.
+
+   **TIP:** If your paired-end data is arranged differently, you may look into the *FASTQ splitter* and *FASTQ de-interlacer* tools to see if they convert your files to the right format.
+
+3) Specifying a SAM header file to use in the conversion is highly recommended as this will add sequencing run metadata to the results file, which is the main purpose of storing unaligned NGS data in SAM/BAM format.
+
+   See the help on the *NGS Run Annotation* tool for information on how to generate a new header file.
+
+</help>
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deletion_predictor.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,60 @@
+<tool id="deletion_predictor" name="Deletion Prediction for paired-end data">
+  <description>Predicts deletions in one or more aligned read samples based on coverage of the reference genome and on insert sizes</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+  </requirements>
+  <command>
+    mimodd delcall
+    #for $source in $list_input
+      ${source.bamfile}
+    #end for
+    $covfile -o $outputfile
+    --max_cov $max_cov --min_size $min_size $include_uncovered $group_by_id --verbose
+  </command>
+  <!-- one or more BAM inputs plus a coverage file; the two booleans expand to command line flags or to nothing -->
+  <inputs>
+    <repeat name="list_input" title="Aligned reads input source" default="1" min="1">
+      <param name="bamfile" type="data" format="bam" label="input BAM file"/>
+    </repeat>
+    <param name="covfile" type="data" format="tabular" label="input coverage file" help="A MiModD coverage file as generated by the Variant Calling and Coverage Analysis tool."/>
+    <param name="group_by_id" type="boolean" truevalue="-i" falsevalue="" checked="true" label="group reads based on read group id only" help="If selected, reads from different read groups will be treated strictly separate. If turned off, read groups with identical sample names are used together for identifying uncovered regions, but are still treated separately for the prediction of deletions."/>
+    <param name="include_uncovered" type="boolean" truevalue="-u" falsevalue="" checked="true" label="include low-coverage regions" help="If selected, regions that fulfill the coverage criteria below, but are not statistically significant deletions, will be included in the output."/>
+    <param name="max_cov" type="integer" value="0" label="maximal coverage allowed inside a low-coverage region (default: 0)" help="The maximal coverage at a site allowed to consider it as part of a low-coverage region"/>
+    <param name="min_size" type="integer" value="100" label="minimal deletion size (default: 100)" help="A low-coverage region must consist of at least this number of consecutive bases below the maximal coverage to consider it in further analyses."/>
+  </inputs>
+  <!-- single result dataset in gff format -->
+  <outputs>
+    <data name="outputfile" format="gff"/>
+  </outputs>
+
+<help>
+.. class:: infomark
+
+   **What it does**
+
+The tool predicts deletions from paired-end data in a two-step process.
+
+First, it finds regions of low-coverage, i.e., candidate regions for deletions, by scanning a coverage file as produced by the *Variant Calling and Coverage Analysis* tool.
+The *maximal coverage allowed inside a low-coverage region* and the *minimal deletion size* parameters are used at this step to define what is considered a low-coverage region.
+
+Second, the tool assesses every low-coverage region statistically for evidence of it being a real deletion.
+This step requires paired-end data since it relies on shifts in the distribution of read pair insert sizes around real deletions.
+
+On its own, the tool only reports deletions, i.e., the fraction of low-coverage regions that pass the statistical test.
+If *include low-coverage regions* is selected (the preset in this form), regions that failed the test will also be reported.
+
+With *group reads based on read group id only* selected, as it is by default, grouping of reads into samples is done strictly based on their read group IDs.
+With the option deselected, grouping is done based on sample names in the first step of the analysis, i.e. the reads of all samples with a shared sample name are used to identify low-coverage regions.
+In the second step, however, reads will be regrouped by their read group IDs again, i.e. the statistical assessment for real deletions is always done on a per read group basis.
+
+**TIP:**
+Deselecting *group reads based on read group id only* can be useful, for example, if you have both paired-end and single-end sequencing data for the same sample.
+
+In this case, the two sets of reads will usually share a common sample name, but differ in their read groups.
+With grouping based on sample names, the single-end data can be used together with the paired-end data to identify low-coverage regions, thus increasing overall coverage and reliability of this step.
+Still, the assessment of deletions will use only the paired-end data (auto-detecting that the single-end reads do not provide insert size information).
+
+</help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/reheader.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,33 @@
+<tool id="reheader" name="Reheader BAM file">
+  <description>From a BAM file generate a new file with the original header (if any) replaced by that found in a second SAM file</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+  </requirements>
+  <command>
+    mimodd reheader $template $input -o $output --verbose
+  </command>
+  <!-- the header template is passed before the BAM file on the command line -->
+  <inputs>
+    <param name="input" type="data" format="bam" label="Input file in BAM format" help="The file to reheader." />
+    <param name="template" type="data" format="sam" label="Header template file in SAM format" help="Use the header information of this file" />
+  </inputs>
+  <!-- the label spells the package name as MiModD -->
+  <outputs>
+    <data name="output" format="bam" label="(Re)headered bam file from MiModD ${tool.name} on ${on_string}">
+    </data>
+  </outputs>
+
+<help>
+.. class:: infomark
+
+   **What it does**
+
+The tool replaces the header of the input BAM file (i.e., its metadata) with that found in the template SAM file and writes the result to a new BAM file.
+
+Typically, you will generate the header template file with the *NGS Run Annotation* tool, but any SAM file with header information can be used instead.
+
+</help>
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sam_header.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,130 @@
+<tool id="sam_header" name="NGS Run Annotation">
+  <description>Create a SAM format header from run metadata for sample annotation.</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+  </requirements>
+  <command>
+    mimodd header
+    ## mandatory read-group fields are always passed
+    --rg_id "$rg_id"
+    --rg_sm "$rg_sm"
+    ## every optional field is emitted only when its own input is non-empty
+    #if $str($rg_cn):
+      --rg_cn "$rg_cn"
+    #end if
+    #if $str($rg_ds):
+      --rg_ds "$rg_ds"
+    #end if
+    #if $str($anno) and $str($month) and $str($day):
+      --rg_dt "$anno-$month-$day"
+    #end if
+    #if $str($rg_lb):
+      --rg_lb "$rg_lb"
+    #end if
+    #if $str($rg_pl):
+      --rg_pl "$rg_pl"
+    #end if
+    #if $str($rg_pi):
+      --rg_pi "$rg_pi"
+    #end if
+    #if $str($rg_pu):
+      --rg_pu "$rg_pu"
+    #end if
+    ## the insert size test above checks rg_pi itself rather than the description field
+    --outputfile $outputfile
+    ## end of the header command
+  </command>
+
+  <inputs>
+    <param name="rg_id" type="text" label="read-group ID (mandatory)" size="80"> <!-- double quotes get backslash-escaped since the command wraps the value in double quotes -->
+      <sanitizer invalid_char="">
+        <valid initial="string.printable">
+          <remove value="&quot;"/>
+        </valid>
+        <mapping initial="none">
+          <add source="&quot;" target="\&quot;"/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param name="rg_sm" type="text" label="sample name (mandatory)" size="80">
+      <sanitizer invalid_char="">
+        <valid initial="string.printable">
+          <remove value="&quot;"/>
+        </valid>
+        <mapping initial="none">
+          <add source="&quot;" target="\&quot;"/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param name="rg_cn" type="text" label="name of sequencing center" size="80">
+      <sanitizer invalid_char="">
+        <valid initial="string.printable">
+          <remove value="&quot;"/>
+        </valid>
+        <mapping initial="none">
+          <add source="&quot;" target="\&quot;"/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param name="rg_ds" type="text" label="description" size="80">
+      <sanitizer invalid_char="">
+        <valid initial="string.printable">
+          <remove value="&quot;"/>
+        </valid>
+        <mapping initial="none">
+          <add source="&quot;" target="\&quot;"/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param name="anno" type="text" label="year (YYYY) the run was produced"/>
+    <param name="month" type="text" label="month (MM) the run was produced"/>
+    <param name="day" type="text" label="day (DD) the run was produced"/>
+    <param name="rg_lb" type="text" label="read-group library" size="80">
+      <sanitizer invalid_char="">
+        <valid initial="string.printable">
+          <remove value="&quot;"/>
+        </valid>
+        <mapping initial="none">
+          <add source="&quot;" target="\&quot;"/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param name="rg_pl" type="text" label="platform/technology used to produce the reads"/>
+    <param name="rg_pi" type="text" label="predicted median insert size"/>
+    <param name="rg_pu" type="text" label="platform unit; unique identifier" size="80">
+      <sanitizer invalid_char="">
+        <valid initial="string.printable">
+          <remove value="&quot;"/>
+        </valid>
+        <mapping initial="none">
+          <add source="&quot;" target="\&quot;"/>
+        </mapping>
+      </sanitizer>
+    </param>
+  </inputs>
+
+  <outputs> <!-- the label combines sample name and read-group ID; the package name is spelled MiModD -->
+    <data name="outputfile" format="sam" label="${rg_sm} (${rg_id}) header information from MiModD ${tool.name} on ${on_string}"/>
+  </outputs>
+
+<help>
+.. class:: infomark
+
+   **What it does**
+
+This tool takes the user-provided information about a next-generation sequencing run and constructs a valid header in the SAM file format from it.
+
+The result file can be used by the tools *Convert* and *Reheader* or in the *SNAP Read Alignment* step to add run metadata to sequenced reads files (or to overwrite pre-existing information).
+
+**Note:**
+
+**MiModD requires run metadata for every input file at the Alignment step !**
+
+**Tip:**
+
+While you can do Alignments from fastq file format by providing a custom header file directly to the *SNAP Read Alignment* tool, the **recommended approach** is to first convert all input files to and archive all datasets in SAM/BAM format with appropriate header information prior to any downstream analysis. Although a bit more time-consuming this practice protects against information loss and ensures that the input datasets will remain useful for others in the future.
+
+</help>
+</tool>
+	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sampleinfo.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,29 @@
+<tool id="sampleinfo" name="Retrieve Sample Information">
+  <description>for supported data formats.</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+  </requirements>
+  <command>
+    mimodd info $ifile -o $outputfile --verbose
+  </command>
+  <!-- a single input dataset in any of the listed formats -->
+  <inputs>
+    <param name="ifile" format="bam,sam,vcf,tabular" type="data" label="input file"/>
+  </inputs>
+  <!-- plain text report -->
+  <outputs>
+    <data name="outputfile" label="Sample Info on ${on_string}" format="txt"/>
+  </outputs>
+
+<help>
+.. class:: infomark
+
+   **What it does**
+
+The tool inspects the input file and writes a report about the samples (and read groups) encoded in it.
+
+It works with all file formats used and supported by MiModD that contain sample metadata, i.e. headered SAM/BAM files, vcf files with sample information and the cov files produced during Coverage Analysis.
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seqdict.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,30 @@
+<tool id="CloudMap Config" name="Generate a species configuration file">
+  <description>for use with CloudMap.</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+  </requirements>
+  <command>
+    mimodd cm_seqdict $ifile -o $outputfile
+  </command>
+  <!-- a single vcf input dataset -->
+  <inputs>
+    <param name="ifile" format="vcf" type="data" label="vcf input file"/>
+  </inputs>
+  <!-- tabular result dataset -->
+  <outputs>
+    <data name="outputfile" label="Sequence Dictionary for CloudMap from ${on_string}" format="tabular"/>
+  </outputs>
+
+  <help>
+.. class:: infomark
+
+   **What it does**
+
+The purpose of this tool is solely to provide compatibility with the external **CloudMap** *Variant Discovery Mapping* and *Hawaiian Variant Mapping* tools.
+
+From a VCF file, the tool extracts the chromosome names and sizes and reports them in the **CloudMap** *species configuration file* format.
+Such a file is required as input to the **CloudMap** mapping tools, if you are working with a species other than the natively supported ones (i.e., other than C.elegans or A. thaliana for the current version of CloudMap).
+   
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snap_caller.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,221 @@
+<tool id="snap_caller" name="SNAP Read Alignment">
+  <description>Map sequence reads to a reference genome using SNAP</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+  </requirements>
+  <command> 
+	mimodd snap_batch -s
+	## SNAP calls (considering different cases): one quoted "snap ..." argument is emitted per entry of the datasets repeat
+	## paired-end fastq/gz entries supply two read files (ifile1, ifile2), every other mode/format combination a single ifile
+	#for $i in $datasets
+		"snap ${i.mode_choose.mode} $ref_genome
+		#if $str($i.mode_choose.mode) == "paired" and $str($i.mode_choose.input.iformat) in ("fastq", "gz"):
+${i.mode_choose.input.ifile1} ${i.mode_choose.input.ifile2}
+		#else:
+${i.mode_choose.input.ifile}
+		#end if
+--outputfile $outputfile --iformat ${i.mode_choose.input.iformat} --oformat $oformat
+--idx_seedsize $set.seedsize
+--idx_slack $set.slack --maxseeds $set.maxseeds --maxhits $set.maxhits --clipping=$set.clipping --maxdist $set.maxdist --confdiff $set.confdiff
+		#if $i.mode_choose.input.header:
+--header ${i.mode_choose.input.header}
+		#end if
+		#if $str($i.mode_choose.mode) == "paired":
+--spacing $set.sp_min $set.sp_max
+		#end if
+		#if $str($set.selectivity) != "off":
+--selectivity $set.selectivity
+		#end if
+		#if $str($set.filter_output) != "off":
+--filter_output $set.filter_output
+		#end if
+		#if $str($set.sort) != "off":
+--sort $set.sort
+		#end if
+		#if $str($set.mmatch_notation) == "general":
+-M
+		#end if
+--max_mate_overlap $set.max_mate_overlap
+--verbose
+"							
+	#end for
+  </command>
+
+  <inputs>
+    <!-- mandatory arguments (and mode-conditionals) -->
+
+    <param name="ref_genome" type="data" format="fasta" label="reference genome" help="The fasta reference genome that SNAP should align reads against; a SNAP index will be built by the tool automatically."/>
+    
+    <repeat name="datasets" title="datasets" default="1" min="1">
+        <conditional name="mode_choose">
+            <param name="mode" type="select" label="choose mode" help="Reads obtained from single-end sequencing runs should be aligned in 'single' mode, paired-end reads in 'paired' mode. **WARNING**: if the read input file is in SAM/BAM format, the current version of this tool will **not** verify the mode and may produce erroneous alignments with wrong settings!">
+                <option value="single">single-end</option>
+                <option value="paired">paired-end</option>
+            </param>
+            <!-- Both modes nest an "input" conditional keyed on the read file format. BAM/SAM inputs take one file plus an optional custom header; fastq/gz inputs require a header file and, in paired-end mode, are split across two read files. -->
+            <when value="single">
+                <conditional name="input">
+                    <param name="iformat" type="select" label="input file format">
+                        <option value="bam">BAM</option>
+                        <option value="sam">SAM</option>
+                        <option value="gz">gz</option>
+                        <option value="fastq">fastq</option>
+                    </param>
+                    <when value="bam">
+                        <param name="ifile" type="data" format="bam" label="input file"/>
+                        <param name="header" type="data" format="sam" optional="true" label="custom header file"/>
+                    </when>
+                    <when value="sam">
+                        <param name="ifile" type="data" format="sam" label="input file"/>
+                        <param name="header" type="data" format="sam" optional="true" label="custom header file"/>
+                    </when>
+                    <when value="gz">
+                        <param name="ifile" type="data" label="input file"/>
+                        <param name="header" type="data" format="sam" label="header file"/>
+                    </when>
+                    <when value="fastq">
+                        <param name="ifile" type="data" format="fastq" label="input file"/>
+                        <param name="header" type="data" format="sam" label="header file"/>
+                    </when>
+                </conditional>
+            </when>
+            <when value="paired">
+                <conditional name="input">
+                    <param name="iformat" type="select" label="input file format">
+                        <option value="bam">BAM</option>
+                        <option value="sam">SAM</option>
+                        <option value="gz">gz</option>
+                        <option value="fastq">fastq</option>
+                    </param>
+                    <when value="bam">
+                        <param name="ifile" type="data" format="bam" label="input file"/>
+                        <param name="header" type="data" format="sam" optional="true" label="custom header file"/>
+                    </when>
+                    <when value="sam">
+                        <param name="ifile" type="data" format="sam" label="input file"/>
+                        <param name="header" type="data" format="sam" optional="true" label="custom header file"/>
+                    </when>
+                    <when value="fastq">
+                        <param name="ifile1" type="data" format="fastq" label="input file 1"/>
+                        <param name="ifile2" type="data" format="fastq" label="input file 2"/>
+                        <param name="header" type="data" format="sam" label="header file"/>
+                    </when>
+                    <when value="gz">
+                        <param name="ifile1" type="data" label="input file 1"/>
+                        <param name="ifile2" type="data" label="input file 2"/>
+                        <param name="header" type="data" format="sam" label="header file"/>
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </repeat>
+
+    <param name="oformat" type="select" label="output file format">
+        <option value="bam">BAM</option>
+        <option value="sam">SAM</option>
+    </param>	
+    
+    <!-- optional arguments -->
+
+    <conditional name="set">
+        <param name="settings_mode" type="select" label="further parameter settings" help="This section lets you specify the detailed parameter settings for the SNAP aligner. Only change them if you know what you are doing, i.e., read the SNAP manual first.">
+            <option value="default">default settings</option>
+            <option value="change">change settings</option>
+        </param>
+
+        <!-- default settings: every aligner option is fixed through a hidden parameter -->
+
+        <when value="default">
+            <param name="seedsize" type="hidden" value="20"/>
+            <param name="slack" type="hidden" value="0.3"/>
+            <param name="sp_min" type="hidden" value="100"/>
+            <param name="sp_max" type="hidden" value="10000"/>
+            <param name="maxdist" type="hidden" value="8"/>
+            <param name="confdiff" type="hidden" value="2"/>
+
+            <param name="maxseeds" type="hidden" value="25"/>
+            <param name="maxhits" type="hidden" value="250"/>
+            <param name="clipping" type="hidden" value="++"/>
+            <!-- "off" is the value the command template tests for in order to leave an option out -->
+            <param name="selectivity" type="hidden" value="off"/>
+            <param name="filter_output" type="hidden" value="off"/>
+            <param name="sort" type="hidden" value="0"/>
+            <param name="mmatch_notation" type="hidden" value="general"/>
+            <param name="max_mate_overlap" type="hidden" value="0" />
+        </when>
+
+        <!-- change settings: the same options as user-editable parameters -->
+
+        <when value="change">
+            <param name="seedsize" type="integer" value="20" label="seed size (default: 20)" help="Length of the seeds used in the reference genome hash table (SNAP index option -s)."/>
+            <param name="slack" type="float" value="0.3" label="hash table slack size (default: 0.3)" help="Corresponds to the -h option of SNAP index."/>
+
+            <!-- paired-end specific options -->
+            <param name="sp_min" type="integer" value="100" label="minimum spacing to allow between paired ends (default: 100)" help="Corresponds to the first value of the SNAP option -s."/>
+            <param name="sp_max" type="integer" value="10000" label="maximum spacing to allow between paired ends (default: 10000)" help="Corresponds to the second value of the SNAP option -s."/>
+            <param name="max_mate_overlap" type="float" value="0" label="Maximal overlap between the reads in a pair (as a fraction of their combined length; default: 0, no overlap allowed)" help="If the reads of a read pair overlap by more than this fraction of their combined length, they are filtered out" />
+
+            <param name="maxdist" type="integer" value="8" label="edit distance (default: 8)" help="maximum edit distance allowed per read or pair (SNAP option -d); higher values allow more divergent alignments to be found, but increase the rate of misalignments."/>
+            <param name="confdiff" type="integer" value="2" label="confidence threshold (default: 2)" help="Confidence threshold (SNAP option -c); the minimum edit distance difference between two alternate alignments required to reject the poorer alignment as suboptimal; higher values increase the rate of ambiguously aligned reads."/>
+            <param name="maxseeds" type="integer" value="25" label="maximum seeds per read (default: 25)" help="Number of seeds to use per read (SNAP option -n) when trying to match it to the reference genome; higher numbers will increase the rate of aligned reads and reduce the rate of misalignments, but will reduce performance."/>
+            <param name="maxhits" type="integer" value="250" label="maximum hits per seed (default: 250)" help="Maximum hits to consider per seed (SNAP option -h); don't use a seed region in the alignment process if it matches more than maxhits regions in the reference genome. Higher values reduce the rate of misalignments, but reduce performance."/>
+            <param name="clipping" type="select" label="read clipping (default: from back and front)" help="Specifies from which end of a read low-quality bases should be clipped (SNAP option -Cxx)">
+                <option value="++">from back and front</option>
+                <option value="-+">from back only</option>
+                <option value="+-">from front only</option>
+                <option value="--">no clipping</option>
+            </param>
+            <param name="selectivity" type="integer" value="1" label="selectivity (default: 1)" help="randomly choose 1/selectivity of the reads to score (SNAP option -S). The tool uses the default of 1 (or a 0 setting) to indicate that all reads should be worked with." />
+            <param name="filter_output" type="select" label="filter output (default: no filtering)" help="filter output (SNAP option -F) for certain classes of reads.">
+                <option value="off">no filtering</option>
+                <option value="a">aligned only</option>
+                <option value="s">single-aligned only</option>
+                <option value="u">unaligned only</option>
+            </param>
+            <param name="sort" type="select" label="output sorting (default: sort by read coordinates)" help="Sort the output file by alignment location (SNAP option --so).">
+                <option value="0">sort by read coordinates</option>
+                <option value="off">no sorting</option>
+            </param>
+            <param name="mmatch_notation" type="select" label="CIGAR symbols for alignment matches/mismatches (default: M notation)" help="Indicates whether CIGAR strings in the generated SAM/BAM file should use M (alignment match) rather than = and X (sequence (mis-)match). Warning: Downstream variant calling based on samtools currently relies on the old-style M notation!!" >
+                <option value="general">use M for both matches and mismatches</option>
+                <option value="differentiate">use = for matches, X for mismatches</option>
+            </param>
+        </when>
+    </conditional>
+</inputs>
+
+<outputs> <!-- a single alignment dataset: BAM by default, switched to SAM when the oformat select is set to "sam" -->
+    <data name="outputfile" format="bam" label="Aligned reads from MiModD ${tool.name} on ${on_string}">
+        <change_format>
+            <when input="oformat" value="sam" format="sam"/>
+        </change_format>
+    </data>
+</outputs>
+
+<help>
+.. class:: infomark
+
+   **What it does**
+
+The tool aligns the sequenced reads in an arbitrary number of input files against a common reference genome and stores the results in a single, possibly multi-sample output file.
+
+It does so by using the ultrafast, hashtable-based aligner SNAP, but unless you want to change aligner-specific options you do not have to know anything about this implementation detail.
+
+**Notes:**
+
+1) The tool requires that each input file contains adequate header information (i.e. metadata about the read groups and samples it encodes). The *custom header file* is offered as an **optional choice** for input files that **may** contain such header information, but you **must** specify it if your specific file does not provide the information. You **can** also provide a header file for an input file with header information, in which case the custom header will overwrite the existing header of the input file.
+
+2) Currently, you cannot configure aligner-specific options separately for specific input files from within this Galaxy tool. If you need this advanced level of control, you should use the command line tool ``mimodd snap_batch``.
+
+</help>
+</tool>
+    
+
+  
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snp_caller_caller.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,99 @@
+<tool id="snp_caller_caller" name="Variant Calling and Coverage Analysis">
+  <description>Predict SNPs and indels in one or more aligned read samples and calculate the coverage of every base in the reference genome using samtools/bcftools</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+  </requirements>
+  <command> 
+	mimodd varcall
+	## positional arguments: the reference genome, then every file of the list_input repeat; the coverage, statistics and sites outputs are only requested when the matching option is enabled
+	$ref_genome
+	#for $l in $list_input
+            ${l.inputfile}
+        #end for
+	--output_vcf=$output_vcf
+        #if $cov:
+	--output_cov=$output_cov
+	#end if
+	#if $cstats:
+	--cstats=$output_stats
+	#end if
+        --depth=$depth
+	#if $sites.use_sites:
+        --sites=$sites.sitelist
+        --output_sites=$output_sites
+	#end if
+	$group_by_id
+        --verbose
+        --quiet
+  </command>
+
+  <inputs>
+    <param name="ref_genome" type="data" format="fasta" label="reference genome" />
+    <repeat name="list_input" title="Aligned reads input source" default="1" min="1">
+      <param name="inputfile" type="data" format="bam" label="input file" />
+    </repeat>
+    <param name="group_by_id" type="boolean" label="group reads based on read group id only" truevalue="-i" falsevalue="" checked="true" help="If selected, this option ensures that only the read group id (but not the sample name) is considered in grouping reads in the input file(s). If turned off, read groups with identical sample names are automatically pooled and analyzed together even if they come from different NGS runs." />
+
+    <param name="cov" type="boolean" checked="true" label="produce coverage file" />
+    <param name="cstats" type="boolean" checked="true" label="generate coverage statistics" />
+
+    <conditional name="sites">
+      <param name="use_sites" type="boolean" checked="false" label="report on sites specified by positions" help="Select if you want the optional position-specified sites vcf file to be generated (see the tool help below)." />
+      <!-- default settings: no position-specified sites file is requested -->
+      <!-- NOTE(review): the placeholder below is named "sites" like the enclosing conditional, while the command template only reads sites.sitelist (and only when use_sites is true); presumably "sitelist" was intended. Verify before renaming. -->
+      <when value="false">
+        <param name="sites" type="hidden" value="None"/>
+      </when>
+
+      <!-- change settings: the user supplies the vcf file listing the positions to report on -->
+      <when value="true">
+        <param name="sitelist" type="data" format="vcf" label="list of genomic positions indicating sites to report on independent of genotypes" help="Select a vcf file that lists the genomic positions you wish to have reported on in the position-specified sites file."/>
+      </when>
+
+    </conditional>
+    <param name="depth" type="integer" value="250" label="maximum per-BAM depth (default: 250)" help="to avoid excessive use of memory"/>
+  </inputs>
+
+  <outputs>
+    <data name="output_vcf" format="vcf" label="Variant Calls from MiModD samtools Variant Calling on ${on_string}"/>
+    <data name="output_cov" format="tabular" label="MiModD Coverage Analysis on ${on_string}">
+        <filter>cov</filter>
+    </data>
+    <data name="output_stats" format="tabular" label="MiModD Coverage Statistics on ${on_string}">
+        <filter>cstats</filter>
+    </data>
+    <!-- the three datasets carrying a filter only appear in the history when the corresponding input option is switched on -->
+    <data name="output_sites" format="vcf" label="Fixed-sites Nucleotide Calls from MiModD samtools Variant Calling on ${on_string}">
+        <filter>sites['use_sites']</filter>
+    </data>
+  </outputs>
+
+<help>
+.. class:: infomark
+
+   **What it does**
+
+The tool calls variants (SNPs and indels) with respect to the reference genome from the aligned reads in the input files. 
+
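+It produces up to four output files; in addition to the three described below, a *coverage statistics* file is written if *generate coverage statistics* is selected: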
+
+1) The *variant sites file* is in vcf format and includes one line for every genomic position at which a variant is found.
+
+   When the input files hold aligned reads from more than one sample, a variant detected in one sample is enough for inclusion.
+   The sample-specific information in the last columns of the output file will provide detailed information about the genotype likelihoods for each sample.
+
+2) The optional *coverage file* reports the depth of coverage for each sample per base across the entire reference genome.
+
+   This file is required by the *Deletion Prediction* tool.
+
+3) The optional *position-specified sites file* is in vcf format again. If *report on sites specified by positions* was selected, it will have one line per user-defined genomic position independent of whether that position is included also in the variant sites vcf file or not.
+
+   **TIP:** This file is what you will need for the **CloudMap** *Hawaiian Variant Mapping* tool.
+
+**Note:**
+
+The tool uses samtools mpileup and bcftools for variant calling, but exposes just a single configuration parameter of these tools - the *maximum per-BAM depth*. Through this parameter, the maximum number of reads considered for variant calling at any site can be controlled. Its default value of 250 is taken from *samtools mpileup* and usually suitable. Consider, however, that this gives the maximum read number per input file, so if you have a large number of samples in one input file, it could become necessary to increase the value to get sufficient reads considered per sample.
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpeff_genomes.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,23 @@
+<tool id="snpeff_genomes" name="List Installed SnpEff Genomes">
+    <description>Checks the local SnpEff installation to compile a list of currently installed genomes</description>
+    <requirements>
+        <requirement type="package" version="3.4.1">python3</requirement>
+        <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+    </requirements>
+    <command>
+        mimodd snpeff_genomes -o $outputfile
+    </command>
+    <outputs>
+        <data format="tabular" name="outputfile"/>
+    </outputs>
+<help>
+.. class:: infomark
+
+**What it does**
+
+When executed this tool searches the host machine's SnpEff installation for properly registered and installed
+genome annotation files. The resulting list is added as a plain text file to your history for use with the *Variant Annotation* Tool.
+
+</help>
+    <!-- The tool declares no inputs; its only dataset is the file the command above writes via -o. -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcf_filter.xml	Tue Aug 12 11:26:15 2014 -0400
@@ -0,0 +1,127 @@
+<tool id="vcf_filter" name="VCF Filter">
+  <description>extracts lines from a vcf variant file based on field-specific filters</description>
+  <requirements>
+    <requirement type="package" version="3.4.1">python3</requirement>
+    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
+  </requirements>
+  <command>
+    mimodd vcf_filter ## assemble one mimodd vcf_filter call; option groups are only added when the matching filters were configured
+    $inputfile
+    -o $outputfile
+    #if len($datasets):
+    -s ## sample names and region specs are single-quoted so that spaces or shell metacharacters in them stay within one argument
+    #for $i in $datasets
+        '$i.sample'
+    #end for
+    --gt
+    #for $i in $datasets
+        ## remove whitespace from free-text input; an empty pattern is passed as ANY
+        #echo ("".join($i.GT.split()) or "ANY")
+        #echo " "
+    #end for
+    --dp
+    #for $i in $datasets
+        $i.DP
+    #end for
+    --gq
+    #for $i in $datasets
+        $i.GQ
+    #end for
+    #end if
+    #if len($regions):
+    -r
+        #for $i in $regions
+            #if $i.stop:
+    '$i.chrom:$i.start-$i.stop'
+            #else:
+    '$i.chrom:$i.start'
+            #end if
+        #end for
+    #end if
+    #if $vfilter:
+    --v_filter
+    ## remove ',' (and possibly adjacent whitespace) and replace with ' '
+    #echo (" ".join("".join($vfilter.split()).split(',')))
+    #end if
+    $vartype
+  </command>
+  
+  <inputs> <!-- one VCF file, any number of sample-specific and region filters, a variant type selector and a sample column filter -->
+    <param name="inputfile" type="data" format="vcf" label="VCF input file"/>
+    <repeat name="datasets" title="Sample-specific Filter" default="0" min="0">
+      <param name="sample" type="text" label="sample" help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to."/>
+      <param name="GT" type="text" label="genotype pattern(s) for the inclusion of variants" help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list."/>
+      <param name="DP" type="integer" value="0" label="depth of coverage for the sample at the variant site" help="keep only variants with at least this sample-specific coverage at the variant site"/>
+      <param name="GQ" type="integer" value="0" label="genotype quality for the variant in the sample" help="keep only variants for which the genotype prediction for the sample has at least this quality"/>
+    </repeat>
+    <repeat name="regions" title="Region Filter" default="0" min="0" help="Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported.">
+      <param name="chrom" type="text" label="Chromosome"/>
+      <param name="start" type="text" label="Region Start"/>
+      <param name="stop" type="text" label="Region End"/>
+    </repeat>
+    <param name="vartype" type="select" label="Select the types of variants to include in the output">
+      <option value="">all types of variants</option>
+      <option value="--no_indels">exclude indels</option>
+      <option value="--indels_only">only indels</option>
+    </param>
+    <param name="vfilter" type="text" label="sample" help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output."/>
+  </inputs>
+  
+  <outputs>
+    <data name="outputfile" format="vcf" />
+  </outputs>
+
+  <help>
+.. class:: infomark
+
+   **What it does**
+
+The tool filters a variant file in VCF format to generate a new VCF file with only a subset of the original variants.
+
+The following types of variant filters can be set up:
+
+1) Sample-specific filters:
+   
+   Filter variants based on their characteristics in the sequenced reads of a specific sample. Multiple sample-specific filters are combined by logical AND, i.e., only variants that pass ALL sample-specific filters are kept.
+   
+2) Region filters:
+   
+   Filter variants based on the genomic region they affect. Multiple region filters are combined by logical OR, i.e., variants passing ANY region filter are kept.
+   
+3) Variant type filter:
+
+   Filter variants by their type, i.e. whether they are single nucleotide variations (SNVs) or indels.
+   
+In addition, the *sample* filter can be used to reduce the samples encoded in a multi-sample VCF file to just those specified by the filter.
+The *sample* filter is included for compatibility reasons: if an external tool cannot deal with the multisample file format, but instead looks only at the first sample-specific column of the file, you can use the filter to turn the multi-sample file into a single-sample file.
+
+**Examples of sample-specific filters:**
+
+*Simple genotype pattern*
+
+genotype pattern: 1/1 ==> keep all variants in the vcf input file for which the specified sample's genotype is homozygous mutant
+
+*Complex genotype pattern*
+
+genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype
+
+*Multiple sample-specific filters*
+
+Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern: 1/1
+==> keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant
+
+*Combining sample-specific filter criteria*
+
+genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9
+==> keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9
+**and** at least three reads from the sample cover the variant site
+
+**TIP:**
+
+As in the example above, genotype quality is typically most useful in combination with a genotype pattern.
+It acts then, effectively, to make the genotype filter more stringent.
+
+
+
+  </help>
+</tool>