diff blat_wrapper.xml @ 0:3cec538aab33 draft

Uploaded
author joachim-jacob
date Thu, 30 May 2013 07:13:55 -0400
parents
children da4426cac227
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blat_wrapper.xml	Thu May 30 07:13:55 2013 -0400
@@ -0,0 +1,179 @@
+<tool id="blat" name="Align sequences with BLAT" version="1.0.0">
+    <!-- Additional info: TODO record which BLAT versions this wrapper is compatible with. -->
+    <!-- Short description text for the tool. -->
+    <description>
+		to a reference genome
+    </description>
+    
+    <!-- No version command is configured for this tool. -->
+    <version_command/>
+    
+    <!-- The tool declares a dependency on the "blat" package, version 1.0.0. -->
+    <requirements>
+        <requirement type="package" version="1.0.0">blat</requirement>
+    </requirements>
+    
+    <!-- Runs blat_wrapper.pl with the perl interpreter. The only argument is
+         $configfile, the file generated from the "configfile" template in the
+         configfiles section, which carries every parameter setting. -->
+    <command interpreter="perl">
+        ## it is recommended that you write a wrapper for your tool
+        ## and pass all parameters to that tool, which parses them.
+        blat_wrapper.pl $configfile
+
+    </command> 
+   
+    <inputs>
+        <!-- Query sequences (FASTA dataset); written to the config file as "input". -->
+        <param name="input"
+               type="data"
+               format="fasta"
+               label="Query fasta sequences"/>
+        <!-- Query sequence type; written to the config file as "q". -->
+        <param name="q"
+               type="select"
+               label="Process the query as">
+            <option value="dna" selected="True">DNA</option>
+            <option value="rna">RNA</option>
+            <option value="prot">Protein</option>
+            <option value="dnax">DNA sequence translated in six frames to protein</option>
+            <option value="rnax">RNA sequence translated in six frames to protein</option>
+        </param>
+        <!-- Source of the sequence database: either an entry of the twobit.loc
+             data table ("indexed") or a FASTA dataset from the history ("history"). -->
+        <conditional name="refGenomeSource">
+          <param name="genomeSource" type="select" label="Will you select a sequence database from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history" selected="True">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+              <options from_data_table="twobit.loc">
+                <filter type="sort_by" column="1"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+              </options>
+            </param>
+            <!-- "range" must be a sibling of "index", not a child of it: the
+                 config file template reads it as $refGenomeSource.range. -->
+            <param name="range" type="text" size="25" value="" label="Restrict the search space to this genomic range in the sequence database" help="Format = 'chr1:250000-260000' (i.e. name:startposition-endposition)" />
+          </when>
+          <when value="history">
+            <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the sequence database" />
+          </when>  <!-- history -->
+        </conditional>  <!-- refGenomeSource -->
+      
+        <!-- Sequence database type; written to the config file as "t". -->
+        <param name="t"
+               type="select"
+               label="Process the sequence database as">
+            <option value="dna" selected="True">DNA</option>
+            <option value="prot">Protein</option>
+            <option value="dnax">DNA sequence translated in six frames to protein</option>
+        </param>
+        
+        <!-- Optional tuning parameters. They are written to the config file
+             only when "use" is "yes". Numeric options are declared as integer
+             params so that non-numeric input is rejected by the form instead
+             of being passed on to the wrapper. -->
+        <conditional name="advanced_params">
+            <param name="use" type="select" label="Show advanced parameters">
+                <option value="no" selected="True">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no" />
+            <when value="yes" >
+                <param name="tileSize" type="integer" size="3" value="11" label="size of match that triggers an alignment" help="Usually between 8 and 12. Default is 11 for DNA and 5 for protein." />
+                <param name="stepSize" type="integer" size="3" value="11" label="spacing between tiles" />
+                <param name="oneOff" type="boolean" truevalue="1" falsevalue="0" checked="False" label="Allowing one mismatch in tile?" />
+                <param name="minMatch" type="select" label="number of tile matches" >
+                    <option value="1">1 (for proteins)</option>
+                    <option value="2" selected="True">2</option>
+                    <option value="3">3</option>
+                    <option value="4">4</option>
+                </param>
+                <param name="minScore" type="integer" size="3" value="30" label="minimum score" help="This is the matches minus the mismatches minus some sort of gap penalty. Default is 30" />
+                <param name="maxGap" type="select" label="size of maximum gap between tiles in a clump" help=" Usually set from 0 to 3.  Default is 2. Only relevant for minMatch > 1">
+                    <option value="0">0</option>
+                    <option value="1">1</option>
+                    <option value="2" selected="True">2</option>
+                    <option value="3">3</option>
+                </param>
+                <param name="mask" type="select" label="Mask out repeats in reference sequence" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored entirely in protein or translated searches.">
+                    <option value="lower">mask out lower cased sequence</option>
+                    <option value="upper">mask out upper cased sequence</option>
+                </param>
+                <param name="qMask" type="select" label="Mask out repeats in query sequence" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored entirely in protein or translated searches.">
+                    <option value="lower">mask out lower cased sequence</option>
+                    <option value="upper">mask out upper cased sequence</option>
+                </param>
+                <param name="repeats" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="Report matches in repeats separately?" />
+                <param name="trimT" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="Trim leading poly-T" />
+                <!-- Inverted on purpose: ticking "Trim trailing poly-A" yields noTrimA=no. -->
+                <param name="noTrimA" type="boolean" truevalue="no" falsevalue="yes" checked="True" label="Trim trailing poly-A" />
+                <param name="fine" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="For high quality mRNAs look harder for small initial and terminal exons. Not recommended for ESTs" />
+                <param name="maxIntron" type="integer" size="10" value="750000" label="Maximum intron size" />
+                <param name="extendThroughN" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="Extend alignment through large blocks of N's? Not recommended for ESTs" />
+            </when>
+        </conditional>
+    </inputs>
+
+    <!-- Single tabular dataset; its path is written to the config file as "output". -->
+    <outputs>
+        <data name="output"
+              format="tabular"
+              label="${tool.name} on ${on_string}: alignments"/>
+    </outputs>
+
+    <configfiles>
+		<!-- This config file collects all parameter settings, one "key==value"
+		     entry per line, and is handed to blat_wrapper.pl as $configfile.
+		     Every entry, including "range", uses the same "==" separator.
+		     Advanced settings are emitted only when advanced_params.use is "yes". -->
+		<configfile name="configfile">
+			## first we pass some galaxy environment variables
+			galtemp==${__new_file_path__}
+			
+			#if $refGenomeSource.genomeSource == "indexed"
+				referencepath==${refGenomeSource.index.fields.path}
+				range==$refGenomeSource.range
+			#else
+				referencepath==${refGenomeSource.ownFile}
+			#end if
+
+			input==$input
+			output==$output
+			q==$q
+			t==$t
+
+			advanced_params.use==$advanced_params.use
+			#if $advanced_params.use=="yes"
+				tileSize==$advanced_params.tileSize
+				stepSize==$advanced_params.stepSize
+				oneOff==$advanced_params.oneOff
+				minMatch==$advanced_params.minMatch
+				minScore==$advanced_params.minScore
+				maxGap==$advanced_params.maxGap
+				mask==$advanced_params.mask
+				qMask==$advanced_params.qMask
+				repeats==$advanced_params.repeats
+				trimT==$advanced_params.trimT
+				noTrimA==$advanced_params.noTrimA
+				fine==$advanced_params.fine
+				maxIntron==$advanced_params.maxIntron
+				extendThroughN==$advanced_params.extendThroughN
+			#end if
+
+		</configfile>
+	</configfiles> 
+
+	<tests>
+		<!-- Aligns input83.fas against a history database with the advanced
+		     parameters set explicitly to their default values. Test params
+		     are addressed by their declared input names; the dotted
+		     Cheetah-style paths used in the config file template are not
+		     input names. -->
+		<test>
+          <param name="input" value="input83.fas" />
+          <param name="genomeSource" value="history" />
+          <param name="ownFile" value="databasetest1.txt" />
+          <param name="q" value="dna" />
+          <param name="t" value="dna" />
+          <param name="use" value="yes" />
+          <param name="tileSize" value="11" />
+          <param name="stepSize" value="11" />
+          <param name="oneOff" value="0" />
+          <param name="minMatch" value="2" />
+          <param name="minScore" value="30" />
+          <param name="maxGap" value="2" />
+          <param name="mask" value="lower" />
+          <param name="qMask" value="lower" />
+          <param name="repeats" value="no" />
+          <param name="trimT" value="no" />
+          <param name="noTrimA" value="yes" />
+          <param name="fine" value="no" />
+          <param name="maxIntron" value="750000" />
+          <param name="extendThroughN" value="no" />
+          <output name="output" file="outputtest1.txt" lines_diff="2"/>
+        </test>
+    </tests>
+
+    <help>
+**BLAT Overview**
+
+BLAT_ is an alignment tool like BLAST, but it is structured differently. 
+On DNA, BLAT works by keeping an index of an entire genome in memory. 
+Thus, the target database of BLAT is not a set of GenBank sequences, but 
+instead an index derived from the assembly of the entire genome. By 
+default, the index consists of all non-overlapping 11-mers except for 
+those heavily involved in repeats.
+
+.. _BLAT: http://genome.ucsc.edu/FAQ/FAQblat.html
+
+    </help>
+</tool>
+