comparison blat.xml @ 35:87644259e668 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ucsc_blat/ commit f38778f3a25020809c3f6cf17aafb8dbfc54b2e8
author iuc
date Sat, 28 Sep 2024 16:29:10 +0000
parents 53cf9d25ef39
children e564213a92b8
comparison
equal deleted inserted replaced
34:53cf9d25ef39 35:87644259e668
1 <tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> 1 <tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>BLAST-like sequence alignment tool</description> 2 <description>BLAST-like sequence alignment tool</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">377</token> 4 <token name="@TOOL_VERSION@">469</token>
5 <token name="@VERSION_SUFFIX@">0</token> 5 <token name="@VERSION_SUFFIX@">0</token>
6
7 <xml name="mask_cond" tokens="maskarg,label,help">
8 <conditional name="@MASKARG@_type">
9 <param argument="-@MASKARG@" type="select" label="@LABEL@" help="@HELP@">
10 <option value="" selected="true">No masking</option>
11 <option value="lower">lower - mask out lower-cased sequence</option>
12 <option value="upper">upper - mask out upper-cased sequence</option>
13 <option value="file.out">out - mask database according to RepeatMasker out</option>
14 </param>
15 <when value="" />
16 <when value="lower" />
17 <when value="upper" />
18 <when value="file.out">
19 <param name="@MASKARG@_file" type="data" format="txt" label="RepeatMasker file.out" />
20 </when>
21 </conditional>
22 </xml>
6 </macros> 23 </macros>
7 <xrefs> 24 <xrefs>
8 <xref type="bio.tools">blat</xref> 25 <xref type="bio.tools">blat</xref>
9 </xrefs> 26 </xrefs>
10 <requirements> 27 <requirements>
11 <requirement type="package" version="@TOOL_VERSION@">ucsc-blat</requirement> 28 <requirement type="package" version="@TOOL_VERSION@">ucsc-blat</requirement>
12 </requirements> 29 </requirements>
13 <command detect_errors="exit_code"><![CDATA[ 30 <command detect_errors="exit_code"><![CDATA[
14 #if str($reference_source.reference_source_selector) == "history": 31 #if str($reference_source.reference_source_selector) == "history":
15 ## blat depends on file extension 32 ## blat depends on file extension
16 #if $reference_source.database.is_of_type("fasta"): 33 #if $reference_source.database.is_of_type("fasta.gz"):
34 #set $reference_fasta_filename = "localref.fa.gz"
35 #elif $reference_source.database.is_of_type("fasta"):
17 #set $reference_fasta_filename = "localref.fa" 36 #set $reference_fasta_filename = "localref.fa"
18 #elif $reference_source.database.is_of_type("twobit"): 37 #elif $reference_source.database.is_of_type("twobit"):
19 #set $reference_fasta_filename = "localref.2bit" 38 #set $reference_fasta_filename = "localref.2bit"
20 #else 39 #else
21 #set $reference_fasta_filename = "localref" 40 #set $reference_fasta_filename = "localref"
23 ln -s '$reference_source.database' '$reference_fasta_filename' && 42 ln -s '$reference_source.database' '$reference_fasta_filename' &&
24 #else: 43 #else:
25 #set $reference_fasta_filename = str($reference_source.database.fields.path) 44 #set $reference_fasta_filename = str($reference_source.database.fields.path)
26 #end if 45 #end if
27 46
47 ## blat depends on file extension
48 #if $query.is_of_type("fasta.gz"):
49 #set $query_filename = "query.fa.gz"
50 #elif $query.is_of_type("fasta"):
51 #set $query_filename = "query.fa"
52 #elif $query.is_of_type("twobit"):
53 #set $query_filename = "query.2bit"
54 #else
55 #set $query_filename = "query"
56 #end if
57 ln -s '$query' '$query_filename' &&
58
28 blat 59 blat
29 -q=$query_type 60 -q=$query_type
30 -t=$database_type 61 -t=$database_type
31 $oneOff 62 ## Basic alignment parameters
32 #if str($minScore) 63 #if str($basic_align.minScore)
33 -minScore=$minScore 64 -minScore=$basic_align.minScore
34 #end if 65 #end if
35 -maxGap=$maxGap 66 #if str($basic_align.minIdentity)
36 #if str($repMatch) 67 -minIdentity=$basic_align.minIdentity
37 -repMatch=$repMatch 68 #end if
38 #end if 69 $basic_align.trimT
39 #if $mask_type.mask == "file.out": 70 $basic_align.noTrimA
40 -mask='$mask_type.mask_file' 71 $basic_align.trimHardA
41 #else: 72 $basic_align.fastMap
42 -mask=$mask_type.mask 73 $basic_align.fine
43 #end if 74 #if str($basic_align.maxIntron)
75 -maxIntron=$basic_align.maxIntron
76 #end if
77 $basic_align.extendThroughN
78 ## Advanced alignment parameters
79 #if str($adv_align.tileSize)
80 -tileSize=$adv_align.tileSize
81 #end if
82 #if str($adv_align.stepSize)
83 -stepSize=$adv_align.stepSize
84 #end if
85 $adv_align.oneOff
86 #if str($adv_align.minMatch)
87 -minMatch=$adv_align.minMatch
88 #end if
89 -maxGap=$adv_align.maxGap
90 #if str($adv_align.repMatch)
91 -repMatch=$adv_align.repMatch
92 #end if
93 ## Repeat masking parameters
94 #if $repeat.mask_type.mask == "file.out":
95 -mask='$repeat.mask_type.mask_file'
96 #elif $repeat.mask_type.mask:
97 -mask=$repeat.mask_type.mask
98 #end if
99 #if $repeat.qMask_type.qMask == "file.out":
100 -qMask='$repeat.qMask_type.qMask_file'
101 #elif $repeat.qMask_type.qMask:
102 -qmask=$repeat.qMask_type.qMask
103 #end if
104 #if $repeat.repeats_type.repeats == "file.out":
105 -repeats='$repeat.repeats_type.repeats_file'
106 #elif $repeat.repeats_type.repeats:
107 -repeats=$repeat.repeats_type.repeats
108 #end if
109 #if str($repeat.minRepDivergence)
110 -minRepDivergence=$repeat.minRepDivergence
111 #end if
112
44 #if str($dots) 113 #if str($dots)
45 -dots=$dots 114 -dots=$dots
46 #end if 115 #end if
47 $trimT
48 $noTrimA
49 $trimHardA
50 $fastMap
51 $fine
52 #if str($maxIntron)
53 -maxIntron=$maxIntron
54 #end if
55 $extendThroughN
56 '$reference_fasta_filename' 116 '$reference_fasta_filename'
57 '$query' 117 '$query_filename'
58 -out=$out 118 -out=$out
59 '$output' 119 '$output'
60 ]]></command> 120 ]]></command>
61 <inputs> 121 <inputs>
62 <conditional name="reference_source"> 122 <conditional name="reference_source">
65 <option value="history">History</option> 125 <option value="history">History</option>
66 </param> 126 </param>
67 <when value="cached"> 127 <when value="cached">
68 <param name="database" type="select" label="Select database"> 128 <param name="database" type="select" label="Select database">
69 <options from_data_table="all_fasta"> 129 <options from_data_table="all_fasta">
130 <!-- <column name="name" index="0"/>
131 <column name="value" index="2"/> -->
70 <filter type="sort_by" column="2" /> 132 <filter type="sort_by" column="2" />
71 </options> 133 </options>
72 <validator type="no_options" message="A built-in database is not available" /> 134 <validator type="no_options" message="A built-in database is not available" />
73 </param> 135 </param>
74 </when> 136 </when>
75 <when value="history"> 137 <when value="history">
76 <param name="database" type="data" format="fasta, twobit" label="Using database file, either a .fa, .nib or .2bit file" /> 138 <param name="database" type="data" format="fasta,fasta.gz,twobit" label="Using database file, either a fasta, fasta.gz or twobit dataset" />
77 </when> 139 </when>
78 </conditional> 140 </conditional>
79 <param name="query" type="data" format="fasta, twobit" label="Query data, either a .fa, .nib or .2bit file"/> 141 <param name="query" type="data" format="fasta,fasta.gz,twobit" label="Query data, either a fasta, fasta.gz or twobit dataset"/>
80 <param argument="-t" name="database_type" type="select" format="txt" multiple="false" label="database type" help="Choose your database type, the default is dnax"> 142 <param argument="-t" name="database_type" type="select" format="txt" multiple="false" label="database type" help="Choose your database type, the default is dnax">
81 <option value="dna">dna - DNA sequence</option> 143 <option value="dna" selected="true">dna - DNA sequence</option>
82 <option value="prot">prot - protein sequence</option> 144 <option value="prot">prot - protein sequence</option>
83 <option value="dnax" selected="true">dnax - DNA sequence translated in six frames to protein</option> 145 <option value="dnax">dnax - DNA sequence translated in six frames to protein</option>
84 </param> 146 </param>
85 <param argument="-q" name="query_type" type="select" format="txt" multiple="false" label="query type" help="Choose your query type, the default is rnax"> 147 <param argument="-q" name="query_type" type="select" format="txt" multiple="false" label="query type" help="Choose your query type, the default is rnax">
86 <option value="dna">dna - DNA sequence </option> 148 <option value="dna" selected="true">dna - DNA sequence </option>
87 <option value="rna">rna - RNA sequence</option> 149 <option value="rna">rna - RNA sequence</option>
88 <option value="prot">prot - protein sequence</option> 150 <option value="prot">prot - protein sequence</option>
89 <option value="dnax">dnax - DNA sequence translated in six frames to protein</option> 151 <option value="dnax">dnax - DNA sequence translated in six frames to protein</option>
90 <option value="rnax" selected="true">rnax - DNA sequence translated in three frames to protein</option> 152 <option value="rnax">rnax - DNA sequence translated in three frames to protein</option>
91 </param> 153 </param>
92 <param argument="-oneOff" type="boolean" truevalue="-oneOff=1" falsevalue="" label="If set, this allows one mismatch in tile and still triggers an alignments" /> 154 <section name="basic_align" title="Alignment parameters" expanded="true">
93 <param argument="-minScore" type="integer" value="30" label="Minimum score" help="It is the matches minus the mismatches minus some sort of gap penalty" /> 155 <param argument="-minScore" type="integer" value="30" label="Minimum score" help="It is the matches minus the mismatches minus some sort of gap penalty" />
94 <param argument="-maxGap" type="integer" value="2" min="0" max="3" label="Maximum gap between tiles in a clump" help="Usually set from 0 to 3. Only relevant for minMatch > 1" /> 156 <param argument="-minIdentity" type="integer" value="" optional="true" min="0" max="100" label="Minimum sequence identity (in percent)" help="Default is 90 for nucleotide searches, 25 for protein or translated protein searches" />
95 <param argument="-repMatch" type="integer" value="" optional="true" label="Number of repetitions of a tile allowed before it is marked as overused" help="Typically this is 256 for tileSize 12, 1024 for tileSize 11, 4096 for tileSize 10. Also affected by stepSize. When stepSize is halved repMatch is doubled to compensate" /> 157 <param argument="-trimT" type="boolean" truevalue="-trimT" falsevalue="" label="Trim leading poly-T" />
96 <conditional name="mask_type"> 158 <param argument="-noTrimA" type="boolean" truevalue="-noTrimA" falsevalue="" label="Don't trim trailing poly-A" />
97 <param argument="-mask" type="select" label="Mask out repeats" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored entirely in protein or translated searches. Default is lower"> 159 <param argument="-trimHardA" type="boolean" truevalue="-trimHardA" falsevalue="" label="Remove poly-A tail from qSize and alignments in .psl output" />
98 <option value="lower" selected="true">lower - mask out lower-cased sequence</option> 160 <param argument="-fastMap" type="boolean" truevalue="-fastMap" falsevalue="" label="Run for fast DNA/DNA remapping" help="It does not allow introns and require high %ID. Query sizes must not exceed 5000" />
99 <option value="upper">upper - mask out upper-cased sequence</option> 161 <param argument="-fine" type="boolean" truevalue="-fine" falsevalue="" label="Refine search for small initial and terminal exons" help="For high-quality mRNAs. Not recommended for ESTs" />
100 <option value="out">out - mask according to database.out RepeatMasker .out file</option> 162 <param argument="-maxIntron" type="integer" value="750000" optional="true" label="Maximum intron size" />
101 <option value="file.out">file.out - mask database according to RepeatMasker file.out</option> 163 <param argument="-extendThroughN" type="boolean" truevalue="-extendThroughN" falsevalue="" label="Allow extension of alignment through large blocks of N's" />
102 </param> 164 </section>
103 <when value="lower" /> 165 <section name="adv_align" title="Advanced alignment parameters" expanded="false">
104 <when value="upper" /> 166 <param argument="-tileSize" type="integer" value="" optional="true" min="1" label="Tile size" help="Sets the size of match that triggers an alignment. Usually between 8 and 12. Default is 11 for DNA and 5 for protein" />
105 <when value="out" /> 167 <param argument="-stepSize" type="integer" value="" optional="true" min="1" label="Spacing between tiles" help="Default is tileSize" />
106 <when value="file.out"> 168 <param argument="-oneOff" type="boolean" truevalue="-oneOff=1" falsevalue="" label="If set, this allows one mismatch in tile and still triggers an alignments" />
107 <param name="mask_file" type="data" format="txt" label="RepeatMasker file.out" /> 169 <param argument="-minMatch" type="integer" value="" optional="true" min="1" label="Minimum number of tile matches" help="Usually set from 2 to 4. Default is 2 for nucleotide, 1 for protein." />
108 </when> 170 <param argument="-maxGap" type="integer" value="2" min="0" max="3" label="Maximum gap between tiles in a clump" help="Usually set from 0 to 3. Only relevant for minMatch > 1" />
109 </conditional> 171 <param argument="-repMatch" type="integer" value="" optional="true" label="Number of repetitions of a tile allowed before it is marked as overused" help="Typically this is 256 for tileSize 12, 1024 for tileSize 11, 4096 for tileSize 10. Also affected by stepSize. When stepSize is halved repMatch is doubled to compensate" />
172 </section>
173 <section name="repeat" title="Repeat masking parameters" expanded="true">
174 <expand macro="mask_cond" maskarg="mask" label="Mask out repeats" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored entirely in protein or translated searches. Default is no masking"/>
175 <expand macro="mask_cond" maskarg="qMask" label="Mask out repeats in query sequence" help="Analoguous to -mask, but for the query sequence"/>
176 <expand macro="mask_cond" maskarg="repeats" label="Report matches in repeats separately" help="Repeat bases will not be masked in any way, but matches in repeat areas will be reported separately from matches in other areas in the output"/>
177 <param argument="-minRepDivergence" type="integer" value="" min="0" max="100" optional="true" label="Minimum divergence of repeats (percent)" help="to allow them to be unmasked. Default is 15. Only relevant for masking using RepeatMasker .out files" />
178 </section>
110 <param argument="-dots" type="integer" value="" optional="true" label="Output a dot every N sequences in log" help="Dots show program's progress" /> 179 <param argument="-dots" type="integer" value="" optional="true" label="Output a dot every N sequences in log" help="Dots show program's progress" />
111 <param argument="-trimT" type="boolean" truevalue="-trimT" falsevalue="" label="Trim leading poly-T" />
112 <param argument="-noTrimA" type="boolean" truevalue="-noTrimA" falsevalue="" label="Don't trim trailing poly-A" />
113 <param argument="-trimHardA" type="boolean" truevalue="-trimHardA" falsevalue="" label="Remove poly-A tail from qSize and alignments in .psl output" />
114 <param argument="-fastMap" type="boolean" truevalue="-fastMap" falsevalue="" label="Run for fast DNA/DNA remapping" help="It does not allow introns and require high %ID. Query sizes must not exceed 5000" />
115 <param argument="-fine" type="boolean" truevalue="-fine" falsevalue="" label="Refine search for small initial and terminal exons" help="For high-quality mRNAs. Not recommended for ESTs" />
116 <param argument="-maxIntron" type="integer" value="750000" optional="true" label="Maximum intron size" />
117 <param argument="-extendThroughN" type="boolean" truevalue="-extendThroughN" falsevalue="" label="Allow extension of alignment through large blocks of N's" />
118 <param name="out" type="select" label="Select output file format (-out)"> 180 <param name="out" type="select" label="Select output file format (-out)">
119 <option value="psl">Tab-separated format, no sequence (psl)</option> 181 <option value="psl">Tab-separated format, no sequence (psl)</option>
120 <option value="psl -noHead">Tab-separated format, no sequence, no header (psl -noHead)</option> 182 <option value="psl -noHead">Tab-separated format, no sequence, no header (psl -noHead)</option>
183 <option value="pslx">Tab-separated format (pslx)</option>
184 <option value="pslx -noHead">Tab-separated format, no header (pslx -noHead)</option>
121 <option value="axt">Blastz-associated axt format (axt)</option> 185 <option value="axt">Blastz-associated axt format (axt)</option>
122 <option value="maf">Multiz-associated maf format (maf)</option> 186 <option value="maf">Multiz-associated maf format (maf)</option>
123 <option value="sim4">Similar to sim4 format (sim4)</option> 187 <option value="sim4">Similar to sim4 format (sim4)</option>
124 <option value="wublast">Similar to WU-BLAST format (wublast)</option> 188 <option value="wublast">Similar to WU-BLAST format (wublast)</option>
125 <option value="blast">Similar to NCBI BLAST format (blast)</option> 189 <option value="blast">Similar to NCBI BLAST format (blast)</option>
127 <option value="blast9">NCBI BLAST tabular format with comments (blast9)</option> 191 <option value="blast9">NCBI BLAST tabular format with comments (blast9)</option>
128 </param> 192 </param>
129 </inputs> 193 </inputs>
130 <outputs> 194 <outputs>
131 <data name="output" format="tabular" label="${tool.name} on ${on_string}"> 195 <data name="output" format="tabular" label="${tool.name} on ${on_string}">
132 <change_format> 196 <change_format><!-- add test -->
133 <when input="out" value="axt" format="axt" /> 197 <when input="out" value="axt" format="axt" />
134 <when input="out" value="maf" format="maf" /> 198 <when input="out" value="maf" format="maf" />
135 <when input="out" value="sim4" format="txt" /> 199 <when input="out" value="sim4" format="txt" />
136 <when input="out" value="wublast" format="tabular" />
137 <when input="out" value="blast" format="tabular" />
138 </change_format> 200 </change_format>
139 </data> 201 </data>
140 </outputs> 202 </outputs>
141 <tests> 203 <tests>
142 <!-- test on query of GenBank RefSeq records for Gallus gallus and database of Amazona vittata --> 204 <!-- test on query of GenBank RefSeq records for Gallus gallus and database of Amazona vittata -->
143 <test> 205 <test>
144 <param name="reference_source_selector" value="history" /> 206 <conditional name="reference_source">
145 <param name="database" value="amaVit1_Gallus/amaVit1.fa" /> 207 <param name="reference_source_selector" value="history" />
146 <param name="query" value="amaVit1_Gallus/Gallus_gallus_RefSeq.fa" /> 208 <param name="database" value="amaVit1_Gallus/amaVit1.fa" ftype="fasta" />
209 </conditional>
210 <param name="query" value="amaVit1_Gallus/Gallus_gallus_RefSeq.fa" ftype="fasta" />
147 <param name="database_type" value="dnax" /> 211 <param name="database_type" value="dnax" />
148 <param name="query_type" value="rnax" /> 212 <param name="query_type" value="rnax" />
149 <param name="mask" value="lower" /> 213 <conditional name="mask_type">
150 <param name="out" value="psl -noHead" /> 214 <param name="mask" value="lower" />
151 <output name="output" value="amaVit1_Gallus/amaVit1_Gallus_gallus_sorted.psl" sort="true"/> 215 </conditional>
216 <param name="out" value="maf" />
217 <output name="output" value="amaVit1_Gallus/amaVit1_Gallus_gallus_sorted.maf" ftype="maf"/>
218 <assert_command>
219 <has_text text="-tileSize=" negate="true"/>
220 <has_text text="-stepSize=" negate="true"/>
221 <has_text text="-mask=lower"/>
222 </assert_command>
152 </test> 223 </test>
153 <!-- test on query of partial mRNA of Drosophila melanogaster and the database of Drosophila biamipes dot chromosome --> 224 <!-- test on query of partial mRNA of Drosophila melanogaster and the
225 database of Drosophila biarmipes dot chromosome
226 - also test cached reference -->
154 <test> 227 <test>
155 <param name="reference_source_selector" value="history" /> 228 <conditional name="reference_source">
156 <param name="database" value="dbia3/dbia3.fa" /> 229 <param name="reference_source_selector" value="cached"/>
157 <param name="query" value="dbia3/dmel-transcript.fa" /> 230 <param name="database" value="dbdia display name"/>
231 </conditional>
232 <param name="query" value="dbia3/dmel-transcript.fa" ftype="fasta" />
158 <param name="database_type" value="dnax" /> 233 <param name="database_type" value="dnax" />
159 <param name="query_type" value="rnax" /> 234 <param name="query_type" value="rnax" />
160 <param name="mask" value="lower" /> 235 <section name="basic_align">
236 <param name="maxIntron" value="" />
237 </section>
238 <section name="adv_align">
239 <param name="tileSize" value="5"/><!--explicitly set default .. to check if it is on the CL-->
240 <param name="stepSize" value="5"/><!--explicitly set default .. to check if it is on the CL-->
241 </section>
161 <param name="out" value="psl -noHead" /> 242 <param name="out" value="psl -noHead" />
162 <param name="maxIntron" value="" /> 243 <output name="output" value="dbia3/dbia3.sorted.psl" ftype="tabular" sort="true">
163 <output name="output" value="dbia3/dbia3.sorted.psl" sort="true"/> 244 <assert_contents>
245 <has_n_columns n="21"/>
246 </assert_contents>
247 </output>
248 <assert_command>
249 <has_text text="-tileSize=5"/>
250 <has_text text="-mask" negate="true"/>
251 </assert_command>
252 </test>
253 <test>
254 <conditional name="reference_source">
255 <param name="reference_source_selector" value="cached"/>
256 <param name="database" value="dbdia display name"/>
257 </conditional>
258 <param name="query" value="dbia3/dmel-transcript.fa" ftype="fasta" />
259 <param name="database_type" value="dnax" />
260 <param name="query_type" value="rnax" />
261 <section name="basic_align">
262 <param name="maxIntron" value="" />
263 </section>
264 <section name="adv_align">
265 <param name="tileSize" value="5"/><!--explicitly set default .. to check if it is on the CL-->
266 <param name="stepSize" value="5"/><!--explicitly set default .. to check if it is on the CL-->
267 </section>
268 <param name="out" value="pslx -noHead" />
269 <output name="output" value="dbia3/dbia3.sorted.psl" ftype="tabular" sort="true" compare="contains">
270 <assert_contents>
271 <has_n_columns n="23"/>
272 </assert_contents>
273 </output>
274 <assert_command>
275 <has_text text="-tileSize=5"/>
276 <has_text text="-mask" negate="true"/>
277 </assert_command>
164 </test> 278 </test>
165 <!-- test on the database masked by repeat masker --> 279 <!-- test on the database masked by repeat masker -->
166 <test> 280 <test>
167 <param name="reference_source_selector" value="history" /> 281 <conditional name="reference_source">
168 <param name="database" value="dbia3/dbia3_masked.2bit" /> 282 <param name="reference_source_selector" value="history" />
169 <param name="query" value="dbia3/dmel-transcript.fa" /> 283 <param name="database" value="dbia3/dbia3_masked.2bit" ftype="twobit" />
284 </conditional>
285 <param name="query" value="dbia3/dmel-transcript.fa" ftype="fasta"/>
170 <param name="database_type" value="dnax" /> 286 <param name="database_type" value="dnax" />
171 <param name="query_type" value="rnax" /> 287 <param name="query_type" value="rnax" />
172 <param name="oneOff" value="false" /> 288 <param name="oneOff" value="false" />
173 <param name="minScore" value="30" /> 289 <param name="minScore" value="30" />
174 <param name="maxGap" value="2" /> 290 <param name="maxGap" value="2" />
175 <param name="trimT" value="false" /> 291 <param name="trimT" value="false" />
176 <param name="noTrimA" value="false" /> 292 <param name="noTrimA" value="false" />
177 <param name="fine" value="false" /> 293 <param name="fine" value="false" />
178 <param name="maxIntron" value="750000" /> 294 <param name="maxIntron" value="750000" />
179 <param name="extendThroughN" value="false" /> 295 <param name="extendThroughN" value="false" />
180 <param name="mask" value="file.out" /> 296 <conditional name="mask_type">
181 <param name="mask_file" value="dbia3/dbia3_RM.out" /> 297 <param name="mask" value="file.out" />
182 <param name="out" value="psl -noHead" /> 298 <param name="mask_file" value="dbia3/dbia3_RM.out" />
299 </conditional>
300 <param name="out" value="psl" ftype="tabular" />
183 <output name="output" value="dbia3/dbia3_masked.sorted.psl"/> 301 <output name="output" value="dbia3/dbia3_masked.sorted.psl"/>
302 <assert_command>
303 <has_text text="-tileSize=" negate="true"/>
304 <has_text text="-stepSize=" negate="true"/>
305 <has_text text="-mask='/"/>
306 </assert_command>
184 </test> 307 </test>
185 </tests> 308 <!-- tiny test data from https://davetang.org/muse/2012/05/15/using-blat/ -->
309 <test>
310 <conditional name="reference_source">
311 <param name="reference_source_selector" value="history" />
312 <param name="database" value="mini-db.fa.gz" ftype="fasta.gz" />
313 </conditional>
314 <param name="query" value="mini-query.fa.gz" ftype="fasta.gz"/>
315 <param name="minScore" value="0" />
316 <section name="adv_align">
317 <param name="stepSize" value="1"/>
318 </section>
319 <param name="out" value="psl" ftype="tabular" />
320 <output name="output">
321 <assert_contents>
322 <has_n_lines n="7"/>
323 </assert_contents>
324 </output>
325 <assert_command>
326 <has_text text="-minScore=0"/>
327 <has_text text="-stepSize=1"/>
328 </assert_command>
329 </test> </tests>
186 <help> 330 <help>
187 <![CDATA[ 331 <![CDATA[
188 BLAT 332 BLAT
189 ==== 333 ====
190 BLAT is a bioinformatics software a tool which performs rapid mRNA/DNA and cross-species protein alignments. 334 BLAT is a bioinformatics software a tool which performs rapid sequence alignments (mRNA/DNA and cross-species protein).
191 335 It is designed to find sequences of high similarity that have a certain minimum length. With the default settings this is
192 blat (version: v36)- Standalone blat sequence search command line tool. 336
193 ------------------------------------------------------------------------- 337 - >95% similarity and a minimum length of 25 bases for nucleotide sequences
194 338 - >80% similarity and a minimum length of 20 amino acids for proteins
195 usage: 339
196 ++++++ 340 More divergent or shorter sequence alignments may be missed.
197 341 The algorithm works in two phases:
198 $ blat database query [-ooc=11.ooc] output.psl 342
199 343 1. Search phase: find regions of probable homology using an index of the reference sequence
200 where: 344 2. Alignment phase: Detailed Alignment of the sequences in these regions
201 database and query are each either a .fa, .nib or .2bit file, 345
202 or a list of these files with one file name per line. 346 Search phase
203 -ooc=11.ooc tells the program to load over-occurring 11-mers from 347 ++++++++++++
204 an external file. This will increase the speed 348
205 by a factor of 40 in many cases, but is not required. 349 Builds an index of the reference containing the nonoverlapping K-mers and their
206 output.psl is the name of the output file. 350 positions (by default, can be changed using `-tileSize` and `-stepSize`). Hits,
207 351 i.e. exactly matching k-mers in query and reference, are then found by looking
208 documentation: 352 up each overlapping K-mer of the query sequence. By enabling `-oneOff` the
353 algorithm allows for a single substitution. Note that this increases the run
354 time of this phase significantly.
355
356 The hits are then split into buckets of 64k (based on the database position)
357 and sorted on the diagonal (database minus query positions). Hits within the
358 gap limit form so called proto-clumps. Those are then sorted by database position
359 and put into clumps if they are within the window limit (wrt database coordinate).
360
361 Clumps with less than the minimum number of hits are discarded (-minMatch) and
362 those within 300 bases or 100 amino acids in the database are merged together.
363 The resulting clumps define regions of the database which are homologous to the
364 query sequence which are then aligned.
365
366 Alignment phase
367 +++++++++++++++
368
369 The alignment is performed differently for nucleotide and
370 amino acid sequences.
371
372 **Alignment for nucleotide sequences**: A hit list (exactly matching k-mers) for
373 the query and the homologous region of the database is generated. If necessary
374 hits are made unique by extending them until they are unique or have a maximum
375 size. The hits are then extended maximally allowing no mismatches, and overlapping
376 hits are merged.
377 Subsequent (wrt query and reference) extended hits are then linked in an
378 alignment. If there are gaps in query and reference, the algorithm recurses
379 using a smaller value for k until no additional hits are found or gaps are
380 smaller than 6 bases.
381
382 **Protein Alignments**: The hits from the search stage are extended into maximally
383 scoring ungapped alignments (HSPs) (match cost 2 and mismatch cost 1). The HSPs
384 are organized in a directed graph where an edge connects HSPs A and B if A starts
385 before B wrt query and database coordinates. The weight of the edge is then
386 defined as the score of B minus a gap penalty based on the distance between A
387 and B (overlapping HSPs are treated differently, see Kent 2002). The maximal
388 scoring alignment is then determined as the maximum weight path through the
389 graph and the HSPs of this path are removed. This is repeated until no HSPs are
390 left.
391
392 **Stitching and Filling In**:
393 In order to also find alignments of genes scattered across multiple homologous
394 regions that have been determined in the search phase a variation of the
395 alignment algorithm for proteins is employed. For details see Kent 2002.
396
397 Documentation:
209 ++++++++++++++ 398 ++++++++++++++
210 399
211 See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html) 400 See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html)
212 401
213 Source code: 402 Source code: