Mercurial > repos > yating-l > ucsc_blat
comparison blat.xml @ 35:87644259e668 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ucsc_blat/ commit f38778f3a25020809c3f6cf17aafb8dbfc54b2e8
author | iuc |
---|---|
date | Sat, 28 Sep 2024 16:29:10 +0000 |
parents | 53cf9d25ef39 |
children | e564213a92b8 |
comparison
equal
deleted
inserted
replaced
34:53cf9d25ef39 | 35:87644259e668 |
---|---|
1 <tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | 1 <tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> |
2 <description>BLAST-like sequence alignment tool</description> | 2 <description>BLAST-like sequence alignment tool</description> |
3 <macros> | 3 <macros> |
4 <token name="@TOOL_VERSION@">377</token> | 4 <token name="@TOOL_VERSION@">469</token> |
5 <token name="@VERSION_SUFFIX@">0</token> | 5 <token name="@VERSION_SUFFIX@">0</token> |
6 | |
7 <xml name="mask_cond" tokens="maskarg,label,help"> | |
8 <conditional name="@MASKARG@_type"> | |
9 <param argument="-@MASKARG@" type="select" label="@LABEL@" help="@HELP@"> | |
10 <option value="" selected="true">No masking</option> | |
11 <option value="lower">lower - mask out lower-cased sequence</option> | |
12 <option value="upper">upper - mask out upper-cased sequence</option> | |
13 <option value="file.out">out - mask database according to RepeatMasker out</option> | |
14 </param> | |
15 <when value="" /> | |
16 <when value="lower" /> | |
17 <when value="upper" /> | |
18 <when value="file.out"> | |
19 <param name="@MASKARG@_file" type="data" format="txt" label="RepeatMasker file.out" /> | |
20 </when> | |
21 </conditional> | |
22 </xml> | |
6 </macros> | 23 </macros> |
7 <xrefs> | 24 <xrefs> |
8 <xref type="bio.tools">blat</xref> | 25 <xref type="bio.tools">blat</xref> |
9 </xrefs> | 26 </xrefs> |
10 <requirements> | 27 <requirements> |
11 <requirement type="package" version="@TOOL_VERSION@">ucsc-blat</requirement> | 28 <requirement type="package" version="@TOOL_VERSION@">ucsc-blat</requirement> |
12 </requirements> | 29 </requirements> |
13 <command detect_errors="exit_code"><![CDATA[ | 30 <command detect_errors="exit_code"><![CDATA[ |
14 #if str($reference_source.reference_source_selector) == "history": | 31 #if str($reference_source.reference_source_selector) == "history": |
15 ## blat depends on file extension | 32 ## blat depends on file extension |
16 #if $reference_source.database.is_of_type("fasta"): | 33 #if $reference_source.database.is_of_type("fasta.gz"): |
34 #set $reference_fasta_filename = "localref.fa.gz" | |
35 #elif $reference_source.database.is_of_type("fasta"): | |
17 #set $reference_fasta_filename = "localref.fa" | 36 #set $reference_fasta_filename = "localref.fa" |
18 #elif $reference_source.database.is_of_type("twobit"): | 37 #elif $reference_source.database.is_of_type("twobit"): |
19 #set $reference_fasta_filename = "localref.2bit" | 38 #set $reference_fasta_filename = "localref.2bit" |
20 #else | 39 #else |
21 #set $reference_fasta_filename = "localref" | 40 #set $reference_fasta_filename = "localref" |
23 ln -s '$reference_source.database' '$reference_fasta_filename' && | 42 ln -s '$reference_source.database' '$reference_fasta_filename' && |
24 #else: | 43 #else: |
25 #set $reference_fasta_filename = str($reference_source.database.fields.path) | 44 #set $reference_fasta_filename = str($reference_source.database.fields.path) |
26 #end if | 45 #end if |
27 | 46 |
47 ## blat depends on file extension | |
48 #if $query.is_of_type("fasta.gz"): | |
49 #set $query_filename = "query.fa.gz" | |
50 #elif $query.is_of_type("fasta"): | |
51 #set $query_filename = "query.fa" | |
52 #elif $query.is_of_type("twobit"): | |
53 #set $query_filename = "query.2bit" | |
54 #else | |
55 #set $query_filename = "query" | |
56 #end if | |
57 ln -s '$query' '$query_filename' && | |
58 | |
28 blat | 59 blat |
29 -q=$query_type | 60 -q=$query_type |
30 -t=$database_type | 61 -t=$database_type |
31 $oneOff | 62 ## Basic alignment parameters |
32 #if str($minScore) | 63 #if str($basic_align.minScore) |
33 -minScore=$minScore | 64 -minScore=$basic_align.minScore |
34 #end if | 65 #end if |
35 -maxGap=$maxGap | 66 #if str($basic_align.minIdentity) |
36 #if str($repMatch) | 67 -minIdentity=$basic_align.minIdentity |
37 -repMatch=$repMatch | 68 #end if |
38 #end if | 69 $basic_align.trimT |
39 #if $mask_type.mask == "file.out": | 70 $basic_align.noTrimA |
40 -mask='$mask_type.mask_file' | 71 $basic_align.trimHardA |
41 #else: | 72 $basic_align.fastMap |
42 -mask=$mask_type.mask | 73 $basic_align.fine |
43 #end if | 74 #if str($basic_align.maxIntron) |
75 -maxIntron=$basic_align.maxIntron | |
76 #end if | |
77 $basic_align.extendThroughN | |
78 ## Advanced alignment parameters | |
79 #if str($adv_align.tileSize) | |
80 -tileSize=$adv_align.tileSize | |
81 #end if | |
82 #if str($adv_align.stepSize) | |
83 -stepSize=$adv_align.stepSize | |
84 #end if | |
85 $adv_align.oneOff | |
86 #if str($adv_align.minMatch) | |
87 -minMatch=$adv_align.minMatch | |
88 #end if | |
89 -maxGap=$adv_align.maxGap | |
90 #if str($adv_align.repMatch) | |
91 -repMatch=$adv_align.repMatch | |
92 #end if | |
93 ## Repeat masking parameters | |
94 #if $repeat.mask_type.mask == "file.out": | |
95 -mask='$repeat.mask_type.mask_file' | |
96 #elif $repeat.mask_type.mask: | |
97 -mask=$repeat.mask_type.mask | |
98 #end if | |
99 #if $repeat.qMask_type.qMask == "file.out": | |
100 -qMask='$repeat.qMask_type.qMask_file' | |
101 #elif $repeat.qMask_type.qMask: | |
102 -qmask=$repeat.qMask_type.qMask | |
103 #end if | |
104 #if $repeat.repeats_type.repeats == "file.out": | |
105 -repeats='$repeat.repeats_type.repeats_file' | |
106 #elif $repeat.repeats_type.repeats: | |
107 -repeats=$repeat.repeats_type.repeats | |
108 #end if | |
109 #if str($repeat.minRepDivergence) | |
110 -minRepDivergence=$repeat.minRepDivergence | |
111 #end if | |
112 | |
44 #if str($dots) | 113 #if str($dots) |
45 -dots=$dots | 114 -dots=$dots |
46 #end if | 115 #end if |
47 $trimT | |
48 $noTrimA | |
49 $trimHardA | |
50 $fastMap | |
51 $fine | |
52 #if str($maxIntron) | |
53 -maxIntron=$maxIntron | |
54 #end if | |
55 $extendThroughN | |
56 '$reference_fasta_filename' | 116 '$reference_fasta_filename' |
57 '$query' | 117 '$query_filename' |
58 -out=$out | 118 -out=$out |
59 '$output' | 119 '$output' |
60 ]]></command> | 120 ]]></command> |
61 <inputs> | 121 <inputs> |
62 <conditional name="reference_source"> | 122 <conditional name="reference_source"> |
65 <option value="history">History</option> | 125 <option value="history">History</option> |
66 </param> | 126 </param> |
67 <when value="cached"> | 127 <when value="cached"> |
68 <param name="database" type="select" label="Select database"> | 128 <param name="database" type="select" label="Select database"> |
69 <options from_data_table="all_fasta"> | 129 <options from_data_table="all_fasta"> |
130 <!-- <column name="name" index="0"/> | |
131 <column name="value" index="2"/> --> | |
70 <filter type="sort_by" column="2" /> | 132 <filter type="sort_by" column="2" /> |
71 </options> | 133 </options> |
72 <validator type="no_options" message="A built-in database is not available" /> | 134 <validator type="no_options" message="A built-in database is not available" /> |
73 </param> | 135 </param> |
74 </when> | 136 </when> |
75 <when value="history"> | 137 <when value="history"> |
76 <param name="database" type="data" format="fasta, twobit" label="Using database file, either a .fa, .nib or .2bit file" /> | 138 <param name="database" type="data" format="fasta,fasta.gz,twobit" label="Using database file, either a fasta, fasta.gz or twobit dataset" /> |
77 </when> | 139 </when> |
78 </conditional> | 140 </conditional> |
79 <param name="query" type="data" format="fasta, twobit" label="Query data, either a .fa, .nib or .2bit file"/> | 141 <param name="query" type="data" format="fasta,fasta.gz,twobit" label="Query data, either a fasta, fasta.gz or twobit dataset"/> |
80 <param argument="-t" name="database_type" type="select" format="txt" multiple="false" label="database type" help="Choose your database type, the default is dnax"> | 142 <param argument="-t" name="database_type" type="select" format="txt" multiple="false" label="database type" help="Choose your database type, the default is dna"> |
81 <option value="dna">dna - DNA sequence</option> | 143 <option value="dna" selected="true">dna - DNA sequence</option> |
82 <option value="prot">prot - protein sequence</option> | 144 <option value="prot">prot - protein sequence</option> |
83 <option value="dnax" selected="true">dnax - DNA sequence translated in six frames to protein</option> | 145 <option value="dnax">dnax - DNA sequence translated in six frames to protein</option> |
84 </param> | 146 </param> |
85 <param argument="-q" name="query_type" type="select" format="txt" multiple="false" label="query type" help="Choose your query type, the default is rnax"> | 147 <param argument="-q" name="query_type" type="select" format="txt" multiple="false" label="query type" help="Choose your query type, the default is dna"> |
86 <option value="dna">dna - DNA sequence </option> | 148 <option value="dna" selected="true">dna - DNA sequence </option> |
87 <option value="rna">rna - RNA sequence</option> | 149 <option value="rna">rna - RNA sequence</option> |
88 <option value="prot">prot - protein sequence</option> | 150 <option value="prot">prot - protein sequence</option> |
89 <option value="dnax">dnax - DNA sequence translated in six frames to protein</option> | 151 <option value="dnax">dnax - DNA sequence translated in six frames to protein</option> |
90 <option value="rnax" selected="true">rnax - DNA sequence translated in three frames to protein</option> | 152 <option value="rnax">rnax - DNA sequence translated in three frames to protein</option> |
91 </param> | 153 </param> |
92 <param argument="-oneOff" type="boolean" truevalue="-oneOff=1" falsevalue="" label="If set, this allows one mismatch in tile and still triggers an alignments" /> | 154 <section name="basic_align" title="Alignment parameters" expanded="true"> |
93 <param argument="-minScore" type="integer" value="30" label="Minimum score" help="It is the matches minus the mismatches minus some sort of gap penalty" /> | 155 <param argument="-minScore" type="integer" value="30" label="Minimum score" help="It is the matches minus the mismatches minus some sort of gap penalty" /> |
94 <param argument="-maxGap" type="integer" value="2" min="0" max="3" label="Maximum gap between tiles in a clump" help="Usually set from 0 to 3. Only relevant for minMatch > 1" /> | 156 <param argument="-minIdentity" type="integer" value="" optional="true" min="0" max="100" label="Minimum sequence identity (in percent)" help="Default is 90 for nucleotide searches, 25 for protein or translated protein searches" /> |
95 <param argument="-repMatch" type="integer" value="" optional="true" label="Number of repetitions of a tile allowed before it is marked as overused" help="Typically this is 256 for tileSize 12, 1024 for tileSize 11, 4096 for tileSize 10. Also affected by stepSize. When stepSize is halved repMatch is doubled to compensate" /> | 157 <param argument="-trimT" type="boolean" truevalue="-trimT" falsevalue="" label="Trim leading poly-T" /> |
96 <conditional name="mask_type"> | 158 <param argument="-noTrimA" type="boolean" truevalue="-noTrimA" falsevalue="" label="Don't trim trailing poly-A" /> |
97 <param argument="-mask" type="select" label="Mask out repeats" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored entirely in protein or translated searches. Default is lower"> | 159 <param argument="-trimHardA" type="boolean" truevalue="-trimHardA" falsevalue="" label="Remove poly-A tail from qSize and alignments in .psl output" /> |
98 <option value="lower" selected="true">lower - mask out lower-cased sequence</option> | 160 <param argument="-fastMap" type="boolean" truevalue="-fastMap" falsevalue="" label="Run for fast DNA/DNA remapping" help="It does not allow introns and requires high %ID. Query sizes must not exceed 5000" /> |
99 <option value="upper">upper - mask out upper-cased sequence</option> | 161 <param argument="-fine" type="boolean" truevalue="-fine" falsevalue="" label="Refine search for small initial and terminal exons" help="For high-quality mRNAs. Not recommended for ESTs" /> |
100 <option value="out">out - mask according to database.out RepeatMasker .out file</option> | 162 <param argument="-maxIntron" type="integer" value="750000" optional="true" label="Maximum intron size" /> |
101 <option value="file.out">file.out - mask database according to RepeatMasker file.out</option> | 163 <param argument="-extendThroughN" type="boolean" truevalue="-extendThroughN" falsevalue="" label="Allow extension of alignment through large blocks of N's" /> |
102 </param> | 164 </section> |
103 <when value="lower" /> | 165 <section name="adv_align" title="Advanced alignment parameters" expanded="false"> |
104 <when value="upper" /> | 166 <param argument="-tileSize" type="integer" value="" optional="true" min="1" label="Tile size" help="Sets the size of match that triggers an alignment. Usually between 8 and 12. Default is 11 for DNA and 5 for protein" /> |
105 <when value="out" /> | 167 <param argument="-stepSize" type="integer" value="" optional="true" min="1" label="Spacing between tiles" help="Default is tileSize" /> |
106 <when value="file.out"> | 168 <param argument="-oneOff" type="boolean" truevalue="-oneOff=1" falsevalue="" label="If set, this allows one mismatch in tile and still triggers an alignment" /> |
107 <param name="mask_file" type="data" format="txt" label="RepeatMasker file.out" /> | 169 <param argument="-minMatch" type="integer" value="" optional="true" min="1" label="Minimum number of tile matches" help="Usually set from 2 to 4. Default is 2 for nucleotide, 1 for protein." /> |
108 </when> | 170 <param argument="-maxGap" type="integer" value="2" min="0" max="3" label="Maximum gap between tiles in a clump" help="Usually set from 0 to 3. Only relevant for minMatch > 1" /> |
109 </conditional> | 171 <param argument="-repMatch" type="integer" value="" optional="true" label="Number of repetitions of a tile allowed before it is marked as overused" help="Typically this is 256 for tileSize 12, 1024 for tileSize 11, 4096 for tileSize 10. Also affected by stepSize. When stepSize is halved repMatch is doubled to compensate" /> |
172 </section> | |
173 <section name="repeat" title="Repeat masking parameters" expanded="true"> | |
174 <expand macro="mask_cond" maskarg="mask" label="Mask out repeats" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored entirely in protein or translated searches. Default is no masking"/> | |
175 <expand macro="mask_cond" maskarg="qMask" label="Mask out repeats in query sequence" help="Analogous to -mask, but for the query sequence"/> | |
176 <expand macro="mask_cond" maskarg="repeats" label="Report matches in repeats separately" help="Repeat bases will not be masked in any way, but matches in repeat areas will be reported separately from matches in other areas in the output"/> | |
177 <param argument="-minRepDivergence" type="integer" value="" min="0" max="100" optional="true" label="Minimum divergence of repeats (percent)" help="to allow them to be unmasked. Default is 15. Only relevant for masking using RepeatMasker .out files" /> | |
178 </section> | |
110 <param argument="-dots" type="integer" value="" optional="true" label="Output a dot every N sequences in log" help="Dots show program's progress" /> | 179 <param argument="-dots" type="integer" value="" optional="true" label="Output a dot every N sequences in log" help="Dots show program's progress" /> |
111 <param argument="-trimT" type="boolean" truevalue="-trimT" falsevalue="" label="Trim leading poly-T" /> | |
112 <param argument="-noTrimA" type="boolean" truevalue="-noTrimA" falsevalue="" label="Don't trim trailing poly-A" /> | |
113 <param argument="-trimHardA" type="boolean" truevalue="-trimHardA" falsevalue="" label="Remove poly-A tail from qSize and alignments in .psl output" /> | |
114 <param argument="-fastMap" type="boolean" truevalue="-fastMap" falsevalue="" label="Run for fast DNA/DNA remapping" help="It does not allow introns and require high %ID. Query sizes must not exceed 5000" /> | |
115 <param argument="-fine" type="boolean" truevalue="-fine" falsevalue="" label="Refine search for small initial and terminal exons" help="For high-quality mRNAs. Not recommended for ESTs" /> | |
116 <param argument="-maxIntron" type="integer" value="750000" optional="true" label="Maximum intron size" /> | |
117 <param argument="-extendThroughN" type="boolean" truevalue="-extendThroughN" falsevalue="" label="Allow extension of alignment through large blocks of N's" /> | |
118 <param name="out" type="select" label="Select output file format (-out)"> | 180 <param name="out" type="select" label="Select output file format (-out)"> |
119 <option value="psl">Tab-separated format, no sequence (psl)</option> | 181 <option value="psl">Tab-separated format, no sequence (psl)</option> |
120 <option value="psl -noHead">Tab-separated format, no sequence, no header (psl -noHead)</option> | 182 <option value="psl -noHead">Tab-separated format, no sequence, no header (psl -noHead)</option> |
183 <option value="pslx">Tab-separated format (pslx)</option> | |
184 <option value="pslx -noHead">Tab-separated format, no header (pslx -noHead)</option> | |
121 <option value="axt">Blastz-associated axt format (axt)</option> | 185 <option value="axt">Blastz-associated axt format (axt)</option> |
122 <option value="maf">Multiz-associated maf format (maf)</option> | 186 <option value="maf">Multiz-associated maf format (maf)</option> |
123 <option value="sim4">Similar to sim4 format (sim4)</option> | 187 <option value="sim4">Similar to sim4 format (sim4)</option> |
124 <option value="wublast">Similar to WU-BLAST format (wublast)</option> | 188 <option value="wublast">Similar to WU-BLAST format (wublast)</option> |
125 <option value="blast">Similar to NCBI BLAST format (blast)</option> | 189 <option value="blast">Similar to NCBI BLAST format (blast)</option> |
127 <option value="blast9">NCBI BLAST tabular format with comments (blast9)</option> | 191 <option value="blast9">NCBI BLAST tabular format with comments (blast9)</option> |
128 </param> | 192 </param> |
129 </inputs> | 193 </inputs> |
130 <outputs> | 194 <outputs> |
131 <data name="output" format="tabular" label="${tool.name} on ${on_string}"> | 195 <data name="output" format="tabular" label="${tool.name} on ${on_string}"> |
132 <change_format> | 196 <change_format><!-- add test --> |
133 <when input="out" value="axt" format="axt" /> | 197 <when input="out" value="axt" format="axt" /> |
134 <when input="out" value="maf" format="maf" /> | 198 <when input="out" value="maf" format="maf" /> |
135 <when input="out" value="sim4" format="txt" /> | 199 <when input="out" value="sim4" format="txt" /> |
136 <when input="out" value="wublast" format="tabular" /> | |
137 <when input="out" value="blast" format="tabular" /> | |
138 </change_format> | 200 </change_format> |
139 </data> | 201 </data> |
140 </outputs> | 202 </outputs> |
141 <tests> | 203 <tests> |
142 <!-- test on query of GenBank RefSeq records for Gallus gallus and database of Amazona vittata --> | 204 <!-- test on query of GenBank RefSeq records for Gallus gallus and database of Amazona vittata --> |
143 <test> | 205 <test> |
144 <param name="reference_source_selector" value="history" /> | 206 <conditional name="reference_source"> |
145 <param name="database" value="amaVit1_Gallus/amaVit1.fa" /> | 207 <param name="reference_source_selector" value="history" /> |
146 <param name="query" value="amaVit1_Gallus/Gallus_gallus_RefSeq.fa" /> | 208 <param name="database" value="amaVit1_Gallus/amaVit1.fa" ftype="fasta" /> |
209 </conditional> | |
210 <param name="query" value="amaVit1_Gallus/Gallus_gallus_RefSeq.fa" ftype="fasta" /> | |
147 <param name="database_type" value="dnax" /> | 211 <param name="database_type" value="dnax" /> |
148 <param name="query_type" value="rnax" /> | 212 <param name="query_type" value="rnax" /> |
149 <param name="mask" value="lower" /> | 213 <conditional name="mask_type"> |
150 <param name="out" value="psl -noHead" /> | 214 <param name="mask" value="lower" /> |
151 <output name="output" value="amaVit1_Gallus/amaVit1_Gallus_gallus_sorted.psl" sort="true"/> | 215 </conditional> |
216 <param name="out" value="maf" /> | |
217 <output name="output" value="amaVit1_Gallus/amaVit1_Gallus_gallus_sorted.maf" ftype="maf"/> | |
218 <assert_command> | |
219 <has_text text="-tileSize=" negate="true"/> | |
220 <has_text text="-stepSize=" negate="true"/> | |
221 <has_text text="-mask=lower"/> | |
222 </assert_command> | |
152 </test> | 223 </test> |
153 <!-- test on query of partial mRNA of Drosophila melanogaster and the database of Drosophila biamipes dot chromosome --> | 224 <!-- test on query of partial mRNA of Drosophila melanogaster and the |
225 database of Drosophila biarmipes dot chromosome | |
226 - also test cached reference --> | |
154 <test> | 227 <test> |
155 <param name="reference_source_selector" value="history" /> | 228 <conditional name="reference_source"> |
156 <param name="database" value="dbia3/dbia3.fa" /> | 229 <param name="reference_source_selector" value="cached"/> |
157 <param name="query" value="dbia3/dmel-transcript.fa" /> | 230 <param name="database" value="dbdia display name"/> |
231 </conditional> | |
232 <param name="query" value="dbia3/dmel-transcript.fa" ftype="fasta" /> | |
158 <param name="database_type" value="dnax" /> | 233 <param name="database_type" value="dnax" /> |
159 <param name="query_type" value="rnax" /> | 234 <param name="query_type" value="rnax" /> |
160 <param name="mask" value="lower" /> | 235 <section name="basic_align"> |
236 <param name="maxIntron" value="" /> | |
237 </section> | |
238 <section name="adv_align"> | |
239 <param name="tileSize" value="5"/><!--explicitly set default .. to check if it is on the CL--> | |
240 <param name="stepSize" value="5"/><!--explicitly set default .. to check if it is on the CL--> | |
241 </section> | |
161 <param name="out" value="psl -noHead" /> | 242 <param name="out" value="psl -noHead" /> |
162 <param name="maxIntron" value="" /> | 243 <output name="output" value="dbia3/dbia3.sorted.psl" ftype="tabular" sort="true"> |
163 <output name="output" value="dbia3/dbia3.sorted.psl" sort="true"/> | 244 <assert_contents> |
245 <has_n_columns n="21"/> | |
246 </assert_contents> | |
247 </output> | |
248 <assert_command> | |
249 <has_text text="-tileSize=5"/> | |
250 <has_text text="-mask" negate="true"/> | |
251 </assert_command> | |
252 </test> | |
253 <test> | |
254 <conditional name="reference_source"> | |
255 <param name="reference_source_selector" value="cached"/> | |
256 <param name="database" value="dbdia display name"/> | |
257 </conditional> | |
258 <param name="query" value="dbia3/dmel-transcript.fa" ftype="fasta" /> | |
259 <param name="database_type" value="dnax" /> | |
260 <param name="query_type" value="rnax" /> | |
261 <section name="basic_align"> | |
262 <param name="maxIntron" value="" /> | |
263 </section> | |
264 <section name="adv_align"> | |
265 <param name="tileSize" value="5"/><!--explicitly set default .. to check if it is on the CL--> | |
266 <param name="stepSize" value="5"/><!--explicitly set default .. to check if it is on the CL--> | |
267 </section> | |
268 <param name="out" value="pslx -noHead" /> | |
269 <output name="output" value="dbia3/dbia3.sorted.psl" ftype="tabular" sort="true" compare="contains"> | |
270 <assert_contents> | |
271 <has_n_columns n="23"/> | |
272 </assert_contents> | |
273 </output> | |
274 <assert_command> | |
275 <has_text text="-tileSize=5"/> | |
276 <has_text text="-mask" negate="true"/> | |
277 </assert_command> | |
164 </test> | 278 </test> |
165 <!-- test on the database masked by repeat masker --> | 279 <!-- test on the database masked by repeat masker --> |
166 <test> | 280 <test> |
167 <param name="reference_source_selector" value="history" /> | 281 <conditional name="reference_source"> |
168 <param name="database" value="dbia3/dbia3_masked.2bit" /> | 282 <param name="reference_source_selector" value="history" /> |
169 <param name="query" value="dbia3/dmel-transcript.fa" /> | 283 <param name="database" value="dbia3/dbia3_masked.2bit" ftype="twobit" /> |
284 </conditional> | |
285 <param name="query" value="dbia3/dmel-transcript.fa" ftype="fasta"/> | |
170 <param name="database_type" value="dnax" /> | 286 <param name="database_type" value="dnax" /> |
171 <param name="query_type" value="rnax" /> | 287 <param name="query_type" value="rnax" /> |
172 <param name="oneOff" value="false" /> | 288 <param name="oneOff" value="false" /> |
173 <param name="minScore" value="30" /> | 289 <param name="minScore" value="30" /> |
174 <param name="maxGap" value="2" /> | 290 <param name="maxGap" value="2" /> |
175 <param name="trimT" value="false" /> | 291 <param name="trimT" value="false" /> |
176 <param name="noTrimA" value="false" /> | 292 <param name="noTrimA" value="false" /> |
177 <param name="fine" value="false" /> | 293 <param name="fine" value="false" /> |
178 <param name="maxIntron" value="750000" /> | 294 <param name="maxIntron" value="750000" /> |
179 <param name="extendThroughN" value="false" /> | 295 <param name="extendThroughN" value="false" /> |
180 <param name="mask" value="file.out" /> | 296 <conditional name="mask_type"> |
181 <param name="mask_file" value="dbia3/dbia3_RM.out" /> | 297 <param name="mask" value="file.out" /> |
182 <param name="out" value="psl -noHead" /> | 298 <param name="mask_file" value="dbia3/dbia3_RM.out" /> |
299 </conditional> | |
300 <param name="out" value="psl" ftype="tabular" /> | |
183 <output name="output" value="dbia3/dbia3_masked.sorted.psl"/> | 301 <output name="output" value="dbia3/dbia3_masked.sorted.psl"/> |
302 <assert_command> | |
303 <has_text text="-tileSize=" negate="true"/> | |
304 <has_text text="-stepSize=" negate="true"/> | |
305 <has_text text="-mask='/"/> | |
306 </assert_command> | |
184 </test> | 307 </test> |
185 </tests> | 308 <!-- tiny test data from https://davetang.org/muse/2012/05/15/using-blat/ --> |
309 <test> | |
310 <conditional name="reference_source"> | |
311 <param name="reference_source_selector" value="history" /> | |
312 <param name="database" value="mini-db.fa.gz" ftype="fasta.gz" /> | |
313 </conditional> | |
314 <param name="query" value="mini-query.fa.gz" ftype="fasta.gz"/> | |
315 <param name="minScore" value="0" /> | |
316 <section name="adv_align"> | |
317 <param name="stepSize" value="1"/> | |
318 </section> | |
319 <param name="out" value="psl" ftype="tabular" /> | |
320 <output name="output"> | |
321 <assert_contents> | |
322 <has_n_lines n="7"/> | |
323 </assert_contents> | |
324 </output> | |
325 <assert_command> | |
326 <has_text text="-minScore=0"/> | |
327 <has_text text="-stepSize=1"/> | |
328 </assert_command> | |
329 </test> </tests> | |
186 <help> | 330 <help> |
187 <![CDATA[ | 331 <![CDATA[ |
188 BLAT | 332 BLAT |
189 ==== | 333 ==== |
190 BLAT is a bioinformatics software a tool which performs rapid mRNA/DNA and cross-species protein alignments. | 334 BLAT is a bioinformatics software tool which performs rapid sequence alignments (mRNA/DNA and cross-species protein). |
191 | 335 It is designed to find sequences of high similarity that have a certain minimum length. With the default settings this is |
192 blat (version: v36)- Standalone blat sequence search command line tool. | 336 |
193 ------------------------------------------------------------------------- | 337 - >95% similarity and a minimum length of 25 bases for nucleotide sequences |
194 | 338 - >80% similarity and a minimum length of 20 amino acids for proteins |
195 usage: | 339 |
196 ++++++ | 340 More divergent or shorter sequence alignments may be missed. |
197 | 341 The algorithm works in two phases: |
198 $ blat database query [-ooc=11.ooc] output.psl | 342 |
199 | 343 1. Search phase: find regions of probable homology using an index of the reference sequence |
200 where: | 344 2. Alignment phase: Detailed Alignment of the sequences in these regions |
201 database and query are each either a .fa, .nib or .2bit file, | 345 |
202 or a list of these files with one file name per line. | 346 Search phase |
203 -ooc=11.ooc tells the program to load over-occurring 11-mers from | 347 ++++++++++++ |
204 an external file. This will increase the speed | 348 |
205 by a factor of 40 in many cases, but is not required. | 349 Builds an index of the reference containing the nonoverlapping K-mers and their |
206 output.psl is the name of the output file. | 350 positions (by default, can be changed using `-tileSize` and `-stepSize`). Hits, |
207 | 351 i.e. exactly matching k-mers in query and reference, are then found by looking |
208 documentation: | 352 up each overlapping K-mer of the query sequence. By enabling `-oneOff` the |
353 algorithm allows for a single substitution. Note that this increases the run | |
354 time of this phase significantly. | |
355 | |
356 The hits are then split into buckets of 64k (based on the database position) | |
357 and sorted on the diagonal (database minus query positions). Hits within the | |
358 gap limit form so called proto-clumps. Those are then sorted by database position | |
359 and put into clumps if they are within the window limit (wrt database coordinate). | |
360 | |
361 Clumps with less than the minimum number of hits are discarded (-minMatch) and | |
362 those within 300 bases or 100 amino acids in the database are merged together. | |
363 The resulting clumps define regions of the database which are homologous to the | |
364 query sequence which are then aligned. | |
365 | |
366 Alignment phase | |
367 +++++++++++++++ | |
368 | |
369 The alignment is performed differently for nucleotide and | |
370 amino acid sequences. | |
371 | |
372 **Alignment for nucleotide sequences**: A hit list (exactly matching k-mers) for | |
373 the query and the homologous region of the database is generated. If necessary | |
374 hits are made unique by extending them until they are unique or have a maximum | |
375 size. The hits are then extended maximally allowing no mismatches, and overlapping | |
376 hits are merged. | |
377 Subsequent (wrt query and reference) extended hits are then linked in an | |
378 alignment. If there are gaps in query and reference, the algorithm recurses | |
379 using a smaller value for k until no additional hits are found or gaps are | |
380 smaller than 6 bases. | |
381 | |
382 **Protein Alignments**: The hits from the search stage are extended into maximally | |
383 scoring ungapped alignments (HSPs) (match cost 2 and mismatch cost 1). The HSPs | |
384 are organized in a directed graph where an edge connects HSPs A and B if A starts | |
385 before B wrt query and database coordinates. The weight of the edge is then | |
386 defined as the score of B minus a gap penalty based on the distance between A | |
387 and B (overlapping HSPs are treated differently, see Kent 2002). The maximal | |
388 scoring alignment is then determined as the maximum weight path through the | |
389 graph and the HSPs of this path are removed. This is repeated until no HSPs are | |
390 left. | |
391 | |
392 **Stitching and Filling In**: | |
393 In order to find also alignments of genes scattered across multiple homologous | |
394 regions that have been determined in the search phase a variation of the | |
395 alignment algorithm for proteins is employed. For details see Kent 2002. | |
396 | |
397 Documentation: | |
209 ++++++++++++++ | 398 ++++++++++++++ |
210 | 399 |
211 See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html) | 400 See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html) |
212 | 401 |
213 Source code: | 402 Source code: |