0
|
1 <tool id="blast_reciprocal_best_hits" name="BLAST Reciprocal Best Hits (RBH)" version="0.1.0">
|
|
2 <description>from two FASTA files</description>
|
|
<requirements>
    <!-- Command-line programs this tool declares as required. -->
    <requirement type="binary">makeblastdb</requirement>
    <requirement type="binary">blastp</requirement>
    <requirement type="binary">blastn</requirement>
    <!-- Versioned package dependency on BLAST+. -->
    <requirement type="package" version="2.2.29">blast+</requirement>
</requirements>
|
|
<version_command interpreter="python">blast_rbh.py --version</version_command>
|
|
<command interpreter="python">
blast_rbh.py "$fasta_a" "$fasta_b" $seq.dbtype
## Pass the BLAST type selected inside the conditional branch that matches
## the chosen molecule type (dbtype only offers "prot" and "nucl").
#if $seq.dbtype=="prot"
$seq.prot_type
#else
$seq.nucl_type
#end if
$identity $q_cover "$output"
</command>
|
|
<inputs>
    <!-- Galaxy does not have sub-types for protein vs nucleotide FASTA -->
    <!-- The explanatory text on the two FASTA inputs uses help="...", the
         attribute Galaxy renders under a parameter. -->
    <param name="fasta_a" type="data" format="fasta"
           label="Genes/proteins from species A"
           help="FASTA file, one sequence per gene/protein." />
    <param name="fasta_b" type="data" format="fasta"
           label="Genes/proteins from species B"
           help="FASTA file, one sequence per gene/protein." />
    <!-- The molecule type decides which list of BLAST types is offered. -->
    <conditional name="seq">
        <param name="dbtype" type="select" label="Molecule type of FASTA inputs">
            <option value="prot">protein</option>
            <option value="nucl">nucleotide</option>
        </param>
        <when value="prot">
            <param name="prot_type" type="select" display="radio" label="Type of BLAST">
                <option value="blastp">blastp - Traditional BLASTP to compare a protein query to a protein database</option>
                <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option>
            </param>
        </when>
        <when value="nucl">
            <param name="nucl_type" type="select" display="radio" label="Type of BLAST">
                <option value="megablast">megablast - Traditional megablast used to find very similar (e.g., intraspecies or closely related species) sequences</option>
                <option value="blastn">blastn - Traditional BLASTN requiring an exact match of 11, for somewhat similar sequences</option>
                <option value="blastn-short">blastn-short - BLASTN program optimized for sequences shorter than 50 bases</option>
                <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option>
            </param>
        </when>
    </conditional>
    <!-- Percentage thresholds, each limited to the range 0-100. -->
    <param name="identity" type="float" value="70" min="0" max="100"
           label="Minimum percentage identity for BLAST matches"
           help="Default is 70%, use 0 for no filtering." />
    <param name="q_cover" type="float" value="50" min="0" max="100"
           label="Minimum percentage query coverage for BLAST matches"
           help="Default is 50%, use 0 for no filtering." />
</inputs>
|
|
<outputs>
    <!-- Single tabular dataset; its history label names both input datasets. -->
    <data name="output" format="tabular" label="BLAST RBH: $fasta_a.name vs $fasta_b.name" />
</outputs>
|
|
<tests>
    <!-- Protein inputs with blastp and both filters disabled. The BLAST type
         is set through prot_type, the parameter that exists in the "prot"
         branch of the seq conditional. -->
    <test>
        <param name="fasta_a" value="four_human_proteins.fasta" ftype="fasta"/>
        <param name="fasta_b" value="rhodopsin_proteins.fasta" ftype="fasta"/>
        <param name="dbtype" value="prot"/>
        <param name="prot_type" value="blastp"/>
        <param name="identity" value="0.0"/>
        <param name="q_cover" value="0.0"/>
        <output name="output" file="rbh_blastp_four_human_vs_rhodopsin_proteins.tabular" ftype="tabular"/>
    </test>
    <!-- Nucleotide inputs with megablast and both filters disabled. -->
    <test>
        <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/>
        <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/>
        <param name="dbtype" value="nucl"/>
        <param name="nucl_type" value="megablast"/>
        <param name="identity" value="0.0"/>
        <param name="q_cover" value="0.0"/>
        <output name="output" file="rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular" ftype="tabular"/>
    </test>
    <!-- Same inputs with thresholds of 92% identity and 86% coverage; the
         expected output is the same file as the unfiltered megablast run. -->
    <test>
        <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/>
        <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/>
        <param name="dbtype" value="nucl"/>
        <param name="nucl_type" value="megablast"/>
        <param name="identity" value="92"/>
        <param name="q_cover" value="86"/>
        <output name="output" file="rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular" ftype="tabular"/>
    </test>
    <!-- push the percentage identity over the 92.07% level -->
    <test>
        <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/>
        <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/>
        <param name="dbtype" value="nucl"/>
        <param name="nucl_type" value="megablast"/>
        <param name="identity" value="92.5"/>
        <param name="q_cover" value="86"/>
        <output name="output" file="rbh_none.tabular" ftype="tabular"/>
    </test>
    <!-- push the coverage over the 86% level -->
    <test>
        <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/>
        <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/>
        <param name="dbtype" value="nucl"/>
        <param name="nucl_type" value="megablast"/>
        <param name="identity" value="92"/>
        <param name="q_cover" value="87"/>
        <output name="output" file="rbh_none.tabular" ftype="tabular"/>
    </test>
    <!-- Inputs swapped (mRNA as species A) using blastn, filters disabled. -->
    <test>
        <param name="fasta_a" value="three_human_mRNA.fasta" ftype="fasta"/>
        <param name="fasta_b" value="rhodopsin_nucs.fasta" ftype="fasta"/>
        <param name="dbtype" value="nucl"/>
        <param name="nucl_type" value="blastn"/>
        <param name="identity" value="0.0"/>
        <param name="q_cover" value="0.0"/>
        <output name="output" file="rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular" ftype="tabular"/>
    </test>
</tests>
|
|
119 <help>
|
|
120 **What it does**
|
|
121
|
2
|
122 Takes two FASTA files (*species A* and *species B*), builds a BLAST database
|
0
|
123 for each, runs reciprocal BLAST searches (*A vs B*, and *B vs A*), optionally
|
2
|
124 filters the HSPs, and then compiles a list of the reciprocal best hits (RBH).
|
0
|
125
|
2
|
126 The output from this tool is a tabular file containing eight columns, with
|
|
127 information about the BLAST matches used:
|
0
|
128
|
2
|
129 ====== =================================
|
0
|
130 Column Description
|
2
|
131 ------ ---------------------------------
|
|
132 1 ID from *species A*
|
|
133 2 ID from *species B*
|
0
|
134 3 Bitscore from *A vs B*
|
2
|
135 4 Percentage identity from *A vs B*
|
|
136 5 Query coverage from *A vs B*
|
|
137 6 Bitscore from *B vs A*
|
|
138 7 Percentage identity from *B vs A*
|
|
139 8 Query coverage from *B vs A*
|
|
140 ====== =================================
|
|
141
|
|
142 These values correspond to the ``bitscore``, ``pident`` and ``qcovhsp``
|
|
143 values in the BLAST+ tabular output.
|
0
|
144
|
|
145 .. class:: warningmark
|
|
146
|
|
147 **Note**
|
|
148
|
|
149 If you are trying to use BLAST RBH matches to identify candidate orthologues
|
|
150 or transfer annotation, you *must* use a percentage identity and minimum
|
|
151 coverage threshold or similar. See:
|
|
152
|
|
153 Punta and Ofran (2008) The Rough Guide to In Silico Function Prediction,
|
|
154 or How To Use Sequence and Structure Information To Predict Protein
|
|
155 Function. PLoS Comput Biol 4(10): e1000160.
|
|
156 http://dx.doi.org/10.1371/journal.pcbi.1000160
|
|
157
|
|
158 The defaults are to require 70% sequence identity over the aligned region
|
|
159 (using ``pident`` in the BLAST+ tabular output), and that the HSP alignment
|
|
160 covers at least 50% of the query sequence (using ``qcovhsp`` in the BLAST+
|
|
161 tabular output).
|
|
162
|
|
163
|
|
164 **References**
|
|
165
|
|
166 A specific paper covering this tool is planned, but please also cite:
|
|
167
|
|
168 Christiam Camacho et al. (2009).
|
|
169 BLAST+: architecture and applications.
|
|
170 BMC Bioinformatics. 15;10:421.
|
|
171 http://dx.doi.org/10.1186/1471-2105-10-421
|
|
172
|
|
173 This wrapper is available to install into other Galaxy Instances via the Galaxy
|
|
174 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blast_rbh
|
|
175 </help>
|
|
176 </tool>
|