Repository 'ncbi_blast_plus'
hg clone https://testtoolshed.g2.bx.psu.edu/repos/peterjc/ncbi_blast_plus

Changeset 48:b19b6addd05c (2015-01-27)
Previous changeset 47:d0de6862cda1 (2014-10-31)
Commit message:
Uploaded v0.1.02 preview 1, using tool_data_table_conf.xml for loc files, etc
modified:
test-data/blastn_rhodopsin_vs_three_human.xml
test-data/blastp_four_human_vs_rhodopsin.xml
test-data/blastx_rhodopsin_vs_four_human.xml
test-data/blastx_rhodopsin_vs_four_human_all.tabular
test-data/four_human_proteins.fasta
test-data/tblastn_four_human_vs_rhodopsin.html
test-data/tblastn_four_human_vs_rhodopsin.xml
tools/ncbi_blast_plus/README.rst
tools/ncbi_blast_plus/blastxml_to_tabular.py
tools/ncbi_blast_plus/blastxml_to_tabular.xml
tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml
tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
tools/ncbi_blast_plus/ncbi_macros.xml
tools/ncbi_blast_plus/ncbi_makeblastdb.xml
tools/ncbi_blast_plus/ncbi_makeprofiledb.xml
tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
tools/ncbi_blast_plus/repository_dependencies.xml
tools/ncbi_blast_plus/tool_dependencies.xml
added:
test-data/blastdb.loc
test-data/blastdb_d.loc
test-data/blastdb_p.loc
test-data/blastn_chimera_vs_three_human_max1.tabular
test-data/blastn_chimera_vs_three_human_max1.txt
test-data/chimera.fasta
test-data/four_human_proteins.dbinfo.txt
test-data/three_human_mRNA.dbinfo.txt
test-data/three_human_mRNA.fasta.log.txt
test-data/three_human_mRNA.fasta.nhd
test-data/three_human_mRNA.fasta.nhi
test-data/three_human_mRNA.fasta.nhr
test-data/three_human_mRNA.fasta.nin
test-data/three_human_mRNA.fasta.nog
test-data/three_human_mRNA.fasta.nsd
test-data/three_human_mRNA.fasta.nsi
test-data/three_human_mRNA.fasta.nsq
test-data/tool_data_table_conf.xml.test
tool-data/tool_data_table_conf.xml.sample
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/blastdb.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastdb.loc Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,7 @@
+# This is a test file distributed with the Galaxy BLAST+ wrapper for
+# defining a list of nucleotide BLAST databases used in functional
+# tests for blastn etc.
+#
+# See the file tool-data/blastdb.loc.sample for more information.
+#
+three_human_mRNA Three Human mRNAs ${__HERE__}/three_human_mRNA.fasta
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/blastdb_d.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastdb_d.loc Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,7 @@
+# This is a test file distributed with the Galaxy BLAST+ wrapper for
+# defining a list of protein domain BLAST databases used in functional
+# tests of rpsblast etc.
+#
+# See the file tool-data/blastdb_d.loc.sample for more information.
+#
+cd00003_and_cd00008 Domains CD00003 (PNPsynthase) and CD00008 (PIN_53EXO-like) ${__HERE__}/cd00003_and_cd00008
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/blastdb_p.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastdb_p.loc Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,8 @@
+# This is a test file distributed with the Galaxy BLAST+ wrapper for
+# defining a list of protein BLAST databases used in functional tests
+# for blastp etc.
+#
+# See the file tool-data/blastdb_p.loc.sample for more information.
+#
+four_human_proteins Four Human Proteins (no taxid) ${__HERE__}/four_human_proteins.fasta
+four_human_proteins_taxid Four Human Proteins (with taxid) ${__HERE__}/four_human_proteins_taxid.fasta
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/blastn_chimera_vs_three_human_max1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_chimera_vs_three_human_max1.tabular Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,1 @@
+chimera ENA|AB011145|AB011145.1 100.00 4560 0 0 1 4560 121 4680 0.0  8421
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/blastn_chimera_vs_three_human_max1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_chimera_vs_three_human_max1.txt Tue Jan 27 06:06:18 2015 -0500
b
b'@@ -0,0 +1,356 @@\n+BLASTN 2.2.30+\n+\n+\n+Reference: Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb\n+Miller (2000), "A greedy algorithm for aligning DNA sequences", J\n+Comput Biol 2000; 7(1-2):203-14.\n+\n+\n+\n+Database: Just 3 human mRNA sequences\n+           3 sequences; 10,732 total letters\n+\n+\n+\n+Query= chimera chunks of AB011145 plus M10051 plus BC112106\n+\n+Length=9973\n+                                                                      Score     E\n+Sequences producing significant alignments:                          (Bits)  Value\n+\n+  ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein,...   8421   0.0  \n+\n+\n+> ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, \n+partial cds.\n+Length=4796\n+\n+ Score =  8421 bits (4560),  Expect = 0.0\n+ Identities = 4560/4560 (100%), Gaps = 0/4560 (0%)\n+ Strand=Plus/Plus\n+\n+Query  1     GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC  60\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  121   GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC  180\n+\n+Query  61    CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA  120\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  181   CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA  240\n+\n+Query  121   TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC  180\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  241   TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC  300\n+\n+Query  181   TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT  240\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  301   TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT  360\n+\n+Query  241   TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA  300\n+             
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  361   TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA  420\n+\n+Query  301   CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA  360\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  421   CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA  480\n+\n+Query  361   TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA  420\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  481   TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA  540\n+\n+Query  421   CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC  480\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  541   CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC  600\n+\n+Query  481   TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA  540\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  601   TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA  660\n+\n+Query  541   TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT  600\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  661   TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT  720\n+\n+Query  601   TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG  660\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  721   TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG  780\n+\n+Query  661   GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA  720\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  781   GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA  840\n+\n+Query  721   TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA  780\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  
841   TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA  900\n+\n+Query  781   ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAA'..b'3780\n+\n+Query  3661  GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT  3720\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  3781  GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT  3840\n+\n+Query  3721  TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT  3780\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  3841  TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT  3900\n+\n+Query  3781  GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT  3840\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  3901  GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT  3960\n+\n+Query  3841  TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG  3900\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  3961  TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG  4020\n+\n+Query  3901  GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA  3960\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4021  GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA  4080\n+\n+Query  3961  CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA  4020\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4081  CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA  4140\n+\n+Query  4021  CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA  4080\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4141  CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA  4200\n+\n+Query  4081  GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA  4140\n+             
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4201  GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA  4260\n+\n+Query  4141  GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT  4200\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4261  GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT  4320\n+\n+Query  4201  GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG  4260\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4321  GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG  4380\n+\n+Query  4261  GAAACTTTTAGATGACATTCTACAAATTAtttttttCTTTAAATTAAAAGAACCTAGCCA  4320\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4381  GAAACTTTTAGATGACATTCTACAAATTATTTTTTTCTTTAAATTAAAAGAACCTAGCCA  4440\n+\n+Query  4321  ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA  4380\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4441  ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA  4500\n+\n+Query  4381  AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA  4440\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4501  AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA  4560\n+\n+Query  4441  ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA  4500\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4561  ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA  4620\n+\n+Query  4501  GGTTGTCAAGAAGGCttttttttttttcttttttAAACCTGAGGGCAAAAAGGAATGGAT  4560\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4621  GGTTGTCAAGAAGGCTTTTTTTTTTTTCTTTTTTAAACCTGAGGGCAAAAAGGAATGGAT  4680\n+\n+\n+\n+Lambda      K        H\n+    1.33    0.621     1.12 \n+\n+Gapped\n+Lambda      K        H\n+    1.28    0.460    0.850 \n+\n+Effective search 
space used: 106299490\n+\n+\n+  Database: Just 3 human mRNA sequences\n+    Posted date:  Dec 26, 2014  5:54 AM\n+  Number of letters in database: 10,732\n+  Number of sequences in database:  3\n+\n+\n+\n+Matrix: blastn matrix 1 -2\n+Gap Penalties: Existence: 0, Extension: 2.5\n'
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/blastn_rhodopsin_vs_three_human.xml
--- a/test-data/blastn_rhodopsin_vs_three_human.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/test-data/blastn_rhodopsin_vs_three_human.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>blastn</BlastOutput_program>
-  <BlastOutput_version>BLASTN 2.2.29+</BlastOutput_version>
+  <BlastOutput_version>BLASTN 2.2.30+</BlastOutput_version>
   <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), &quot;A greedy algorithm for aligning DNA sequences&quot;, J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/blastp_four_human_vs_rhodopsin.xml
--- a/test-data/blastp_four_human_vs_rhodopsin.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>blastp</BlastOutput_program>
-  <BlastOutput_version>BLASTP 2.2.29+</BlastOutput_version>
+  <BlastOutput_version>BLASTP 2.2.30+</BlastOutput_version>
   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID>
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/blastx_rhodopsin_vs_four_human.xml
--- a/test-data/blastx_rhodopsin_vs_four_human.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/test-data/blastx_rhodopsin_vs_four_human.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>blastx</BlastOutput_program>
-  <BlastOutput_version>BLASTX 2.2.29+</BlastOutput_version>
+  <BlastOutput_version>BLASTX 2.2.30+</BlastOutput_version>
   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/blastx_rhodopsin_vs_four_human_all.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_all.tabular Fri Oct 31 11:43:23 2014 -0400
+++ b/test-data/blastx_rhodopsin_vs_four_human_all.tabular Tue Jan 27 06:06:18 2015 -0500
b
b'@@ -1,10 +1,10 @@\n-gi|57163782|ref|NM_001009242.1|\tsp|P08100|OPSD_HUMAN\t96.55\t348\t12\t0\t1\t1044\t1\t348\t0.0\t  639\tsp|P08100|OPSD_HUMAN\t1647\t336\t343\t0\t98.56\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\t1047\t348\tN/A\t0\tgi|57163782|ref|NM_001009242.1|\tgi|57163782|ref|NM_001009242.1|\t0\t0\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tN/A\tN/A\t1/0\t15KA71FL4TS79VA9ML34VI71LI7SASA21LI13TA1GV11\t99\t99\tN/A\tN/A\tN/A\tN/A\tN/A\n-gi|2734705|gb|U59921.1|BBU59921\tsp|P08100|OPSD_HUMAN\t85.24\t332\t49\t0\t42\t1037\t1\t332\t0.0\t  
551\tsp|P08100|OPSD_HUMAN\t1419\t283\t315\t0\t94.88\t3\t0\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE\t1574\t348\tN/A\t0\tgi|2734705|gb|U59921.1|BBU59921\tgi|2734705|gb|U59921.1|BBU59921\t0\t0\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tN/A\tN/A\t3/0\t10IV1MF2KA20YF1IM1CA7LV7ML5IV17FV1NDHL4CG3VS1ML1ST1MLNH3IVLF1AP3YNVL36SG4VI7IV3SA2VA3LA9ML5VI26LM8RQ2CF30VIFA13FY2STNH3EN7VI7SASA9LM8IL2LI6FL1ED1DE\t63\t63\tN/A\tN/A\tN/A\tN/A\tN/A\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t96.40\t111\t4\t0\t1\t333\t11\t121\t3e-67\t  220\tsp|P08100|OPSD_HUMAN\t561\t107\t109\t0\t98.20\t1\t0\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG\t4301\t348\tN/A\t0\tgi|283855845|gb|GQ290303.1|\tgi|283855845|gb|GQ290303.1|\t0\t0\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tN/A\tN/A\t1/0\t5KA9HY61FL4TS28\t22\t8\tN/A\tN/A\tN/A\tN/A\tN/A\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t92.31\t65\t5\t0\t3174\t3368\t248\t312\t2e-35\t  
127\tsp|P08100|OPSD_HUMAN\t319\t60\t64\t0\t98.46\t3\t0\tKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ\tKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ\t4301\t348\tN/A\t0\tgi|283855845|gb|GQ290303.1|\tgi|283855845|gb|GQ290303.1|\t0\t0\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tN/A\tN/A\t3/0\t18LV3GS19LI7SASA13\t22\t5\tN/A\tN/A\tN/A\tN/A\tN/A\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t91.53\t59\t5\t0\t2855\t3031\t177\t235\t2e-33\t  121\tsp|P08100|OPSD_HUMAN\t304\t54\t57\t0\t96.61\t2\t0\tRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS\tRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA\t4301\t348\tN/A\t0\tgi|283855845|gb|GQ290303.1|\tgi|283855845|gb|GQ290303.1|\t0\t0\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tN/A\tN/A\t2/0\t6ML34VI14VARASA\t22\t4\tN/A\tN/A\tN/A\tN/A\tN/A\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t93.22\t59\t4\t0\t1404\t1580\t119\t177\t1e-25\t97.1\tsp|P08100|OPSD_HUMAN\t240\t55\t56\t0\t94.92\t3\t0\tLAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR\tLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR\t4301\t348\tN/A\t0\tgi|283855845|gb|GQ290303.1|\tgi|283855845|gb|GQ29030'..b'7.1\tsp|P08100|OPSD_HUMAN\t240\t55\t56\t0\t94.92\t3\t0\tLAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR\tLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR\t4301\t348\tN/A\t0\tgi|283855845|gb|GQ290303.1|\tgi|283855845|gb|GQ290303.1|\t0\t0\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tN/A\tN/A\t3/0\t1AG36LV1LF13VA4\t22\t1\tN/A\tN/A\tN/A\tN/A\tN/A\n+gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t88.46\t26\t3\t0\t4222\t4299\t312\t337\t1e-12\t57.0\tsp|P08100|OPSD_HUMAN\t136\t23\t24\t0\t92.31\t1\t0\tQFRNCMLTTLCCGKNPLGDDEASTTA\tQFRNCMLTTICCGKNPLGDDEASATV\t4301\t348\tN/A\t0\tgi|283855845|gb|GQ290303.1|\tgi|283855845|gb|GQ290303.1|\t0\t0\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\
tN/A\tN/A\t1/0\t9LI13TA1AV\t22\t1\tN/A\tN/A\tN/A\tN/A\tN/A\n+gi|283855822|gb|GQ290312.1|\tsp|P08100|OPSD_HUMAN\t95.09\t326\t16\t0\t1\t978\t11\t336\t0.0\t  589\tsp|P08100|OPSD_HUMAN\t1518\t310\t322\t0\t98.77\t1\t0\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT\t983\t348\tN/A\t0\tgi|283855822|gb|GQ290312.1|\tgi|283855822|gb|GQ290312.1|\t0\t0\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tN/A\tN/A\t1/0\t5KA66ND4FL4TS5ML7AP49LV25ML34VI40VI6LV19VI11SASA21LI13TA1\t99\t33\tN/A\tN/A\tN/A\tN/A\tN/A\n+gi|18148870|dbj|AB062417.1|\tsp|P08100|OPSD_HUMAN\t93.39\t348\t23\t0\t1\t1044\t1\t348\t0.0\t  
619\tsp|P08100|OPSD_HUMAN\t1596\t325\t337\t0\t96.84\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\t1047\t348\tN/A\t0\tgi|18148870|dbj|AB062417.1|\tgi|18148870|dbj|AB062417.1|\t0\t0\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tN/A\tN/A\t1/0\t15KA9AY22MV38FL4TS79VA9ML10PLHKEP1TV14IT2LM1VI47LV3GS11DN14TSSA1VI17VL2LI13TA13\t99\t33\tN/A\tN/A\tN/A\tN/A\tN/A\n+gi|12583664|dbj|AB043817.1|\tsp|P08100|OPSD_HUMAN\t81.68\t333\t61\t0\t23\t1021\t1\t333\t0.0\t  
532\tsp|P08100|OPSD_HUMAN\t1371\t272\t307\t0\t92.19\t2\t0\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA\t1344\t348\tN/A\t0\tgi|12583664|dbj|AB043817.1|\tgi|12583664|dbj|AB043817.1|\t0\t0\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tsp|P08100|OPSD_HUMAN\tN/A\tN/A\t2/0\t10IV1MF22AQ2AM1SA4FL2IVAL12IVEQ23FL4TS1ML3ML12IL14CS8WYMV5VMTS6SN6MAVF2TV6LA3FA20RLAK1GEIV10TVCV2ST2LMAIVI1SF4RQ2CF12EA3RK2RK6VI4SA2VI9WF8TN9SA5SA1LI3MV3CM6HN2IL2LI6FLEGEDEDDEGA\t74\t25\tN/A\tN/A\tN/A\tN/A\tN/A\n'
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/chimera.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chimera.fasta Tue Jan 27 06:06:18 2015 -0500
b
b'@@ -0,0 +1,168 @@\n+>chimera chunks of AB011145 plus M10051 plus BC112106\n+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC\n+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA\n+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC\n+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT\n+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA\n+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA\n+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA\n+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC\n+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA\n+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT\n+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG\n+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA\n+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA\n+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG\n+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA\n+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC\n+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT\n+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA\n+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC\n+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA\n+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG\n+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC\n+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC\n+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA\n+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT\n+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA\n+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT\n+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT\n+GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA\n+TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA\n+CTTTGAAGTGGAGGAGAGTCATAAGACAGAAC
ATTTGGCAGGAATTGTCCTTATGAAACA\n+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG\n+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG\n+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT\n+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT\n+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT\n+TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG\n+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA\n+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT\n+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC\n+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT\n+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT\n+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT\n+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT\n+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT\n+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA\n+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA\n+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA\n+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT\n+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT\n+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT\n+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT\n+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA\n+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT\n+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT\n+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT\n+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT\n+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG\n+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC\n+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT\n+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA\n+GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT\n+TATTTTTATTTTGAGC
TTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT\n+GAGAGGGAATAATCTGAGC'..b'GGGGACAAGGCATCC\n+TGTGAAAATGAGTTACTTAAATTTTCTTACATTCGGACATCTTTTGACAAGATCTTGCTG\n+AGATGGGAGCCGTACTGGCCCCCCGACTTCCGAGACCTCTTGGGGTTCATGCTGTTCTAC\n+AAAGAGGCCCCTTATCAGAATGTGACGGAGTTCGATGGGCAGGATGCGTGTGGTTCCAAC\n+AGTTGGACGGTGGTAGACATTGACCCACCCCTGAGGTCCAACGACCCCAAATCACAGAAC\n+CACCCAGGGTGGCTGATGCGGGGTCTCAAGCCCTGGACCCAGTATGCCATCTTTGTGAAG\n+ACCCTGGTCACCTTTTCGGATGAACGCCGGACCTATGGGGCCAAGAGTGACATCATTTAT\n+GTCCAGACAGATGCCACCAACCCCTCTGTGCCCCTGGATCCAATCTCAGTGTCTAACTCA\n+TCATCCCAGATTATTCTGAAGTGGAAACCACCCTCCGACCCCAATGGCAACATCACCCAC\n+TACCTGGTTTTCTGGGAGAGGCAGGCGGAAGACAGTGAGCTGTTCGAGCTGGATTATTGC\n+CTCAAAGGGCTGAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT\n+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG\n+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT\n+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC\n+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG\n+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG\n+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC\n+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC\n+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT\n+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG\n+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG\n+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT\n+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC\n+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT\n+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT\n+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT\n+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC\n+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG\n+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG\n+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC\n+AAGGGCCAGCCCACGCTGGTG
GTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC\n+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA\n+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG\n+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA\n+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC\n+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC\n+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA\n+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT\n+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA\n+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTG\n+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT\n+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC\n+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC\n+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG\n+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT\n+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT\n+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG\n+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC\n+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT\n+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC\n+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT\n+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG\n+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC\n+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA\n+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC\n+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC\n+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG\n+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC\n+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG\n+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA\n+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG\n+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT\n+GAGGC
CTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG\n+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA\n+GCCATCCCACCAG\n'
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/four_human_proteins.dbinfo.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.dbinfo.txt Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,7 @@
+Database: Just 4 human proteins
+ 4 sequences; 3,297 total residues
+
+Date: Feb 10, 2014  6:40 PM Longest sequence: 1,382 residues
+
+Volumes:
+ /mnt/galaxy/galaxy_blast/test-data/four_human_proteins_taxid.fasta
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/four_human_proteins.fasta
--- a/test-data/four_human_proteins.fasta Fri Oct 31 11:43:23 2014 -0400
+++ b/test-data/four_human_proteins.fasta Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,61 +1,48 @@
 >sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
-MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
-SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
-REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
-VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
-CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
-CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
-HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFP
+NENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSK
+RNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPV
+IAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLL
+RDRDEL
 >sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
-MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEG
-GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
-DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
-LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
-KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
-DPEHRPDIFQVSYFAFKFAKKDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDT
-IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
-ILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ
-QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQY
-QQAFFQQQMLAQHQPSQQQASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
-ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEELLDREFDLLRSNRLEERASSD
-KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
-QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPE
-NLGHRPLLMDSEDEEEEEKHSSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
-QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
-APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
-EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARR
-HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
-WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
-SQQSQPVELDPFGAAPFPSKQ
+MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEGGFSTVFLVRTHGGIRCALKR
+MYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFC
+DTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQVSYFAFKFAK
+KDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDTIGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLA
+PGEFGNHRPKGALRPGNGPEILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ
+QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQYQQAFFQQQMLAQHQPSQQQA
+SPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSVADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEEL
+LDREFDLLRSNRLEERASSDKNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPENLGHRPLLMDSEDEEEEEKH
+SSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSAQLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNL
+PQHRFPAAGLEQEEFDVFTKAPFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARRHKKVGRRDSQSSNEFLTISD
+SKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLSWHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKM
+DDFGAVPFTELVVQSITPHQSQQSQPVELDPFGAAPFPSKQ
 >sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
-MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
-QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
-VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
-ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
-GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
-CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
-TVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETL
-EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
-RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
-NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
-DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
-RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
-KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAF
-PNTSSTSVPTSPEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
-SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
-SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
-PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
-EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
-FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
-AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
-RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
-CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEME
-FEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
-PS
+MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPK
+LIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDW
+SRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYT
+MNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRS
+YALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVD
+IDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIIL
+KWKPPSDPNGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPFEK
+VVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGL
+IVLYEVSYRRYGDEELHLCVSRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSREKITLLRELGQGSFGMVYEG
+NARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKGFTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRP
+EAENNPGRPPPTLQEMIQMAAEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDNCPERVTDLMRMCWQFNPKMR
+PTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEMEFEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIP
+YTHMNGGKKNGRILTLPRSNPS
 >sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
-VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
-GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
-EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES
-ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
-YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA
+VADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFT
+WVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES
+ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTT
+ICCGKNPLGDDEASATVSKTETSQVAPA
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/tblastn_four_human_vs_rhodopsin.html
--- a/test-data/tblastn_four_human_vs_rhodopsin.html Fri Oct 31 11:43:23 2014 -0400
+++ b/test-data/tblastn_four_human_vs_rhodopsin.html Tue Jan 27 06:06:18 2015 -0500
b
@@ -3,7 +3,7 @@
 <BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
 <PRE>
 
-<b>TBLASTN 2.2.29+</b>
+<b>TBLASTN 2.2.30+</b>
 
 
 <b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/tblastn_four_human_vs_rhodopsin.xml
--- a/test-data/tblastn_four_human_vs_rhodopsin.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/test-data/tblastn_four_human_vs_rhodopsin.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>tblastn</BlastOutput_program>
-  <BlastOutput_version>TBLASTN 2.2.29+</BlastOutput_version>
+  <BlastOutput_version>TBLASTN 2.2.30+</BlastOutput_version>
   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/three_human_mRNA.dbinfo.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.dbinfo.txt Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,7 @@
+Database: Just 3 human mRNA sequences
+ 3 sequences; 10,732 total bases
+
+Date: Dec 26, 2014  5:54 AM Longest sequence: 4,796 bases
+
+Volumes:
+ /mnt/galaxy/galaxy_blast/test-data/three_human_mRNA.fasta
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/three_human_mRNA.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta.log.txt Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,5 @@
+New DB title:  Just 3 human mRNA sequences
+Sequence type: Nucleotide
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/three_human_mRNA.fasta.nhd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta.nhd Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,3 @@
+12956943350
+13082197871
+19180330422
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/three_human_mRNA.fasta.nhi
b
Binary file test-data/three_human_mRNA.fasta.nhi has changed
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/three_human_mRNA.fasta.nhr
b
Binary file test-data/three_human_mRNA.fasta.nhr has changed
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/three_human_mRNA.fasta.nin
b
Binary file test-data/three_human_mRNA.fasta.nin has changed
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/three_human_mRNA.fasta.nog
b
Binary file test-data/three_human_mRNA.fasta.nog has changed
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/three_human_mRNA.fasta.nsd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta.nsd Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,3 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/three_human_mRNA.fasta.nsi
b
Binary file test-data/three_human_mRNA.fasta.nsi has changed
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/three_human_mRNA.fasta.nsq
b
Binary file test-data/three_human_mRNA.fasta.nsq has changed
b
diff -r d0de6862cda1 -r b19b6addd05c test-data/tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tool_data_table_conf.xml.test Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,15 @@
+<tables>
+    <!-- test files! -->
+    <table name="blastdb" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/blastdb.loc" />
+    </table>
+    <table name="blastdb_p" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/blastdb_p.loc" />
+    </table>
+    <table name="blastdb_d" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/blastdb_d.loc" />
+    </table>
+</tables>
b
diff -r d0de6862cda1 -r b19b6addd05c tool-data/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/tool_data_table_conf.xml.sample Tue Jan 27 06:06:18 2015 -0500
b
@@ -0,0 +1,14 @@
+<tables>
+    <table name="blastdb" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/blastdb.loc" />
+    </table>
+    <table name="blastdb_p" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/blastdb_p.loc" />
+    </table>
+    <table name="blastdb_d" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/blastdb_d.loc" />
+    </table>
+</tables>
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/README.rst Tue Jan 27 06:06:18 2015 -0500
b
@@ -7,7 +7,7 @@
 
 See the licence text below.
 
-Currently tested with NCBI BLAST 2.2.29+ (i.e. version 2.2.29 of BLAST+),
+Currently tested with NCBI BLAST 2.2.30+ (i.e. version 2.2.30 of BLAST+),
 and does not work with the NCBI 'legacy' BLAST suite (e.g. ``blastall``).
 
 Note that these wrappers (and the associated datatypes) were originally
@@ -20,6 +20,22 @@
 http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
 
 
+Citation
+========
+
+Please cite the following paper (currently available as a preprint):
+
+NCBI BLAST+ integrated into Galaxy.
+P.J.A. Cock, J.M. Chilton, B. Gruening, J.E. Johnson, N. Soranzo
+bioRxiv DOI: http://dx.doi.org/10.1101/014043 (preprint)
+
+You should also cite the NCBI BLAST+ tools:
+
+BLAST+: architecture and applications.
+C. Camacho et al. BMC Bioinformatics 2009, 10:421.
+DOI: http://dx.doi.org/10.1186/1471-2105-10-421
+
+
 Automated Installation
 ======================
 
@@ -35,8 +51,7 @@
 
 For those not using Galaxy's automated installation from the Tool Shed, put
 the XML and Python files in the ``tools/ncbi_blast_plus/`` folder and add the
-XML files to your ``tool_conf.xml`` as normal (and do the same in
-``tool_conf.xml.sample`` in order to run the unit tests). For example, use::
+XML files to your ``tool_conf.xml`` as normal.  For example, use::
 
   <section name="NCBI BLAST+" id="ncbi_blast_plus_tools">
     <tool file="ncbi_blast_plus/ncbi_blastn_wrapper.xml" />
@@ -61,15 +76,16 @@
 
 As described above for an automated installation, you must also tell Galaxy
 about any system level BLAST databases using the ``tool-data/blastdb*.loc``
-files.
+files. Also merge the ``tool-data/tool_data_table_conf.xml.sample`` contents
+into your ``tool_data_table_conf.xml`` file.
 
 You must install the NCBI BLAST+ standalone tools somewhere on the system
-path. Currently the unit tests are written using BLAST 2.2.29+.
+path. Currently the unit tests are written using BLAST+ 2.2.30.
 
 Run the functional tests (adjusting the section identifier to match your
 ``tool_conf.xml.sample`` file)::
 
-    ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools
+    ./run_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools
 
 Configuration
 =============
@@ -157,7 +173,7 @@
         - Set number of threads via ``$GALAXY_SLOTS`` environment variable.
         - More descriptive default output names.
         - Tests require updated BLAST DB definitions (``blast_datatypes`` v0.0.18).
-        - Pre-check for duplicate identifiers in makeblastdb wrapper.
+        - Pre-check for duplicate identifiers in ``makeblastdb`` wrapper.
         - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27.
         - Now depends on ``package_blast_plus_2_2_28`` in ToolShed.
         - Extended tabular output includes 'salltitles' as column 25.
@@ -170,8 +186,8 @@
         - More detailed descriptions for BLASTN and BLASTP task option.
         - Wrappers for segmasker, dustmasker and convert2blastmask
           (contribution from Bjoern Gruening).
-        - Supports using maskinfo with makeblastdb wrapper.
-        - Supports setting a taxonomy ID in makeblastdb wrapper.
+        - Supports using maskinfo with ``makeblastdb`` wrapper.
+        - Supports setting a taxonomy ID in ``makeblastdb`` wrapper.
         - Subtle changes like new conditional settings will require some old
           workflows be updated to cope.
 v0.1.01 - Requires ``blastdbd`` datatype (``blast_datatypes`` v0.0.19).
@@ -181,6 +197,19 @@
           domain database from the user's history.
         - Tool definitions now embed citation information (by John Chilton).
         - BLAST tools support GI and SeqID filters (added by Bjoern Gruening).
+v0.1.02 - Now depends on ``package_blast_plus_2_2_30`` in ToolShed.
+        - Tests updated for BLAST+ 2.2.30 instead of BLAST+ 2.2.29.
+        - New tasks ``blastp-fast``, ``blastx-fast`` and ``tblastn-fast``.
+        - New minimum query HSP coverage option, ``-qcov_hsp_perc``.
+        - Removed ``-word_size`` from RPS-BLAST and RPS-TBLASTN wrappers, this
+          is set during database construction and should not have been offered
+          as a command line option in releases prior to BLAST+ 2.2.30.
+        - BLAST database ``blastdb*.loc`` files now accessed via the XML
+          table definitions in Galaxy's ``tool_data_table_conf.xml`` file,
+          set up via ``tool-data/tool_data_table_conf.xml.sample``.
+        - Replace ``.extra_files_path`` with ``.files_path`` (internal change,
+          thanks to Bjoern Gruening and John Chilton).
+        - Added "NCBI BLAST+ integrated into Galaxy" preprint citation.
 ======= ======================================================================
 
 
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/blastxml_to_tabular.py
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Tue Jan 27 06:06:18 2015 -0500
[
@@ -66,7 +66,7 @@
 from optparse import OptionParser
 
 if "-v" in sys.argv or "--version" in sys.argv:
-    print "v0.1.01"
+    print "v0.1.04"
     sys.exit(0)
 
 if sys.version_info[:2] >= ( 2, 5 ):
@@ -162,7 +162,7 @@
     blast_program = None
     # get an iterable
     try: 
-        context = ElementTree.iterparse(in_file, events=("start", "end"))
+        context = ElementTree.iterparse(blastxml_filename, events=("start", "end"))
     except:
         stop_err("Invalid data format.")
     # turn it into an iterator
@@ -321,7 +321,7 @@
                         #Only a subset of the columns are needed
                         values = [values[colnames.index(c)] for c in cols]
                     #print "\t".join(values) 
-                    outfile.write("\t".join(values) + "\n")
+                    output_handle.write("\t".join(values) + "\n")
             # prevents ElementTree from growing large datastructure
             root.clear()
             elem.clear()
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/blastxml_to_tabular.xml
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.1.01">
+<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.1.04">
     <description>Convert BLAST XML output to tabular</description>
     <version_command interpreter="python">blastxml_to_tabular.py --version</version_command>
     <command interpreter="python">
@@ -9,7 +9,7 @@
 #else
 -c "$output.out_format"
 #end if
-#for i in $blastxml_file#${i} #end for#
+#for i in $blastxml_file#"${i}" #end for#
     </command>
     <stdio>
         <!-- Anything other than zero is an error -->
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.1.01">
+<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.1.02">
     <description>Show BLAST database information from blastdbcmd</description>
     <macros>
         <token name="@BINARY@">blastdbcmd</token>
@@ -15,6 +15,18 @@
     <outputs>
         <data name="info" format="txt" label="${db_opts.database.fields.name} info" />
     </outputs>
+    <tests>
+        <test>
+            <param name="db_opts|db_type" value="prot" />
+            <param name="db_opts|database" value="four_human_proteins" />
+            <output name="info" file="four_human_proteins.dbinfo.txt" ftype="txt" lines_diff="4" />
+        </test>
+        <test>
+            <param name="db_opts|db_type" value="nucl" />
+            <param name="db_opts|database" value="three_human_mRNA" />
+            <output name="info" file="three_human_mRNA.dbinfo.txt" ftype="txt" lines_diff="4" />
+        </test>
+    </tests>
     <help>
     
 **What it does**
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.1.01">
+<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.1.02">
     <description>Extract sequence(s) from BLAST database</description>
     <macros>
         <token name="@BINARY@">blastdbcmd</token>
@@ -70,6 +70,16 @@
     <outputs>
         <data name="seq" format="fasta" label="Sequences from ${db_opts.database.fields.name}" />
     </outputs>
+    <tests>
+        <test>
+            <param name="db_opts|db_type" value="prot" />
+            <param name="db_opts|database" value="four_human_proteins" />
+            <param name="id_opts|id_type" value="prompt" />
+            <param name="id_opts|entries" value="all" />
+            <param name="outfmt" value="original" />
+            <output name="seq" file="four_human_proteins.fasta" ftype="fasta" />
+        </test>
+    </tests>
     <help>
     
 **What it does**
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.1.01">
+<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.1.02">
     <description>Search nucleotide database with nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
@@ -19,12 +19,15 @@
 @THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
 $adv_opts.strand
-@ADVANCED_OPTIONS@
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_WORD_SIZE@
 #if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ):
 -perc_identity $adv_opts.identity_cutoff
 #end if
 $adv_opts.ungapped
 @ADV_ID_LIST_FILTER@
+@ADV_QCOV_HSP_PERC@
 ## End of advanced options:
 #end if
     </command>
@@ -64,6 +67,7 @@
             <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
             <expand macro="input_parse_deflines" />
             <expand macro="advanced_optional_id_files" />
+            <expand macro="input_qcov_hsp_perc" />
         </expand>
     </inputs>
     <outputs>
@@ -104,6 +108,24 @@
             <param name="adv_opts_selector" value="basic" />
             <output name="output1" file="blastn_rhodopsin_vs_three_human.columns.tabular" ftype="tabular" />
         </test>
+        <test>
+            <param name="query" value="chimera.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="three_human_mRNA" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="max_hits" value="1" />
+            <output name="output1" file="blastn_chimera_vs_three_human_max1.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="chimera.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="three_human_mRNA" />
+            <param name="out_format" value="0" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="max_hits" value="1" />
+            <output name="output1" file="blastn_chimera_vs_three_human_max1.txt" ftype="txt" />
+        </test>
     </tests>
     <help>
     
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.1.01">
+<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.1.02">
     <description>Search protein database with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
@@ -19,10 +19,13 @@
 @THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
 -matrix $adv_opts.matrix
-@ADVANCED_OPTIONS@
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_WORD_SIZE@
 ##Ungapped disabled for now - see comments below
 ##$adv_opts.ungapped
 @ADV_ID_LIST_FILTER@
+@ADV_QCOV_HSP_PERC@
 ## End of advanced options:
 #end if
     </command>
@@ -36,6 +39,7 @@
 
         <param name="blast_type" type="select" display="radio" label="Type of BLAST">
             <option value="blastp">blastp - Traditional BLASTP to compare a protein query to a protein database</option>
+            <option value="blastp-fast">blastp-fast - Use longer words for seeding, faster but less accurate</option>
             <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option>
         </param>
         <expand macro="input_evalue" />
@@ -54,6 +58,7 @@
             -->
             <expand macro="input_parse_deflines" />
             <expand macro="advanced_optional_id_files" />
+            <expand macro="input_qcov_hsp_perc" />
         </expand>
     </inputs>
     <outputs>
@@ -76,6 +81,7 @@
             <param name="max_hits" value="0" />
             <param name="word_size" value="0" />
             <param name="parse_deflines" value="True" />
+            <param name="qcov_hsp_perc" value="25" />
             <output name="output1" file="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
         </test>
         <test>
@@ -92,6 +98,7 @@
             <param name="max_hits" value="0" />
             <param name="word_size" value="0" />
             <param name="parse_deflines" value="True" />
+            <param name="qcov_hsp_perc" value="25" />
             <output name="output1" file="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" />
         </test>
         <test>
@@ -108,6 +115,7 @@
             <param name="max_hits" value="0" />
             <param name="word_size" value="0" />
             <param name="parse_deflines" value="True" />
+            <param name="qcov_hsp_perc" value="25" />
             <output name="output1" file="blastp_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
         </test>
         <test>
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.1.01">
+<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.1.02">
     <description>Search protein database with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
@@ -14,15 +14,19 @@
 -query "$query"
 @BLAST_DB_SUBJECT@
 -query_gencode $query_gencode
+-task $blast_type
 -evalue $evalue_cutoff
 @BLAST_OUTPUT@
 @THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
 $adv_opts.strand
 -matrix $adv_opts.matrix
-@ADVANCED_OPTIONS@
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_WORD_SIZE@
 $adv_opts.ungapped
 @ADV_ID_LIST_FILTER@
+@ADV_QCOV_HSP_PERC@
 ## End of advanced options:
 #end if
     </command>
@@ -34,6 +38,10 @@
 
         <expand macro="input_conditional_protein_db" />
         <expand macro="input_query_gencode" />
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="blastx">blastx - Traditional BLASTX to compare translated nucleotide query to protein database</option>
+            <option value="blastx-fast">blastx-fast - Use longer words for seeding, faster but less accurate</option>
+        </param>
         <expand macro="input_evalue" />
 
         <expand macro="input_out_format" />
@@ -47,6 +55,7 @@
             <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
             <expand macro="input_parse_deflines" />
             <expand macro="advanced_optional_id_files" />
+            <expand macro="input_qcov_hsp_perc" />
         </expand>
     </inputs>
     <outputs>
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_convert2blastmask_wrapper" name="NCBI BLAST+ convert2blastmask" version="0.1.01">
+<tool id="ncbi_convert2blastmask_wrapper" name="NCBI BLAST+ convert2blastmask" version="0.1.02">
     <description>Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb</description>
     <macros>
         <token name="@BINARY@">convert2blastmask</token>
@@ -80,7 +80,7 @@
 **References**
 
 If you use this Galaxy tool in work leading to a scientific publication please
-cite the following papers (a more specific paper covering this wrapper is planned):
+cite the following papers:
 
 @REFERENCES@
     </help>
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_dustmasker_wrapper" name="NCBI BLAST+ dustmasker" version="0.1.01">
+<tool id="ncbi_dustmasker_wrapper" name="NCBI BLAST+ dustmasker" version="0.1.02">
     <!-- dustmasker wrapper from Edward Kirton and Nicola Soranzo -->
     <description>masks low complexity regions</description>
     <macros>
@@ -13,7 +13,7 @@
 #if $db_opts.db_opts_selector == "db":
   -in "${db_opts.database.fields.path}" -infmt blastdb
 #elif $db_opts.db_opts_selector == "histdb":
-  -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb
+  -in "${os.path.join($db_opts.histdb.files_path, 'blastdb')}" -infmt blastdb
 #else:
   -in "$subject" -infmt fasta
 #end if
@@ -92,7 +92,7 @@
 **References**
 
 If you use this Galaxy tool in work leading to a scientific publication please
-cite the following papers (a more specific paper covering this wrapper is planned):
+cite the following papers:
 
 @REFERENCES@
     </help>
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_macros.xml
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml Tue Jan 27 06:06:18 2015 -0500
[
@@ -1,4 +1,11 @@
 <macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="binary">@BINARY@</requirement>
+            <requirement type="package" version="2.2.30">blast+</requirement>
+        </requirements>
+        <version_command>@BINARY@ -version</version_command>
+    </xml>
     <xml name="output_change_format">
         <change_format>
             <when input="output.out_format" value="0" format="txt"/>
@@ -175,11 +182,7 @@
             </param>
             <when value="db">
                 <param name="database" type="select" label="Nucleotide BLAST database">
-                    <options from_file="blastdb.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
+                    <options from_data_table="blastdb" />
                 </param>
                 <param name="histdb" type="hidden" value="" />
                 <param name="subject" type="hidden" value="" /> 
@@ -205,11 +208,7 @@
             </param>
             <when value="db">
                 <param name="database" type="select" label="Protein BLAST database">
-                    <options from_file="blastdb_p.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
+                    <options from_data_table="blastdb_p" />
                 </param>
                 <param name="histdb" type="hidden" value="" />
                 <param name="subject" type="hidden" value="" />
@@ -234,11 +233,7 @@
             </param>
             <when value="db">
                 <param name="database" type="select" label="Protein domain database">
-                    <options from_file="blastdb_d.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
+                    <options from_data_table="blastdb_d" />
                 </param>
                 <param name="histdb" type="hidden" value="" />
                 <param name="subject" type="hidden" value="" />
@@ -258,20 +253,12 @@
             </param>
             <when value="nucl">
                 <param name="database" type="select" label="Nucleotide BLAST database">
-                    <options from_file="blastdb.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
+                    <options from_data_table="blastdb" />
                 </param>
             </when>
             <when value="prot">
                 <param name="database" type="select" label="Protein BLAST database">
-                    <options from_file="blastdb_p.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
+                    <options from_data_table="blastdb_p" />
                 </param>
             </when>
         </conditional>
@@ -305,12 +292,8 @@
             <option value="-strand minus">Minus (reverse complement)</option>
         </param>
     </xml>
-    <xml name="requirements">
-        <requirements>
-            <requirement type="binary">@BINARY@</requirement>
-            <requirement type="package" version="2.2.29">blast+</requirement>
-        </requirements>
-        <version_command>@BINARY@ -version</version_command>
+    <xml name="input_qcov_hsp_perc">
+        <param name="qcov_hsp_perc" type="float" value="0" min="0" max="100" label="Minimum query coverage per hsp (percentage, 0 to 100)" help="See also the output column qcovhsp"/>
     </xml>
     <xml name="advanced_options">
         <conditional name="adv_opts">
@@ -349,6 +332,11 @@
             </when>
         </conditional>
     </xml>
+    <token name="@ADV_QCOV_HSP_PERC@">
+#if float(str($adv_opts.qcov_hsp_perc)) &gt; 0:
+    -qcov_hsp_perc $adv_opts.qcov_hsp_perc
+#end if
+    </token>
     <token name="@ADV_ID_LIST_FILTER@">
 #if $adv_opts.adv_optional_id_files_opts.adv_optional_id_files_opts_selector == 'negative_gilist':
     -negative_gilist $adv_opts.adv_optional_id_files_opts.negative_gilist
@@ -363,7 +351,7 @@
 #if $db_opts.db_opts_selector == "db":
   -db "${db_opts.database.fields.path}"
 #elif $db_opts.db_opts_selector == "histdb":
-  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
+  -db "${os.path.join($db_opts.histdb.files_path,'blastdb')}"
 #else:
   -subject "$db_opts.subject"
 #end if
@@ -381,12 +369,22 @@
     -outfmt $output.out_format
 #end if
     </token>
-    <token name="@ADVANCED_OPTIONS@">$adv_opts.filter_query
+    <token name="@ADV_FILTER_QUERY@">$adv_opts.filter_query</token>
+    <token name="@ADV_MAX_HITS@">
 ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+## Note -max_target_seqs used to simply override -num_descriptions and -num_alignments
+## but this was changed in BLAST+ 2.2.27 onwards to force their use (raised with NCBI)
 #if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+#if str($output.out_format) in ["6", "ext", "cols", "5"]:
+## Most output formats use this, including tabular and XML:
 -max_target_seqs $adv_opts.max_hits
+#else
+## Text and HTML output formats 0-4 currently need this instead:
+-num_descriptions $adv_opts.max_hits -num_alignments $adv_opts.max_hits
 #end if
+#end if
+    </token>
+    <token name="@ADV_WORD_SIZE@">
 #if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
 -word_size $adv_opts.word_size
 #end if
@@ -401,14 +399,18 @@
 ${db_opts.subject.name}
 #end if</token>
     <token name="@REFERENCES@">
+Peter J. A. Cock, John M. Chilton, Björn Grüning, James E. Johnson, Nicola Soranzo (2015).
+NCBI BLAST+ integrated into Galaxy. *BioRxiv* preprint.
+http://dx.doi.org/10.1101/014043
+
 Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
 Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
+in molecular plant pathology. *PeerJ* 1:e167
 http://dx.doi.org/10.7717/peerj.167
 
 Christiam Camacho et al. (2009).
 BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
+*BMC Bioinformatics*. 15;10:421.
 http://dx.doi.org/10.1186/1471-2105-10-421
 
 This wrapper is available to install into other Galaxy Instances via the Galaxy
@@ -418,7 +420,9 @@
         <citations>
             <citation type="doi">10.1186/1471-2105-10-421</citation>
             <citation type="doi">10.7717/peerj.167</citation>
-            <!-- TODO: Add BibTeX entry / preprint DOI for Galaxy BLAST+ paper -->
+            <!-- TODO - remove PeerJ paper once "NCBI BLAST+ integrated into Galaxy" formally published? -->
+            <citation type="doi">10.1101/014043</citation>
+            <!-- TODO - Update DOI once "NCBI BLAST+ integrated into Galaxy" formally published -->
         </citations>
     </xml>
     <token name="@OUTPUT_FORMAT@">**Output format**
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.1.01">
+<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.1.02">
     <description>Make BLAST database</description>
     <macros>
         <token name="@BINARY@">makeblastdb</token>
@@ -10,7 +10,7 @@
 ##and abort (via the ampersand ampersand trick) if any are found.
 #for i in $input_file#"${i}" #end for#
 &amp;&amp;
-makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}"
+makeblastdb -out "${os.path.join($outfile.files_path,'blastdb')}"
 $parse_seqids
 $hash_index
 ## Single call to -in with multiple filenames space separated with outer quotes
@@ -97,6 +97,7 @@
     </outputs>
     <tests>
         <!-- Note the (two line) PIN file is not reproducible run to run.
+             The same applies to the NIN file for nucleotide database.
              Likewise there is a datestamp in the log file as well, so use contains comparison
              With and without the masking makes no difference.
              With and without the taxid the only real difference is in the *.phr file.
@@ -155,6 +156,25 @@
                 <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" />
             </output>
         </test>
+        <test>
+            <param name="dbtype" value="nucl" />
+            <param name="input_file" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="title" value="Just 3 human mRNA sequences" />
+            <param name="parse_seqids" value="" />
+            <param name="hash_index" value="true" />
+            <param name="taxselect" value="id" />
+            <param name="taxid" value="9606" />
+            <output name="outfile" compare="contains" file="three_human_mRNA.fasta.log.txt" ftype="blastdbn">
+                <extra_files type="file" value="three_human_mRNA.fasta.nhr" name="blastdb.nhr" />
+                <extra_files type="file" value="three_human_mRNA.fasta.nin" name="blastdb.nin" lines_diff="2" />
+                <extra_files type="file" value="three_human_mRNA.fasta.nsq" name="blastdb.nsq" />
+                <extra_files type="file" value="three_human_mRNA.fasta.nog" name="blastdb.nog" />
+                <extra_files type="file" value="three_human_mRNA.fasta.nhd" name="blastdb.nhd" />
+                <extra_files type="file" value="three_human_mRNA.fasta.nhi" name="blastdb.nhi" />
+                <extra_files type="file" value="three_human_mRNA.fasta.nsd" name="blastdb.nsd" />
+                <extra_files type="file" value="three_human_mRNA.fasta.nsi" name="blastdb.nsi" />
+            </output>
+        </test>
     </tests>
     <help>
 **What it does**
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_makeprofiledb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_makeprofiledb" name="NCBI BLAST+ makeprofiledb" version="0.1.01">
+<tool id="ncbi_makeprofiledb" name="NCBI BLAST+ makeprofiledb" version="0.1.02">
     <description>Make profile database</description>
     <macros>
         <token name="@BINARY@">makeprofiledb</token>
@@ -7,8 +7,8 @@
     <expand macro="requirements" />
     <command>
 ##Unlike makeblastdb, makeprofiledb needs directory to exist already:
-mkdir -p $outfile.extra_files_path &amp;&amp;
-makeprofiledb -out "${os.path.join($outfile.extra_files_path,'blastdb')}"
+mkdir -p $outfile.files_path &amp;&amp;
+makeprofiledb -out "${os.path.join($outfile.files_path,'blastdb')}"
 
 ##We turn $infile_list into $infiles with a configfile entry defined below
 -in $infiles
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.1.01">
+<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.1.02">
     <description>Search protein domain database (PSSMs) with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
@@ -15,13 +15,15 @@
 #if $db_opts.db_opts_selector == "db":
   -db "${db_opts.database.fields.path}"
 #elif $db_opts.db_opts_selector == "histdb":
-  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
+  -db "${os.path.join($db_opts.histdb.files_path,'blastdb')}"
 #end if
 -evalue $evalue_cutoff
 @BLAST_OUTPUT@
 @THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
-@ADVANCED_OPTIONS@
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_QCOV_HSP_PERC@
 ## End of advanced options:
 #end if
     </command>
@@ -41,8 +43,8 @@
             <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
             <expand macro="input_filter_query_default_false" />
             <expand macro="input_max_hits" />
-            <expand macro="input_word_size" />
             <expand macro="input_parse_deflines" />
+            <expand macro="input_qcov_hsp_perc" />
         </expand>
     </inputs>
     <outputs>
@@ -52,6 +54,16 @@
 
         </data>
     </outputs>
+    <tests>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="cd00003_and_cd00008" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="out_format" value="6" />
+            <output name="output1" file="empty_file.dat" ftype="tabular" />
+        </test>
+    </tests>
     <help>
     
 @SEARCH_TIME_WARNING@
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.1.01">
+<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.1.02">
     <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
@@ -15,14 +15,16 @@
 #if $db_opts.db_opts_selector == "db":
   -db "${db_opts.database.fields.path}"
 #elif $db_opts.db_opts_selector == "histdb":
-  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
+  -db "${os.path.join($db_opts.histdb.files_path,'blastdb')}"
 #end if
 -evalue $evalue_cutoff
 @BLAST_OUTPUT@
 ## rpstblastn does not support multiple threads up to release 2.2.27+. Added in BLAST 2.2.28+.
 ##-num_threads 8
 #if $adv_opts.adv_opts_selector=="advanced":
-@ADVANCED_OPTIONS@
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_QCOV_HSP_PERC@
 ## End of advanced options:
 #end if
     </command>
@@ -41,8 +43,8 @@
             <expand macro="input_filter_query_default_false" />
             <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
             <expand macro="input_max_hits" />
-            <expand macro="input_word_size" />
             <expand macro="input_parse_deflines" />
+            <expand macro="input_qcov_hsp_perc" />
         </expand>
     </inputs>
     <outputs>
@@ -50,6 +52,16 @@
             <expand macro="output_change_format" />
         </data>
     </outputs>
+    <tests>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="cd00003_and_cd00008" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="out_format" value="6" />
+            <output name="output1" file="empty_file.dat" ftype="tabular" />
+        </test>
+    </tests>
     <help>
     
 @SEARCH_TIME_WARNING@
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_segmasker_wrapper" name="NCBI BLAST+ segmasker" version="0.1.01">
+<tool id="ncbi_segmasker_wrapper" name="NCBI BLAST+ segmasker" version="0.1.02">
     <description>low-complexity regions in protein sequences</description>
     <macros>
         <token name="@BINARY@">segmasker</token>
@@ -12,7 +12,7 @@
 #if $db_opts.db_opts_selector == "db":
   -in "${db_opts.database.fields.path}" -infmt blastdb
 #elif $db_opts.db_opts_selector == "histdb":
-  -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb
+  -in "${os.path.join($db_opts.histdb.files_path, 'blastdb')}" -infmt blastdb
 #else:
   -in "$subject" -infmt fasta
 #end if
@@ -94,7 +94,7 @@
 **References**
 
 If you use this Galaxy tool in work leading to a scientific publication please
-cite the following papers (a more specific paper covering this wrapper is planned):
+cite the following papers:
 
 @REFERENCES@
     </help>
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.1.01">
+<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.1.02">
     <description>Search translated nucleotide database with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
@@ -13,16 +13,20 @@
 tblastn
 -query "$query"
 @BLAST_DB_SUBJECT@
+-task $blast_type
 -evalue $evalue_cutoff
 @BLAST_OUTPUT@
 @THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
 -db_gencode $adv_opts.db_gencode
 -matrix $adv_opts.matrix
-@ADVANCED_OPTIONS@
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_WORD_SIZE@
 ##Ungapped disabled for now - see comments below
 ##$adv_opts.ungapped
 @ADV_ID_LIST_FILTER@
+@ADV_QCOV_HSP_PERC@
 ## End of advanced options:
 #end if
     </command>
@@ -33,8 +37,12 @@
         <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
 
         <expand macro="input_conditional_nucleotide_db" />
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="tblastn">tblastn - Traditional TBLASTN to compare protein query to translated nucleotide database</option>
+            <option value="tblastn-fast">tblastn-fast - Use longer words for seeding, faster but less accurate</option>
+        </param>
+        <expand macro="input_evalue" />
         <expand macro="input_out_format" />
-        <expand macro="input_evalue" />
         <expand macro="advanced_options">
             <expand macro="input_db_gencode" />
 
@@ -51,6 +59,7 @@
             -->
             <expand macro="input_parse_deflines" />
             <expand macro="advanced_optional_id_files" />
+            <expand macro="input_qcov_hsp_perc" />
         </expand>
     </inputs>
     <outputs>
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.1.01">
+<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.1.02">
     <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
@@ -21,10 +21,11 @@
 -db_gencode $adv_opts.db_gencode
 $adv_opts.strand
 -matrix $adv_opts.matrix
-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
-## Note -max_target_seqs overrides -num_descriptions and -num_alignments
-@ADVANCED_OPTIONS@
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_WORD_SIZE@
 @ADV_ID_LIST_FILTER@
+@ADV_QCOV_HSP_PERC@
 ## End of advanced options:
 #end if
     </command>
@@ -51,6 +52,7 @@
             <expand macro="input_word_size" />
             <expand macro="input_parse_deflines" />
             <expand macro="advanced_optional_id_files" />
+            <expand macro="input_qcov_hsp_perc" />
         </expand>
     </inputs>
     <outputs>
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/repository_dependencies.xml
--- a/tools/ncbi_blast_plus/repository_dependencies.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/repository_dependencies.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format).">
-    <repository changeset_revision="939a600f45e9" name="blast_datatypes" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="236046d1d441" name="blast_datatypes" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
 </repositories>
b
diff -r d0de6862cda1 -r b19b6addd05c tools/ncbi_blast_plus/tool_dependencies.xml
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Fri Oct 31 11:43:23 2014 -0400
+++ b/tools/ncbi_blast_plus/tool_dependencies.xml Tue Jan 27 06:06:18 2015 -0500
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="blast+" version="2.2.29">
-        <repository changeset_revision="e78bbab7933d" name="package_blast_plus_2_2_29" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    <package name="blast+" version="2.2.30">
+        <repository changeset_revision="f69b90d89b62" name="package_blast_plus_2_2_30" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>