Repository 'ncbi_blast_plus'
hg clone https://testtoolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus

Changeset 0:432ea9614cc9 (2015-01-30)
Next changeset 1:5e9d5e536b79 (2015-03-03)
Commit message:
Uploaded v0.1.02 preview 1, using tool_data_table_conf.xml for loc files, etc
added:
test-data/blastdb.loc
test-data/blastdb_d.loc
test-data/blastdb_p.loc
test-data/blastn_arabidopsis.extended.tabular
test-data/blastn_arabidopsis.standard.tabular
test-data/blastn_arabidopsis.xml
test-data/blastn_chimera_vs_three_human_max1.tabular
test-data/blastn_chimera_vs_three_human_max1.txt
test-data/blastn_rhodopsin_vs_three_human.columns.tabular
test-data/blastn_rhodopsin_vs_three_human.tabular
test-data/blastn_rhodopsin_vs_three_human.xml
test-data/blastn_rhodopsin_vs_three_human_converted.tabular
test-data/blastp_four_human_vs_rhodopsin.tabular
test-data/blastp_four_human_vs_rhodopsin.xml
test-data/blastp_four_human_vs_rhodopsin_converted.tabular
test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular
test-data/blastp_four_human_vs_rhodopsin_ext.tabular
test-data/blastp_human_vs_pdb_seg_no.xml
test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular
test-data/blastp_human_vs_pdb_seg_no_converted_std.tabular
test-data/blastp_rhodopsin_vs_four_human.tabular
test-data/blastp_sample.xml
test-data/blastp_sample_converted.tabular
test-data/blastx_rhodopsin_vs_four_human.tabular
test-data/blastx_rhodopsin_vs_four_human.xml
test-data/blastx_rhodopsin_vs_four_human_all.tabular
test-data/blastx_rhodopsin_vs_four_human_converted.tabular
test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular
test-data/blastx_rhodopsin_vs_four_human_ext.tabular
test-data/blastx_sample.xml
test-data/blastx_sample_converted.tabular
test-data/cd00003.smp
test-data/cd00003_and_cd00008.aux
test-data/cd00003_and_cd00008.freq
test-data/cd00003_and_cd00008.loo
test-data/cd00003_and_cd00008.phr
test-data/cd00003_and_cd00008.pin
test-data/cd00003_and_cd00008.psd
test-data/cd00003_and_cd00008.psi
test-data/cd00003_and_cd00008.psq
test-data/cd00003_and_cd00008.rps
test-data/cd00008.smp
test-data/chimera.fasta
test-data/convert2blastmask_four_human_masked.maskinfo-asn1
test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary
test-data/dustmasker_three_human.fasta
test-data/dustmasker_three_human.maskinfo-asn1
test-data/dustmasker_three_human.maskinfo-asn1-binary
test-data/empty_file.dat
test-data/four_human_proteins.dbinfo.txt
test-data/four_human_proteins.fasta
test-data/four_human_proteins.fasta.log.txt
test-data/four_human_proteins.fasta.phd
test-data/four_human_proteins.fasta.phi
test-data/four_human_proteins.fasta.phr
test-data/four_human_proteins.fasta.pin
test-data/four_human_proteins.fasta.pog
test-data/four_human_proteins.fasta.psd
test-data/four_human_proteins.fasta.psi
test-data/four_human_proteins.fasta.psq
test-data/four_human_proteins_masked.fasta
test-data/four_human_proteins_taxid.fasta.log.txt
test-data/four_human_proteins_taxid.fasta.phd
test-data/four_human_proteins_taxid.fasta.phi
test-data/four_human_proteins_taxid.fasta.phr
test-data/four_human_proteins_taxid.fasta.pin
test-data/four_human_proteins_taxid.fasta.pog
test-data/four_human_proteins_taxid.fasta.psd
test-data/four_human_proteins_taxid.fasta.psi
test-data/four_human_proteins_taxid.fasta.psq
test-data/rhodopsin_nucs.fasta
test-data/rhodopsin_proteins.fasta
test-data/segmasker_four_human.fasta
test-data/segmasker_four_human.maskinfo-asn1
test-data/segmasker_four_human.maskinfo-asn1-binary
test-data/tblastn_four_human_vs_rhodopsin.html
test-data/tblastn_four_human_vs_rhodopsin.tabular
test-data/tblastn_four_human_vs_rhodopsin.xml
test-data/tblastn_four_human_vs_rhodopsin_ext.tabular
test-data/tblastx_rhodopsin_vs_three_human.tabular
test-data/three_human_mRNA.dbinfo.txt
test-data/three_human_mRNA.fasta
test-data/three_human_mRNA.fasta.log.txt
test-data/three_human_mRNA.fasta.nhd
test-data/three_human_mRNA.fasta.nhi
test-data/three_human_mRNA.fasta.nhr
test-data/three_human_mRNA.fasta.nin
test-data/three_human_mRNA.fasta.nog
test-data/three_human_mRNA.fasta.nsd
test-data/three_human_mRNA.fasta.nsi
test-data/three_human_mRNA.fasta.nsq
test-data/tool_data_table_conf.xml.test
tool-data/blastdb.loc.sample
tool-data/blastdb_d.loc.sample
tool-data/blastdb_p.loc.sample
tool-data/tool_data_table_conf.xml.sample
tools/ncbi_blast_plus/README.rst
tools/ncbi_blast_plus/blastxml_to_tabular.py
tools/ncbi_blast_plus/blastxml_to_tabular.xml
tools/ncbi_blast_plus/check_no_duplicates.py
tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml
tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
tools/ncbi_blast_plus/ncbi_macros.xml
tools/ncbi_blast_plus/ncbi_makeblastdb.xml
tools/ncbi_blast_plus/ncbi_makeprofiledb.xml
tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
tools/ncbi_blast_plus/repository_dependencies.xml
tools/ncbi_blast_plus/tool_dependencies.xml
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastdb.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastdb.loc Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,7 @@
+# This is a test file distributed with the Galaxy BLAST+ wrapper for
+# defining a list of nucleotide BLAST databases used in functional
+# tests for blastn etc.
+#
+# See the file tool-data/blastdb.loc.sample for more information.
+#
+three_human_mRNA Three Human mRNAs ${__HERE__}/three_human_mRNA.fasta
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastdb_d.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastdb_d.loc Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,7 @@
+# This is a test file distributed with the Galaxy BLAST+ wrapper for
+# defining a list of protein domain BLAST databases used in functional
+# tests of rpsblast etc.
+#
+# See the file tool-data/blastdb_d.loc.sample for more information.
+#
+cd00003_and_cd00008 Domains CD00003 (PNPsynthase) and CD00008 (PIN_53EXO-like) ${__HERE__}/cd00003_and_cd00008
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastdb_p.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastdb_p.loc Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,8 @@
+# This is a test file distributed with the Galaxy BLAST+ wrapper for
+# defining a list of protein BLAST databases used in functional tests
+# for blastp etc.
+#
+# See the file tool-data/blastdb_p.loc.sample for more information.
+#
+four_human_proteins Four Human Proteins (no taxid) ${__HERE__}/four_human_proteins.fasta
+four_human_proteins_taxid Four Human Proteins (with taxid) ${__HERE__}/four_human_proteins_taxid.fasta
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_arabidopsis.extended.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_arabidopsis.extended.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,1 @@
+chunk_of_plant chrIII 100.00 630 0 0 1 630 4341 4970 0.0 1164 chrIII 630 630 630 0 100.00 1 1 GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT 630 23459830 gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_arabidopsis.standard.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_arabidopsis.standard.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,1 @@
+chunk_of_plant chrIII 100.00 630 0 0 1 630 4341 4970 0.0 1164
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_arabidopsis.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_arabidopsis.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,71 @@
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
+<BlastOutput>
+  <BlastOutput_program>blastn</BlastOutput_program>
+  <BlastOutput_version>BLASTN 2.2.28+</BlastOutput_version>
+  <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), &quot;A greedy algorithm for aligning DNA sequences&quot;, J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference>
+  <BlastOutput_db>/mnt/galaxy/galaxy-central/database/files/000/dataset_857_files/blastdb</BlastOutput_db>
+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
+  <BlastOutput_query-def>chunk_of_plant</BlastOutput_query-def>
+  <BlastOutput_query-len>630</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_expect>0.001</Parameters_expect>
+      <Parameters_sc-match>1</Parameters_sc-match>
+      <Parameters_sc-mismatch>-2</Parameters_sc-mismatch>
+      <Parameters_gap-open>0</Parameters_gap-open>
+      <Parameters_gap-extend>0</Parameters_gap-extend>
+      <Parameters_filter>L;m;</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+<BlastOutput_iterations>
+<Iteration>
+  <Iteration_iter-num>1</Iteration_iter-num>
+  <Iteration_query-ID>Query_1</Iteration_query-ID>
+  <Iteration_query-def>chunk_of_plant</Iteration_query-def>
+  <Iteration_query-len>630</Iteration_query-len>
+<Iteration_hits>
+<Hit>
+  <Hit_num>1</Hit_num>
+  <Hit_id>gnl|BL_ORD_ID|2</Hit_id>
+  <Hit_def>chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence</Hit_def>
+  <Hit_accession>2</Hit_accession>
+  <Hit_len>23459830</Hit_len>
+  <Hit_hsps>
+    <Hsp>
+      <Hsp_num>1</Hsp_num>
+      <Hsp_bit-score>1164.51</Hsp_bit-score>
+      <Hsp_score>630</Hsp_score>
+      <Hsp_evalue>0</Hsp_evalue>
+      <Hsp_query-from>1</Hsp_query-from>
+      <Hsp_query-to>630</Hsp_query-to>
+      <Hsp_hit-from>4341</Hsp_hit-from>
+      <Hsp_hit-to>4970</Hsp_hit-to>
+      <Hsp_query-frame>1</Hsp_query-frame>
+      <Hsp_hit-frame>1</Hsp_hit-frame>
+      <Hsp_identity>630</Hsp_identity>
+      <Hsp_positive>630</Hsp_positive>
+      <Hsp_gaps>0</Hsp_gaps>
+      <Hsp_align-len>630</Hsp_align-len>
+      <Hsp_qseq>GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT</Hsp_qseq>
+      <Hsp_hseq>GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT</Hsp_hseq>
+      <Hsp_midline>||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||</Hsp_midline>
+    </Hsp>
+  </Hit_hsps>
+</Hit>
+</Iteration_hits>
+  <Iteration_stat>
+    <Statistics>
+      <Statistics_db-num>5</Statistics_db-num>
+      <Statistics_db-len>119146348</Statistics_db-len>
+      <Statistics_hsp-len>26</Statistics_hsp-len>
+      <Statistics_eff-space>71964315672</Statistics_eff-space>
+      <Statistics_kappa>0.46</Statistics_kappa>
+      <Statistics_lambda>1.28</Statistics_lambda>
+      <Statistics_entropy>0.85</Statistics_entropy>
+    </Statistics>
+  </Iteration_stat>
+</Iteration>
+</BlastOutput_iterations>
+</BlastOutput>
+
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_chimera_vs_three_human_max1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_chimera_vs_three_human_max1.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,1 @@
+chimera ENA|AB011145|AB011145.1 100.00 4560 0 0 1 4560 121 4680 0.0  8421
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_chimera_vs_three_human_max1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_chimera_vs_three_human_max1.txt Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,356 @@\n+BLASTN 2.2.30+\n+\n+\n+Reference: Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb\n+Miller (2000), "A greedy algorithm for aligning DNA sequences", J\n+Comput Biol 2000; 7(1-2):203-14.\n+\n+\n+\n+Database: Just 3 human mRNA sequences\n+           3 sequences; 10,732 total letters\n+\n+\n+\n+Query= chimera chunks of AB011145 plus M10051 plus BC112106\n+\n+Length=9973\n+                                                                      Score     E\n+Sequences producing significant alignments:                          (Bits)  Value\n+\n+  ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein,...   8421   0.0  \n+\n+\n+> ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, \n+partial cds.\n+Length=4796\n+\n+ Score =  8421 bits (4560),  Expect = 0.0\n+ Identities = 4560/4560 (100%), Gaps = 0/4560 (0%)\n+ Strand=Plus/Plus\n+\n+Query  1     GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC  60\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  121   GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC  180\n+\n+Query  61    CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA  120\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  181   CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA  240\n+\n+Query  121   TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC  180\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  241   TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC  300\n+\n+Query  181   TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT  240\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  301   TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT  360\n+\n+Query  241   TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA  300\n+             
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  361   TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA  420\n+\n+Query  301   CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA  360\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  421   CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA  480\n+\n+Query  361   TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA  420\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  481   TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA  540\n+\n+Query  421   CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC  480\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  541   CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC  600\n+\n+Query  481   TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA  540\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  601   TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA  660\n+\n+Query  541   TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT  600\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  661   TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT  720\n+\n+Query  601   TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG  660\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  721   TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG  780\n+\n+Query  661   GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA  720\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  781   GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA  840\n+\n+Query  721   TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA  780\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  
841   TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA  900\n+\n+Query  781   ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAA'..b'3780\n+\n+Query  3661  GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT  3720\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  3781  GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT  3840\n+\n+Query  3721  TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT  3780\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  3841  TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT  3900\n+\n+Query  3781  GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT  3840\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  3901  GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT  3960\n+\n+Query  3841  TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG  3900\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  3961  TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG  4020\n+\n+Query  3901  GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA  3960\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4021  GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA  4080\n+\n+Query  3961  CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA  4020\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4081  CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA  4140\n+\n+Query  4021  CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA  4080\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4141  CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA  4200\n+\n+Query  4081  GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA  4140\n+             
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4201  GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA  4260\n+\n+Query  4141  GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT  4200\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4261  GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT  4320\n+\n+Query  4201  GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG  4260\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4321  GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG  4380\n+\n+Query  4261  GAAACTTTTAGATGACATTCTACAAATTAtttttttCTTTAAATTAAAAGAACCTAGCCA  4320\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4381  GAAACTTTTAGATGACATTCTACAAATTATTTTTTTCTTTAAATTAAAAGAACCTAGCCA  4440\n+\n+Query  4321  ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA  4380\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4441  ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA  4500\n+\n+Query  4381  AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA  4440\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4501  AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA  4560\n+\n+Query  4441  ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA  4500\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4561  ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA  4620\n+\n+Query  4501  GGTTGTCAAGAAGGCttttttttttttcttttttAAACCTGAGGGCAAAAAGGAATGGAT  4560\n+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct  4621  GGTTGTCAAGAAGGCTTTTTTTTTTTTCTTTTTTAAACCTGAGGGCAAAAAGGAATGGAT  4680\n+\n+\n+\n+Lambda      K        H\n+    1.33    0.621     1.12 \n+\n+Gapped\n+Lambda      K        H\n+    1.28    0.460    0.850 \n+\n+Effective search 
space used: 106299490\n+\n+\n+  Database: Just 3 human mRNA sequences\n+    Posted date:  Dec 26, 2014  5:54 AM\n+  Number of letters in database: 10,732\n+  Number of sequences in database:  3\n+\n+\n+\n+Matrix: blastn matrix 1 -2\n+Gap Penalties: Existence: 0, Extension: 2.5\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_rhodopsin_vs_three_human.columns.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human.columns.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 4301 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 4301 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 4301 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 4301 1213
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 983 1213
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1047 1213
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_rhodopsin_vs_three_human.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0  1474
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132   460
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93   331
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73   265
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68   248
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0  1323
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0  1208
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_rhodopsin_vs_three_human.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human.xml Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,549 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+  <BlastOutput_program>blastn</BlastOutput_program>\n+  <BlastOutput_version>BLASTN 2.2.30+</BlastOutput_version>\n+  <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), &quot;A greedy algorithm for aligning DNA sequences&quot;, J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference>\n+  <BlastOutput_db></BlastOutput_db>\n+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+  <BlastOutput_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</BlastOutput_query-def>\n+  <BlastOutput_query-len>1047</BlastOutput_query-len>\n+  <BlastOutput_param>\n+    <Parameters>\n+      <Parameters_expect>1e-40</Parameters_expect>\n+      <Parameters_sc-match>1</Parameters_sc-match>\n+      <Parameters_sc-mismatch>-2</Parameters_sc-mismatch>\n+      <Parameters_gap-open>0</Parameters_gap-open>\n+      <Parameters_gap-extend>0</Parameters_gap-extend>\n+      <Parameters_filter>L;m;</Parameters_filter>\n+    </Parameters>\n+  </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+  <Iteration_iter-num>1</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+  <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>4933992</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      <Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  
<Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>2</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+  <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>4933992</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      <Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>3</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+  <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+  <Hit_num>1</Hit_num>\n+  <Hit_id>Subject_3</Hit_id>\n+  <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def>\n+  <Hit_accession>Subject_3</Hit_accession>\n+  <Hit_len>1213</Hit_len>\n+  <Hit_hsps>\n+    <Hsp>\n+      <Hsp_num>1</Hsp_num>\n+      <Hsp_bit-score>1474.75</Hsp_bit-score>\n+      <Hsp_score>798</Hsp_score>\n+      <Hsp_evalue>0</Hsp_evalue>\n+      <Hsp_query-from>1</Hsp_query-from>\n+      <Hsp_query-to>1047</Hsp_query-to>\n+      <Hsp_hit-from>88</Hsp_hit-from>\n+      <Hsp_hit-to>1134</Hsp_hit-to>\n+      <Hsp_query-frame>1</Hsp_query-frame>\n+      <Hsp_hit-frame>1</Hsp_hit-frame>\n+      <Hsp_identity>964</Hsp_identity>\n+      <Hsp_positive>964</Hsp_positive>\n+      
<Hsp_gaps>0</Hsp_gaps>\n+      <Hsp_align-len>1047</Hsp_align-len>\n+      <Hsp_qseq>ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCC'..b'|||||||| |||||||||||||||||| | || || ||||||||||||||||||||||| |||||||| || |||||||||||  | || |||||||||| |||||| || ||||||||||| || |||||||| ||||| || || || |||||  | |||||||||||||||||| | |||||||||||||| ||||||||||||||||| || ||||||||||||||||| |||||||| ||||||||||||||||||||||||||||||||||| |||||||||||||| || ||||||||||||||||| |||||||| || || ||||| |||| ||||||||| || |||||||| |||||  ||||||||||||| || ||||| |||||||||| | |   ||||  |||||| ||||| || ||||||||||||||||| || ||||||| ||||||| ||||| |||| || |||||||| |||||||| |||||||||||||||||||| || ||||||||||||||||| |||||||| |||||||| |||||||||||||| || ||||||||||||||||||||||||||||| || |||||| ||||||||||  | ||||| ||||||||||||||||| ||||| ||| |||| || |||||||||||||||||||| || ||||||||||||| || | ||| |||| ||||| |||||||| ||||||||||||||||||||||||||||||||| |||||||  ||||||| ||||||||||| || |||||||| ||||||||  | |||||||||||||| ||||| ||||| |||||||| ||||||</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>4933992</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      <Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>16</Iteration_iter-num>\n+  <Iteration_query-ID>Query_6</Iteration_query-ID>\n+  <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+  <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      
<Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>6353949</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      <Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>17</Iteration_iter-num>\n+  <Iteration_query-ID>Query_6</Iteration_query-ID>\n+  <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+  <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>6353949</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      <Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>18</Iteration_iter-num>\n+  <Iteration_query-ID>Query_6</Iteration_query-ID>\n+  <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+  <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>6353949</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      
<Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_rhodopsin_vs_three_human_converted.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human_converted.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132 460
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93 331
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73 265
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68 248
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_four_human_vs_rhodopsin.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_four_human_vs_rhodopsin.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,6 @@
+sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0   701
+sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0   619
+sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0   653
+sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0   631
+sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0   673
+sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0   599
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_four_human_vs_rhodopsin.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_four_human_vs_rhodopsin.xml Fri Jan 30 08:27:28 2015 -0500
[
b'@@ -0,0 +1,665 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+  <BlastOutput_program>blastp</BlastOutput_program>\n+  <BlastOutput_version>BLASTP 2.2.30+</BlastOutput_version>\n+  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+  <BlastOutput_db></BlastOutput_db>\n+  <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID>\n+  <BlastOutput_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def>\n+  <BlastOutput_query-len>406</BlastOutput_query-len>\n+  <BlastOutput_param>\n+    <Parameters>\n+      <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+      <Parameters_expect>1e-08</Parameters_expect>\n+      <Parameters_gap-open>11</Parameters_gap-open>\n+      <Parameters_gap-extend>1</Parameters_gap-extend>\n+      <Parameters_filter>F</Parameters_filter>\n+    </Parameters>\n+  </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+  <Iteration_iter-num>1</Iteration_iter-num>\n+  <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+  <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+  <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>30</Statistics_hsp-len>\n+      <Statistics_eff-space>119568</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      
<Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>2</Iteration_iter-num>\n+  <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+  <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+  <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>30</Statistics_hsp-len>\n+      <Statistics_eff-space>119568</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>3</Iteration_iter-num>\n+  <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+  <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+  <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>30</Statistics_hsp-len>\n+      <Statistics_eff-space>119568</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>4</Iteration_iter-num>\n+  
<Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+  <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sa'..b'q>\n+      <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>\n+      <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT   E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>29</Statistics_hsp-len>\n+      <Statistics_eff-space>101761</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>24</Iteration_iter-num>\n+  <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>\n+  <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>\n+  <Iteration_query-len>348</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+  <Hit_num>1</Hit_num>\n+  <Hit_id>gi|12583665|dbj|BAB21486.1|</Hit_id>\n+  <Hit_def>fresh water form rod opsin [Conger myriaster]</Hit_def>\n+  <Hit_accession>BAB21486</Hit_accession>\n+  <Hit_len>354</Hit_len>\n+  <Hit_hsps>\n+    <Hsp>\n+      
<Hsp_num>1</Hsp_num>\n+      <Hsp_bit-score>599.356</Hsp_bit-score>\n+      <Hsp_score>1544</Hsp_score>\n+      <Hsp_evalue>0</Hsp_evalue>\n+      <Hsp_query-from>1</Hsp_query-from>\n+      <Hsp_query-to>341</Hsp_query-to>\n+      <Hsp_hit-from>1</Hsp_hit-from>\n+      <Hsp_hit-to>342</Hsp_hit-to>\n+      <Hsp_query-frame>0</Hsp_query-frame>\n+      <Hsp_hit-frame>0</Hsp_hit-frame>\n+      <Hsp_identity>281</Hsp_identity>\n+      <Hsp_positive>314</Hsp_positive>\n+      <Hsp_gaps>1</Hsp_gaps>\n+      <Hsp_align-len>342</Hsp_align-len>\n+      <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE</Hsp_qseq>\n+      <Hsp_hseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE</Hsp_hseq>\n+      <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV  TW MALACA PPL GWSRYIPEGLQCSCGIDYYT  P +NNESFVIYMF  HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP   +D ASAT SKTE</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>29</Statistics_hsp-len>\n+      <Statistics_eff-space>101761</Statistics_eff-space>\n+      
<Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_four_human_vs_rhodopsin_converted.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_four_human_vs_rhodopsin_converted.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,6 @@
+sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701
+sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619
+sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653
+sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631
+sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673
+sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Fri Jan 30 08:27:28 2015 -0500
[
@@ -0,0 +1,6 @@
+sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 rhodopsin [Felis catus]
+sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 RecName: Full=Rhodopsin
+sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Cynopterus brachyotis]
+sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Myotis pilosus]
+sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 rhodopsin [Bos taurus]
+sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 fresh water form rod opsin [Conger myriaster]
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_four_human_vs_rhodopsin_ext.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,6 @@
+sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0   701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 N/A
+sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0   619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 N/A
+sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0   653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A
+sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0   631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A
+sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0   673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 N/A
+sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0   599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 N/A
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_human_vs_pdb_seg_no.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_human_vs_pdb_seg_no.xml Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,322 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+  <BlastOutput_program>blastp</BlastOutput_program>\n+  <BlastOutput_version>BLASTP 2.2.24+</BlastOutput_version>\n+  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+  <BlastOutput_db>/data/blastdb/pdbaa</BlastOutput_db>\n+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+  <BlastOutput_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def>\n+  <BlastOutput_query-len>406</BlastOutput_query-len>\n+  <BlastOutput_param>\n+    <Parameters>\n+      <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+      <Parameters_expect>1e-08</Parameters_expect>\n+      <Parameters_gap-open>11</Parameters_gap-open>\n+      <Parameters_gap-extend>1</Parameters_gap-extend>\n+      <Parameters_filter>F</Parameters_filter>\n+    </Parameters>\n+  </BlastOutput_param>\n+  <BlastOutput_iterations>\n+    <Iteration>\n+      <Iteration_iter-num>1</Iteration_iter-num>\n+      <Iteration_query-ID>Query_1</Iteration_query-ID>\n+      <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+      <Iteration_query-len>406</Iteration_query-len>\n+      <Iteration_hits>\n+        <Hit>\n+          <Hit_num>1</Hit_num>\n+          <Hit_id>gi|193885198|pdb|2R2J|A</Hit_id>\n+          <Hit_def>Chain A, Crystal Structure Of Human Erp44</Hit_def>\n+          <Hit_accession>2R2J_A</Hit_accession>\n+          <Hit_len>382</Hit_len>\n+          <Hit_hsps>\n+            <Hsp>\n+              <Hsp_num>1</Hsp_num>\n+              
<Hsp_bit-score>768.073791748238</Hsp_bit-score>\n+              <Hsp_score>1982</Hsp_score>\n+              <Hsp_evalue>0</Hsp_evalue>\n+              <Hsp_query-from>26</Hsp_query-from>\n+              <Hsp_query-to>406</Hsp_query-to>\n+              <Hsp_hit-from>2</Hsp_hit-from>\n+              <Hsp_hit-to>382</Hsp_hit-to>\n+              <Hsp_query-frame>0</Hsp_query-frame>\n+              <Hsp_hit-frame>0</Hsp_hit-frame>\n+              <Hsp_identity>370</Hsp_identity>\n+              <Hsp_positive>372</Hsp_positive>\n+              <Hsp_gaps>0</Hsp_gaps>\n+              <Hsp_align-len>381</Hsp_align-len>\n+              <Hsp_qseq>PVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL</Hsp_qseq>\n+              <Hsp_hseq>PLGSEITSLDTENIDEILNNADVALVNFYADWCRFSQXLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGXXXKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDXVYLGAXTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHXKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHXYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL</Hsp_hseq>\n+              <Hsp_midline>P+ +EITSLDTENIDEILNNADVALVNFYADWCRFSQ LHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNG   KREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPD VYLGA TNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFH KEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRH YVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL</Hsp_midline>\n+            </Hsp>\n+          </Hit_hsps>\n+        </Hit>\n+        <Hit>\n+          <Hit_num>2</Hit_num>\n+      
    <Hit_id>gi|88192228|pdb|2B5E|A</Hit_id>\n+          <Hit_d'..b'      <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA</Hsp_qseq>\n+              <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>\n+              <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT   E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+            </Hsp>\n+          </Hit_hsps>\n+        </Hit>\n+        <Hit>\n+          <Hit_num>2</Hit_num>\n+          <Hit_id>gi|195927458|pdb|3C9M|A</Hit_id>\n+          <Hit_def>Chain A, Structure Of A Mutant Bovine Rhodopsin In Hexagonal Crystal Form</Hit_def>\n+          <Hit_accession>3C9M_A</Hit_accession>\n+          <Hit_len>348</Hit_len>\n+          <Hit_hsps>\n+            <Hsp>\n+              <Hsp_num>1</Hsp_num>\n+              <Hsp_bit-score>674.085095224404</Hsp_bit-score>\n+              <Hsp_score>1738</Hsp_score>\n+              <Hsp_evalue>0</Hsp_evalue>\n+              <Hsp_query-from>1</Hsp_query-from>\n+              <Hsp_query-to>348</Hsp_query-to>\n+              <Hsp_hit-from>1</Hsp_hit-from>\n+              
<Hsp_hit-to>348</Hsp_hit-to>\n+              <Hsp_query-frame>0</Hsp_query-frame>\n+              <Hsp_hit-frame>0</Hsp_hit-frame>\n+              <Hsp_identity>324</Hsp_identity>\n+              <Hsp_positive>335</Hsp_positive>\n+              <Hsp_gaps>0</Hsp_gaps>\n+              <Hsp_align-len>348</Hsp_align-len>\n+              <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA</Hsp_qseq>\n+              <Hsp_hseq>MCGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSCFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>\n+              <Hsp_midline>M GTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT   E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+            </Hsp>\n+          </Hit_hsps>\n+        </Hit>\n+      </Iteration_hits>\n+      <Iteration_stat>\n+        <Statistics>\n+          <Statistics_db-num>49615</Statistics_db-num>\n+          <Statistics_db-len>11554246</Statistics_db-len>\n+          <Statistics_hsp-len>0</Statistics_hsp-len>\n+          <Statistics_eff-space>1672994000</Statistics_eff-space>\n+          <Statistics_kappa>0.041</Statistics_kappa>\n+          <Statistics_lambda>0.267</Statistics_lambda>\n+          
<Statistics_entropy>0.14</Statistics_entropy>\n+        </Statistics>\n+      </Iteration_stat>\n+    </Iteration>\n+  </BlastOutput_iterations>\n+</BlastOutput>\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,8 @@\n+sp|Q9BS26|ERP44_HUMAN\tgi|193885198|pdb|2R2J|A\t97.11\t381\t11\t0\t26\t406\t2\t382\t0.0\t768\tgi|193885198|pdb|2R2J|A\t1982\t370\t372\t0\t97.64\t1\t1\tPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL\tPLGSEITSLDTENIDEILNNADVALVNFYADWCRFSQXLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGXXXKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDXVYLGAXTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHXKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHXYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL\t406\t382\tChain A, Crystal Structure Of Human Erp44\n+sp|Q9BS26|ERP44_HUMAN\tgi|88192228|pdb|2B5E|A\t25.17\t290\t193\t8\t25\t306\t10\t283\t4e-20\t95.1\tgi|88192228|pdb|2B5E|A;gi|206581884|pdb|3BOA|A\t235\t73\t133\t24\t45.86\t1\t1\tTPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKR-EYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNI---IYKPPGHSAPDMVYLGA---MTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKF-RH\tAPEDSAVVKLATDSFNEYIQSHDLVLAEFFAPWCGHCKNMAPEYVKAAETLVEK-----NITLAQIDCTENQDLCMEHNIPGFPSLKIFKNSDVNNSIDYEGPRTAEAIVQFMIKQSQPAVAVVADLPAYLANETFVTPVIVQSGKIDADFNATFYSMANKHFNDYDFVSA--------ENADDDFKLSIYLPSAMDEP-VVYNGKKADIADADVFEKWLQVEALPYFGEIDGSVFAQYVESGLPLGYLFY--NDEEELEEYKPLFTELAKKNRGLMNFVSIDARKFGRH\t406\t504\tChain A, Crystal Structure Of Yeast Protein Disulfide Isomerase<>Chain A, Crystal Structure Of Yeast Protein Disulfide 
Isomerase.\n+sp|Q9NSY1|BMP2K_HUMAN\tgi|73536291|pdb|2BUJ|A\t29.39\t279\t182\t8\t40\t308\t21\t294\t1e-22\t105\tgi|73536291|pdb|2BUJ|A;gi|73536292|pdb|2BUJ|B\t262\t82\t130\t15\t46.59\t1\t1\tGVRVFAVGRHQVTLEESLAEGGFSTVFLVR-THGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMNKKLQTG--FTEPEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDG-VNVVEEEIKKYTTLSYRAPEMINLYGGKPITTKADIWALGCLLYKLCFFTLPF------GESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDI\tGHMVIIDNKHYLFIQK-LGEGGFSYVDLVEGLHDGHFYALKRILCHEQQDREEAQREAD-MHRLFNHPNILRLVAYCLRERGAKH-EAWLLLPFFKRGTLWNEIERLKDKGNFLTEDQILWLLLGICRGLEAIH--AKGYAHRDLKPTNILLGDEGQPVLMDLGSMNQACIHVEGSRQALTLQDWAAQRCTISYRAPELFSVQSHCVIDERTDVWSLGCVLYAMMFGEGPYDMVFQKGDSVALAVQNQLSIPQSPRHSSALWQLLNSMMTVDPHQRPHI\t1161\t317\tChain A, Crystal Structure Of The Human Serine-Threonine Kinase 16 In Complex With Staurosporine<>Chain B, Crystal Structure Of The Human Serine-Threonine Kinase 16 In Complex With Staurosporine\n+sp|Q9NSY1|BMP2K_HUMAN\tgi|270346335|pdb|2WQM|A\t27.21\t272\t166\t12\t53\t311\t36\t288\t6e-17\t86.3\tgi|270346335|pdb|2WQM|A;gi|270346336|pdb|2WQN|A\t212\t74\t129\t32\t47.43\t1\t1\tLEESLAEGGFSTVFLVRTH-GGIRCALKRMYVNNMPDLNV---CKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMN--KKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGGKPITTKADIWALGCLLYKLCFFTLPFGESQV---AICD----GNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQV\tIEKKIGRGQFSEVYRAACLLDGVPVALKKVQIFDLMDAKARADCIKEIDLLKQLN-HPNVIKYY---ASFIEDN--ELNIVLELADAGDLSRMIKHFKKQKRLIPERTVWKYFVQLCSALEHMHSRR--VMHRDIKPANVFITATGVVKLGDLG--LGRFFSSKTTAAHSL------VGTPYYMSPERIHENG---YNFKSDIWSLGCLLYEMAALQSPFYGDKMNLYSLCKKIEQCDYPPLPSDHYSEELRQLVNMCINPDPEKRPDVTYV\t1161\t310\tChain A, Structure Of Apo Human Nek7<>Chain A, Structure Of Adp-Bound Human 
Nek7\n+sp|P06213|INSR_HUMAN\tgi|116667097|pdb|2DTG|E\t95.91\t928\t7\t2\t28\t955\t1\t897\t0.0\t1846\tgi|116667097|pdb|2DTG|E\t4781\t890\t893\t31\t96.23\t1\t1\tHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPK'..b'83860|pdb|2I35|A;gi|157883861|pdb|2I36|A;gi|157883862|pdb|2I36|B;gi|157883863|pdb|2I36|C;gi|157883864|pdb|2I37|A;gi|157883865|pdb|2I37|B;gi|157883866|pdb|2I37|C;gi|159795066|pdb|2PED|A;gi|159795067|pdb|2PED|B;gi|192988480|pdb|3CAP|A;gi|192988481|pdb|3CAP|B;gi|195927457|pdb|3C9L|A;gi|197107530|pdb|1F88|A;gi|197107531|pdb|1F88|B;gi|206582030|pdb|3DQB|A\t1756\t325\t337\t0\t96.84\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t348\tChain A, Structure Of Bovine Rhodopsin (Dark Adapted)<>Chain A, Structure Of Bovine Rhodopsin (Metarhodopsin Ii)<>Chain A, Structure Of Bovine Rhodopsin In A Trigonal Crystal Form<>Chain B, Structure Of Bovine Rhodopsin In A Trigonal Crystal Form<>Chain A, Crystal Structure Of Bovine Rhodopsin<>Chain B, Crystal Structure Of Bovine Rhodopsin<>Chain A, Crystal Structure Of Bovine Rhodopsin At 2.6 Angstroms Resolution<>Chain B, 
Crystal Structure Of Bovine Rhodopsin At 2.6 Angstroms Resolution<>Chain A, Crystal Structure Of Bovine Rhodopsin At 2.2 Angstroms Resolution<>Chain B, Crystal Structure Of Bovine Rhodopsin At 2.2 Angstroms Resolution<>Chain A, Crystallographic Model Of Bathorhodopsin<>Chain B, Crystallographic Model Of Bathorhodopsin<>Chain A, Crystallographic Model Of Lumirhodopsin<>Chain B, Crystallographic Model Of Lumirhodopsin<>Chain A, Crystal Structure Of Rhombohedral Crystal Form Of Ground- State Rhodopsin<>Chain A, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain B, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain C, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain A, Crystal Structure Of A Photoactivated Rhodopsin<>Chain B, Crystal Structure Of A Photoactivated Rhodopsin<>Chain C, Crystal Structure Of A Photoactivated Rhodopsin<>Chain A, Crystallographic Model Of 9-Cis-Rhodopsin<>Chain B, Crystallographic Model Of 9-Cis-Rhodopsin<>Chain A, Crystal Structure Of Native Opsin: The G Protein-Coupled Receptor Rhodopsin In Its Ligand-Free State<>Chain B, Crystal Structure Of Native Opsin: The G Protein-Coupled Receptor Rhodopsin In Its Ligand-Free State<>Chain A, Structure Of Ground-State Bovine Rhodospin In A Hexagonal Crystal Form<>Chain A, Crystal Structure Of Bovine Rhodopsin<>Chain B, Crystal Structure Of Bovine Rhodopsin<>Chain A, Crystal Structure Of The Active G-Protein-Coupled Receptor Opsin In Complex With A C-Terminal Peptide Derived From The Galpha Subunit Of 
Transducin\n+sp|P08100|OPSD_HUMAN\tgi|195927458|pdb|3C9M|A\t93.10\t348\t24\t0\t1\t348\t1\t348\t0.0\t674\tgi|195927458|pdb|3C9M|A\t1738\t324\t335\t0\t96.26\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMCGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSCFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t348\tChain A, Structure Of A Mutant Bovine Rhodopsin In Hexagonal Crystal Form\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_human_vs_pdb_seg_no_converted_std.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_human_vs_pdb_seg_no_converted_std.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,8 @@
+sp|Q9BS26|ERP44_HUMAN gi|193885198|pdb|2R2J|A 97.11 381 11 0 26 406 2 382 0.0 768
+sp|Q9BS26|ERP44_HUMAN gi|88192228|pdb|2B5E|A 25.17 290 193 8 25 306 10 283 4e-20 95.1
+sp|Q9NSY1|BMP2K_HUMAN gi|73536291|pdb|2BUJ|A 29.39 279 182 8 40 308 21 294 1e-22 105
+sp|Q9NSY1|BMP2K_HUMAN gi|270346335|pdb|2WQM|A 27.21 272 166 12 53 311 36 288 6e-17 86.3
+sp|P06213|INSR_HUMAN gi|116667097|pdb|2DTG|E 95.91 928 7 2 28 955 1 897 0.0 1846
+sp|P06213|INSR_HUMAN gi|114794482|pdb|2HR7|A 99.59 485 2 0 28 512 1 485 0.0 1016
+sp|P08100|OPSD_HUMAN gi|16975387|pdb|1JFP|A 93.39 348 23 0 1 348 1 348 0.0 681
+sp|P08100|OPSD_HUMAN gi|195927458|pdb|3C9M|A 93.10 348 24 0 1 348 1 348 0.0 674
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_rhodopsin_vs_four_human.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_rhodopsin_vs_four_human.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,6 @@
+gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 348 1 348 0.0   701
+gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.33 354 53 2 1 354 1 348 0.0   605
+gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0   630
+gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0   630
+gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.10 348 23 1 1 347 1 348 0.0   651
+gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.09 349 65 1 1 349 1 348 0.0   587
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_sample.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_sample.xml Fri Jan 30 08:27:28 2015 -0500
[
b'@@ -0,0 +1,293 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+  <BlastOutput_program>blastp</BlastOutput_program>\n+  <BlastOutput_version>BLASTP 2.2.24+</BlastOutput_version>\n+  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+  <BlastOutput_db>nr</BlastOutput_db>\n+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+  <BlastOutput_query-def>Sample</BlastOutput_query-def>\n+  <BlastOutput_query-len>516</BlastOutput_query-len>\n+  <BlastOutput_param>\n+    <Parameters>\n+      <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+      <Parameters_expect>1e-30</Parameters_expect>\n+      <Parameters_gap-open>11</Parameters_gap-open>\n+      <Parameters_gap-extend>1</Parameters_gap-extend>\n+      <Parameters_filter>F</Parameters_filter>\n+    </Parameters>\n+  </BlastOutput_param>\n+  <BlastOutput_iterations>\n+    <Iteration>\n+      <Iteration_iter-num>1</Iteration_iter-num>\n+      <Iteration_query-ID>Query_1</Iteration_query-ID>\n+      <Iteration_query-def>Sample</Iteration_query-def>\n+      <Iteration_query-len>516</Iteration_query-len>\n+      <Iteration_hits>\n+        <Hit>\n+          <Hit_num>1</Hit_num>\n+          <Hit_id>gi|119953746|ref|YP_950551.1|</Hit_id>\n+          <Hit_def>tail tape measure protein [Streptococcus phage SMP] &gt;gi|118430558|gb|ABK91882.1| tail tape measure protein [Streptococcus suis phage SMP]</Hit_def>\n+          <Hit_accession>YP_950551</Hit_accession>\n+          <Hit_len>659</Hit_len>\n+          <Hit_hsps>\n+            <Hsp>\n+              <Hsp_num>1</Hsp_num>\n+              <Hsp_bit-score>949.117592429394</Hsp_bit-score>\n+              <Hsp_score>2452</Hsp_score>\n+  
            <Hsp_evalue>0</Hsp_evalue>\n+              <Hsp_query-from>1</Hsp_query-from>\n+              <Hsp_query-to>516</Hsp_query-to>\n+              <Hsp_hit-from>27</Hsp_hit-from>\n+              <Hsp_hit-to>542</Hsp_hit-to>\n+              <Hsp_query-frame>0</Hsp_query-frame>\n+              <Hsp_hit-frame>0</Hsp_hit-frame>\n+              <Hsp_identity>500</Hsp_identity>\n+              <Hsp_positive>500</Hsp_positive>\n+              <Hsp_gaps>0</Hsp_gaps>\n+              <Hsp_align-len>516</Hsp_align-len>\n+              <Hsp_qseq>FHLLNSGGSALSVMFAKLVGIIAGISAPIWXXXXXXXXXXXXXXXXYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_qseq>\n+              <Hsp_hseq>FHLLNSGGSALSVMFAKLVGIIAGISAPIWAVIGVIAALVAGFVLLYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_hseq>\n+              <Hsp_midline>FHLLNSGGSALSVMFAKLVGIIAGISAPIW                
YNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNN'..b'NVFNAIKNTATNVWNAIKTTISNVVQTILNF---------------------------------VTPIFNTMKNTITNIFNAIRNTASSVWNSIKTTISNIVTSVKNTVINIFNALKNSITNIFNAIRNTASTVWNSIKSTVSNIVSATVNTVKNLFNGMKNTVSSIWDGVRNTISNVVNAVKNTISNVWGGITGTVSN----IFNGVKNAIDGPMNAAKNLVKNVV----DAIKGF</Hsp_hseq>\n+              <Hsp_midline>+++V     L G +V  WN+    + +         +  ++  +  VE V   +   +QT W++I AVV  ++    N+ K + D          KA  Q +       W+ +K +A  +WE I   V   I+G + + +      K+ +  +W  ++  V   W+ IK TV++  TA+   +  I  +I+TT   V+NAI   A+N+W AI TT+ +V+ TI  +                                 VT  F+ +K  I+N +  I+   S +WN+I T +S I   +K      +  +K +I+N+   I++   T WN+IK+++S           N  N +K+   + W+ +++ IS  +  +K+T+SN W  +  TV+N    I + V+   D  +NAA+N + N +    D I GF</Hsp_midline>\n+            </Hsp>\n+          </Hit_hsps>\n+        </Hit>\n+        <Hit>\n+          <Hit_num>9</Hit_num>\n+          <Hit_id>gi|163941333|ref|YP_001646217.1|</Hit_id>\n+          <Hit_def>prophage LambdaBa01, membrane protein, putative [Bacillus weihenstephanensis KBAB4] &gt;gi|163863530|gb|ABY44589.1| prophage LambdaBa01, membrane protein, putative [Bacillus weihenstephanensis KBAB4]</Hit_def>\n+          <Hit_accession>YP_001646217</Hit_accession>\n+          <Hit_len>725</Hit_len>\n+          <Hit_hsps>\n+            <Hsp>\n+              <Hsp_num>1</Hsp_num>\n+              <Hsp_bit-score>138.657684699283</Hsp_bit-score>\n+              <Hsp_score>348</Hsp_score>\n+              <Hsp_evalue>8.15996781441799e-31</Hsp_evalue>\n+              <Hsp_query-from>61</Hsp_query-from>\n+              <Hsp_query-to>480</Hsp_query-to>\n+              <Hsp_hit-from>142</Hsp_hit-from>\n+              
<Hsp_hit-to>560</Hsp_hit-to>\n+              <Hsp_query-frame>0</Hsp_query-frame>\n+              <Hsp_hit-frame>0</Hsp_hit-frame>\n+              <Hsp_identity>118</Hsp_identity>\n+              <Hsp_positive>203</Hsp_positive>\n+              <Hsp_gaps>29</Hsp_gaps>\n+              <Hsp_align-len>434</Hsp_align-len>\n+              <Hsp_qseq>WEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIK---AVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKT----VWS-------AAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLIN</Hsp_qseq>\n+              <Hsp_hseq>WDAIKQWTIDAWNAIGEFLVGIWDGIVQWASEAWNSISESTSAVWNSIKEFLIGIWNGIVEFVVT-WGT--AILETYVGIWTSIFNFCMEIWNGIVEYLTSVLQGIATFFTEIWTSISTFFQEIWNGLVAFITPVLQGIADFFAM-----------IWNGISTVIQTVWNFITQYLQAIWTAILYFATPLFESIKNFISECWNKISSTTSLVWETIKNFLVSCWNGLVSFVTPIFEKIKSWIISVWDTISSATMAVWNAVKNFLQACWNGLVSIVTPIFDAIKNWIVNVWNAISSTTSAVWNAIKSYLSSLWNSIVSTASSIFNSIKSAISTVWNMISSASSSVWNGIKSTLSSIWNGIKSTASSVWNGLKDAIMTPVRWVTSAVSGAFNGMKSAVLGVWDGIKSGIRTAINGIIRIINKFI-DGFNTPAELLN</Hsp_hseq>\n+              <Hsp_midline>W+AIK     A  A+  F++ +W  +V W +E    I ++   VWN+I+  +  +   ++  V T W    A++ T + +  ++ +  +++  GI++   +V+Q I   ++  W ++      IW G+ + +   + G+   F             +W  I  V+  +W++I   +    TA+      +  SI+      WN IS+  S +W  I   ++S    +  ++    E IK+    VW        A W  +K    A    +V +VT  FD IK  I N W  I + TS +WNAI ++LS +W  I + AS+ +  IK+ IS V   I S   + WN IK+++S+  N IKS A + WN +K AI T +  + S VS  +N + S V      I S +RT  +  +     FI +  +   +L+N</Hsp_midline>\n+            </Hsp>\n+          </Hit_hsps>\n+        </Hit>\n+      </Iteration_hits>\n+      <Iteration_stat>\n+        <Statistics>\n+          <Statistics_db-num>6589360</Statistics_db-num>\n+          
<Statistics_db-len>-2041834015</Statistics_db-len>\n+          <Statistics_hsp-len>0</Statistics_hsp-len>\n+          <Statistics_eff-space>504129014857</Statistics_eff-space>\n+          <Statistics_kappa>0.041</Statistics_kappa>\n+          <Statistics_lambda>0.267</Statistics_lambda>\n+          <Statistics_entropy>0.14</Statistics_entropy>\n+        </Statistics>\n+      </Iteration_stat>\n+    </Iteration>\n+  </BlastOutput_iterations>\n+</BlastOutput>\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_sample_converted.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastp_sample_converted.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,9 @@
+Sample gi|119953746|ref|YP_950551.1| 96.90 516 16 0 1 516 27 542 0.0 949
+Sample gi|148986157|ref|ZP_01819143.1| 41.27 252 115 3 49 300 679 897 2e-41 174
+Sample gi|77411259|ref|ZP_00787609.1| 41.00 261 143 2 50 310 655 904 8e-39 165
+Sample gi|76786754|ref|YP_329383.1| 39.46 261 147 2 50 310 655 904 7e-37 159
+Sample gi|153811333|ref|ZP_01964001.1| 29.98 557 277 18 3 516 573 1059 2e-36 157
+Sample gi|56962696|ref|YP_174422.1| 28.79 389 228 8 48 433 123 465 3e-33 146
+Sample gi|50914476|ref|YP_060448.1| 43.82 178 100 0 50 227 655 832 5e-33 146
+Sample gi|29374987|ref|NP_814140.1| 25.46 432 244 8 73 482 545 920 7e-31 139
+Sample gi|163941333|ref|YP_001646217.1| 27.19 434 287 7 61 480 142 560 8e-31 138
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastx_rhodopsin_vs_four_human.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,10 @@
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0   639
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0   551
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67   220
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35   127
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33   121
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0   589
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0   619
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0   532
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastx_rhodopsin_vs_four_human.xml Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,741 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+  <BlastOutput_program>blastx</BlastOutput_program>\n+  <BlastOutput_version>BLASTX 2.2.30+</BlastOutput_version>\n+  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+  <BlastOutput_db></BlastOutput_db>\n+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+  <BlastOutput_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</BlastOutput_query-def>\n+  <BlastOutput_query-len>1047</BlastOutput_query-len>\n+  <BlastOutput_param>\n+    <Parameters>\n+      <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+      <Parameters_expect>1e-10</Parameters_expect>\n+      <Parameters_gap-open>11</Parameters_gap-open>\n+      <Parameters_gap-extend>1</Parameters_gap-extend>\n+      <Parameters_filter>L;</Parameters_filter>\n+    </Parameters>\n+  </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+  <Iteration_iter-num>1</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+  <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>30</Statistics_hsp-len>\n+      <Statistics_eff-space>119944</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    
</Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>2</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+  <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>30</Statistics_hsp-len>\n+      <Statistics_eff-space>119944</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>3</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+  <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>30</Statistics_hsp-len>\n+      <Statistics_eff-space>119944</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>4</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), 
mRNA</Iteration_query-def>\n+  <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+<Hi'..b'Statistics_db-len>\n+      <Statistics_hsp-len>32</Statistics_hsp-len>\n+      <Statistics_eff-space>155584</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>23</Iteration_iter-num>\n+  <Iteration_query-ID>Query_6</Iteration_query-ID>\n+  <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+  <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>32</Statistics_hsp-len>\n+      <Statistics_eff-space>155584</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>24</Iteration_iter-num>\n+  <Iteration_query-ID>Query_6</Iteration_query-ID>\n+  <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+  <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+  <Hit_num>1</Hit_num>\n+  <Hit_id>Subject_4</Hit_id>\n+  <Hit_def>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Hit_def>\n+  <Hit_accession>Subject_4</Hit_accession>\n+  <Hit_len>348</Hit_len>\n+  <Hit_hsps>\n+    <Hsp>\n+      
<Hsp_num>1</Hsp_num>\n+      <Hsp_bit-score>532.717</Hsp_bit-score>\n+      <Hsp_score>1371</Hsp_score>\n+      <Hsp_evalue>0</Hsp_evalue>\n+      <Hsp_query-from>23</Hsp_query-from>\n+      <Hsp_query-to>1021</Hsp_query-to>\n+      <Hsp_hit-from>1</Hsp_hit-from>\n+      <Hsp_hit-to>333</Hsp_hit-to>\n+      <Hsp_query-frame>2</Hsp_query-frame>\n+      <Hsp_hit-frame>0</Hsp_hit-frame>\n+      <Hsp_identity>272</Hsp_identity>\n+      <Hsp_positive>307</Hsp_positive>\n+      <Hsp_gaps>0</Hsp_gaps>\n+      <Hsp_align-len>333</Hsp_align-len>\n+      <Hsp_qseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG</Hsp_qseq>\n+      <Hsp_hseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA</Hsp_hseq>\n+      <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV  TW MALACA PPL GWSRYIPEGLQCSCGIDYYT  P +NNESFVIYMF  HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP  +++ </Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>32</Statistics_hsp-len>\n+      <Statistics_eff-space>155584</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+ 
     <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human_all.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastx_rhodopsin_vs_four_human_all.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,10 @@
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0   639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|57163782|ref|NM_001009242.1| gi|57163782|ref|NM_001009242.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA71FL4TS79VA9ML34VI71LI7SASA21LI13TA1GV11 99 33 N/A N/A N/A N/A N/A
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0   551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A 0 gi|2734705|gb|U59921.1|BBU59921 gi|2734705|gb|U59921.1|BBU59921 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 10IV1MF2KA20YF1IM1CA7LV7ML5IV17FV1NDHL4CG3VS1ML1ST1MLNH3IVLF1AP3YNVL36SG4VI7IV3SA2VA3LA9ML5VI26LM8RQ2CF30VIFA13FY2STNH3EN7VI7SASA9LM8IL2LI6FL1ED1DE 63 21 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67   220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA9HY61FL4TS28 22 3 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35   127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 18LV3GS19LI7SASA13 22 2 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33   121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 6ML34VI14VARASA 22 1 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 1AG36LV1LF13VA4 22 1 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 9LI13TA1AV 22 1 N/A N/A N/A N/A N/A
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0   589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A 0 gi|283855822|gb|GQ290312.1| gi|283855822|gb|GQ290312.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA66ND4FL4TS5ML7AP49LV25ML34VI40VI6LV19VI11SASA21LI13TA1 99 33 N/A N/A N/A N/A N/A
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0   619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|18148870|dbj|AB062417.1| gi|18148870|dbj|AB062417.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA9AY22MV38FL4TS79VA9ML10PLHKEP1TV14IT2LM1VI47LV3GS11DN14TSSA1VI17VL2LI13TA13 99 33 N/A N/A N/A N/A N/A
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0   532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A 0 gi|12583664|dbj|AB043817.1| gi|12583664|dbj|AB043817.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 10IV1MF22AQ2AM1SA4FL2IVAL12IVEQ23FL4TS1ML3ML12IL14CS8WYMV5VMTS6SN6MAVF2TV6LA3FA20RLAK1GEIV10TVCV2ST2LMAIVI1SF4RQ2CF12EA3RK2RK6VI4SA2VI9WF8TN9SA5SA1LI3MV3CM6HN2IL2LI6FLEGEDEDDEGA 74 25 N/A N/A N/A N/A N/A
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human_converted.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,10 @@
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,10 @@
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human_ext.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,10 @@
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0   639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0   551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67   220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35   127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33   121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0   589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0   619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0   532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_sample.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastx_sample.xml Fri Jan 30 08:27:28 2015 -0500
[
b'@@ -0,0 +1,758 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+  <BlastOutput_program>blastx</BlastOutput_program>\n+  <BlastOutput_version>BLASTX 2.2.24+</BlastOutput_version>\n+  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+  <BlastOutput_db>/share/BlastDB/nr</BlastOutput_db>\n+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+  <BlastOutput_query-def>phage_suis</BlastOutput_query-def>\n+  <BlastOutput_query-len>1890</BlastOutput_query-len>\n+  <BlastOutput_param>\n+    <Parameters>\n+      <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+      <Parameters_expect>0.001</Parameters_expect>\n+      <Parameters_gap-open>11</Parameters_gap-open>\n+      <Parameters_gap-extend>1</Parameters_gap-extend>\n+      <Parameters_filter>L;</Parameters_filter>\n+    </Parameters>\n+  </BlastOutput_param>\n+  <BlastOutput_iterations>\n+    <Iteration>\n+      <Iteration_iter-num>1</Iteration_iter-num>\n+      <Iteration_query-ID>Query_1</Iteration_query-ID>\n+      <Iteration_query-def>phage_suis</Iteration_query-def>\n+      <Iteration_query-len>1890</Iteration_query-len>\n+      <Iteration_hits>\n+        <Hit>\n+          <Hit_num>1</Hit_num>\n+          <Hit_id>gi|119953746|ref|YP_950551.1|</Hit_id>\n+          <Hit_def>tail tape measure protein [Streptococcus phage SMP] &gt;gi|118430558|gb|ABK91882.1| tail tape measure protein [Streptococcus phage SMP]</Hit_def>\n+          <Hit_accession>YP_950551</Hit_accession>\n+          <Hit_len>659</Hit_len>\n+          <Hit_hsps>\n+            <Hsp>\n+              <Hsp_num>1</Hsp_num>\n+              <Hsp_bit-score>988.407949172964</Hsp_bit-score>\n+              
<Hsp_score>2554</Hsp_score>\n+              <Hsp_evalue>0</Hsp_evalue>\n+              <Hsp_query-from>336</Hsp_query-from>\n+              <Hsp_query-to>1889</Hsp_query-to>\n+              <Hsp_hit-from>25</Hsp_hit-from>\n+              <Hsp_hit-to>542</Hsp_hit-to>\n+              <Hsp_query-frame>3</Hsp_query-frame>\n+              <Hsp_hit-frame>0</Hsp_hit-frame>\n+              <Hsp_identity>518</Hsp_identity>\n+              <Hsp_positive>518</Hsp_positive>\n+              <Hsp_gaps>0</Hsp_gaps>\n+              <Hsp_align-len>518</Hsp_align-len>\n+              <Hsp_qseq>NWFHLLNSGGSALSVMFAKLVGIIAGISAPIWXXXXXXXXXXXXXXXXYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_qseq>\n+              <Hsp_hseq>NWFHLLNSGGSALSVMFAKLVGIIAGISAPIWAVIGVIAALVAGFVLLYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_hseq>\n+              
<Hsp_midline>NWFHLLNSGGSALSVMFAKLVGIIAGISAPIWAVIGVIAALVAGFVLLYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKT'..b'\n+              <Hsp_qseq>AIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVA------IDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDW-AKG</Hsp_qseq>\n+              <Hsp_hseq>AMAEVGGVLAEALAPVLELLAQLLQAVANWFSN-LPGPIQTFIVIMGGLITVVGLLLPGLLA-----LQAAAVAMGTTIGGLVVAAAPIVGTVLGIIAVITLLVVWIQELWQNNEGFRTAVI-EIWNAIYAFISVIIQEISTFIMTIWGTLTTWWTENQALIQAAVETVWNAISTVIQTVMSLIGPYLEAAWANIQLIITTAWEIIKTVVETAITVVLGIIKAIMQAITGDWSGAWETIKGVLQRVWQAIQQIVTTILSAIGQFISNTWNGIKNTFSNILSAISGIVSSIWNTIKSVISSVISSIVSFVSSGWSGIQQTISSILSGISSTVSSVWNGIKNSISNA----INGAKNVVSSAINAIKNLFNFKISWPHIPLPHF--SVSGSANPLDWLKGGLPKISIAWYAKG</Hsp_hseq>\n+              <Hsp_midline>A+  V   +  AL P+++    L+ AV     N +   + T + ++ G+I  V  ++ G  +     L+  A  +   I  LV  A      + G++ +    + +++ +W    G   AV+  IW+ I   +S  I  +   I  I  ++ T W      I      +W AIST + +V++ I  Y++     I+ + + AWEIIK V    +  ++G++      I    S AWE IK     +W AI   ++ I   I    S  W  IK T SN+++ I   + + WN IK+ IS+ +++I S   + W+ I+  IS+ +  I STVS+ WN + ++++NA    ++  +    +A+NA +N  +  IS     +  F   V G+A  L    GG    +I W AKG</Hsp_midline>\n+            </Hsp>\n+            <Hsp>\n+              <Hsp_num>3</Hsp_num>\n+              <Hsp_bit-score>121.708903358919</Hsp_bit-score>\n+              <Hsp_score>304</Hsp_score>\n+              <Hsp_evalue>2.99798279087674e-25</Hsp_evalue>\n+              
<Hsp_query-from>543</Hsp_query-from>\n+              <Hsp_query-to>1673</Hsp_query-to>\n+              <Hsp_hit-from>637</Hsp_hit-from>\n+              <Hsp_hit-to>1004</Hsp_hit-to>\n+              <Hsp_query-frame>3</Hsp_query-frame>\n+              <Hsp_hit-frame>0</Hsp_hit-frame>\n+              <Hsp_identity>89</Hsp_identity>\n+              <Hsp_positive>168</Hsp_positive>\n+              <Hsp_gaps>29</Hsp_gaps>\n+              <Hsp_align-len>387</Hsp_align-len>\n+              <Hsp_qseq>ISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSG--IWEGIK------TAASTAWEWIKTTISNVMTT--IKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTN</Hsp_qseq>\n+              <Hsp_hseq>IIAVITLLVVWIQELW--------QNNEGFRTAVIEIWNAIYAFISVIIQEISTFIMTIWGTLTTWWTENQALIQAAVETVWNAISTVIQTVMSLIGPYLEAAWANIQLIITTAWEIIKTVVETAITVVLGIIKAIMQAITGDWSGAWETIKGVLQRVWQAIQQIVTTILSAIGQFISNTWNGIKNTFSNILSAISGIVSSIWNTIKSVISSVISSIVSFV-----------SSGWSGIQQTISSILSGISSTVSSVWNGIKNSISNAINGAKNVVSSAINAIKNLFNFKISWPHIPLPHFSVSGSANPLDWLKGGLPKISIAWYAKGGILTKPTAFGMNEKQLMVGGEAGKEAVLPLTKQNLAAIGEGIASTMGTGGNFINVSITD</Hsp_hseq>\n+              <Hsp_midline>I   +  +V ++ +LW        +N E  R     +WNAI   +  ++  +   + T W  +    T    +I+  V+T    +  +I+ VM +I      AW  ++ +  T WE IK++V+ AI  ++ I +  +  +   W   W TI  V+  +W  I+  V+  ++A+ + I N    I+ T+  + +AIS + S+IW  I + + SV+++I  ++           S+ W  I+   ++IL  I   V+  ++ IK +ISNA    K   S   NAI    +    W  I       + ++   +W+K  +  +      K  I T       +    +   ++  E      K  ++   E I ST+  G N +  ++T+</Hsp_midline>\n+            </Hsp>\n+          </Hit_hsps>\n+        </Hit>\n+      </Iteration_hits>\n+      <Iteration_stat>\n+        <Statistics>\n+          <Statistics_db-num>12310662</Statistics_db-num>\n+          
<Statistics_db-len>-87459526</Statistics_db-len>\n+          <Statistics_hsp-len>0</Statistics_hsp-len>\n+          <Statistics_eff-space>1174893963300</Statistics_eff-space>\n+          <Statistics_kappa>0.041</Statistics_kappa>\n+          <Statistics_lambda>0.267</Statistics_lambda>\n+          <Statistics_entropy>0.14</Statistics_entropy>\n+        </Statistics>\n+      </Iteration_stat>\n+    </Iteration>\n+  </BlastOutput_iterations>\n+</BlastOutput>\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_sample_converted.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastx_sample_converted.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,33 @@
+phage_suis gi|119953746|ref|YP_950551.1| 100.00 518 0 0 336 1889 25 542 0.0 988
+phage_suis gi|289551554|ref|YP_003472458.1| 32.95 516 280 6 342 1889 657 1106 6e-66 256
+phage_suis gi|223044325|ref|ZP_03614360.1| 30.22 546 327 7 393 1889 655 1193 1e-64 252
+phage_suis gi|223044325|ref|ZP_03614360.1| 19.88 508 328 9 384 1796 844 1309 6e-28 130
+phage_suis gi|268611153|ref|ZP_06144880.1| 28.64 639 371 11 78 1847 440 1042 1e-60 239
+phage_suis gi|268611153|ref|ZP_06144880.1| 23.36 441 286 7 543 1856 547 938 4e-31 141
+phage_suis gi|268611153|ref|ZP_06144880.1| 25.27 459 266 11 522 1844 722 1121 8e-31 140
+phage_suis gi|268611153|ref|ZP_06144880.1| 24.63 406 267 8 501 1694 770 1144 3e-23 115
+phage_suis gi|268611153|ref|ZP_06144880.1| 27.80 241 145 3 492 1148 811 1044 6e-16 90.9
+phage_suis gi|268611153|ref|ZP_06144880.1| 19.76 253 168 6 1158 1883 547 775 3e-04 52.0
+phage_suis gi|268610688|ref|ZP_06144415.1| 28.95 639 369 11 78 1847 440 1042 3e-59 234
+phage_suis gi|268610688|ref|ZP_06144415.1| 24.64 491 316 9 501 1856 770 1245 4e-39 167
+phage_suis gi|268610688|ref|ZP_06144415.1| 23.79 517 319 9 492 1832 811 1322 3e-37 161
+phage_suis gi|268610688|ref|ZP_06144415.1| 21.91 493 322 11 510 1859 905 1377 1e-25 123
+phage_suis gi|268610688|ref|ZP_06144415.1| 20.55 292 197 5 486 1343 1138 1400 4e-10 71.6
+phage_suis gi|268610688|ref|ZP_06144415.1| 21.41 341 225 10 894 1883 467 775 8e-05 53.9
+phage_suis gi|153811333|ref|ZP_01964001.1| 28.34 621 364 16 108 1847 493 1073 8e-55 219
+phage_suis gi|153811333|ref|ZP_01964001.1| 29.67 428 250 9 519 1760 709 1099 2e-47 195
+phage_suis gi|153811333|ref|ZP_01964001.1| 29.41 391 226 7 498 1640 746 1096 1e-39 169
+phage_suis gi|153811333|ref|ZP_01964001.1| 26.49 268 174 3 492 1256 854 1111 3e-24 118
+phage_suis gi|153811333|ref|ZP_01964001.1| 27.12 306 198 4 510 1385 816 1110 1e-23 116
+phage_suis gi|262113750|emb|CAR95417.1| 38.46 286 169 1 384 1241 540 818 2e-54 218
+phage_suis gi|262113750|emb|CAR95417.1| 29.68 411 271 7 657 1871 460 858 3e-40 171
+phage_suis gi|77411259|ref|ZP_00787609.1| 37.19 285 172 1 387 1241 628 905 2e-53 215
+phage_suis gi|77411259|ref|ZP_00787609.1| 28.01 407 281 6 660 1871 548 945 1e-40 172
+phage_suis gi|77411259|ref|ZP_00787609.1| 22.82 355 207 7 978 1877 540 882 9e-14 83.6
+phage_suis gi|76786754|ref|YP_329383.1| 36.84 285 173 1 387 1241 628 905 8e-53 213
+phage_suis gi|76786754|ref|YP_329383.1| 27.27 407 284 6 660 1871 548 945 3e-38 164
+phage_suis gi|76786754|ref|YP_329383.1| 24.73 283 194 2 543 1391 637 900 3e-23 115
+phage_suis gi|76786754|ref|YP_329383.1| 22.91 323 204 6 978 1847 540 850 2e-13 82.4
+phage_suis gi|50914476|ref|YP_060448.1| 35.86 290 179 1 372 1241 623 905 4e-51 207
+phage_suis gi|50914476|ref|YP_060448.1| 27.01 411 280 7 660 1871 548 945 2e-35 155
+phage_suis gi|50914476|ref|YP_060448.1| 23.00 387 269 5 543 1673 637 1004 3e-25 121
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003.smp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cd00003.smp Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,19957 @@\n+PssmWithParameters ::= {\n+  pssm {\n+    isProtein TRUE,\n+    numRows 28,\n+    numColumns 234,\n+    byRow FALSE,\n+    query seq {\n+      id {\n+        general {\n+          db "CDD",\n+          tag id 237977\n+        }\n+      },\n+      descr {\n+        title "cd00003, PNPsynthase, Pyridoxine 5\'-phosphate (PNP) synthase\n+ domain; pyridoxal 5\'-phosphate is the active form of vitamin B6 that acts as\n+ an essential, ubiquitous coenzyme in amino acid metabolism. In bacteria,\n+ formation of pyridoxine 5\'-phosphate is a step in the biosynthesis of vitamin\n+ B6. PNP synthase, a homooctameric enzyme, catalyzes the final step in PNP\n+ biosynthesis, the condensation of 1-amino-acetone 3-phosphate and\n+ 1-deoxy-D-xylulose 5-phosphate. PNP synthase adopts a TIM barrel topology,\n+ intersubunit contacts are mediated by three \'\'extra\'\' helices, generating a\n+ tetramer of symmetric dimers with shared active sites; the open state has\n+ been proposed to accept substrates and to release products, while most of the\n+ catalytic events are likely to occur in the closed state; a hydrophilic\n+ channel running through the center of the barrel was identified as the\n+ essential structural feature that enables PNP synthase to release water\n+ molecules produced during the reaction from the closed, solvent-shielded\n+ active site."\n+      },\n+      inst {\n+        repr raw,\n+        mol aa,\n+        length 234,\n+        seq-data ncbieaa "RLGVNIDHVATLRNARGTNYPDPVEAALLAEKAGADGITVHLREDRRHIQDR\n+DVRLLRELVRTELNLEMAPTEEMLEIALEVKPHQVTLVPEKREELTTEGGLDVAGQAEKLKPIIERLKDAGIRVSLFI\n+DPDPEQIEAAKEVGADRVELHTGPYANAYDKAEREAELERIAKAAKLARELGLGVNAGHGLNYENVKPIAKIPGIAEL\n+NIGHAIISRALFVGLEEAVREMKDLI"\n+      }\n+    },\n+    intermediateData {\n+      weightedResFreqsPerPos {\n+        { 0, 10, 0 },\n+        { 575880368388257, 10, -16 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 663632240275508, 10, -16 },\n+        { 18974024208621, 10, 
-15 },\n+        { 435260939968393, 10, -16 },\n+        { 0, 10, 0 },\n+        { 231579235547631, 10, -16 },\n+        { 217943087911858, 10, -16 },\n+        { 197347688751049, 10, -15 },\n+        { 149575209809135, 10, -15 },\n+        { 0, 10, 0 },\n+        { 373639929584932, 10, -16 },\n+        { 0, 10, 0 },\n+        { 454596950735964, 10, -16 },\n+        { 280508357667036, 10, -15 },\n+        { 0, 10, 0 },\n+        { 276040598748792, 10, -16 },\n+        { 187780737233597, 10, -16 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 119593107246649, 10, -16 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 355209907721631, 10, -16 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 925910073190528, 10, -15 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 385689360373084, 10, -16 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 526473206870828, 10, -16 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 499042712921408, 10, -15 },\n+        { 160105971725447, 10, -16 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 217340080528288, 10, -16 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 38877105237495, 10, -14 },\n+        { 0, 10, 0 },\n+        { 217943087911858, 10, 
-16 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+     '..b'-414,\n+        -399,\n+        -460,\n+        5,\n+        352,\n+        16,\n+        -587,\n+        -100,\n+        -494,\n+        -32768,\n+        -32768,\n+        -400,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -195,\n+        -32768,\n+        -409,\n+        -650,\n+        -589,\n+        -382,\n+        -651,\n+        -647,\n+        402,\n+        -562,\n+        -63,\n+        -221,\n+        -629,\n+        -572,\n+        -558,\n+        -589,\n+        -496,\n+        -335,\n+        665,\n+        -607,\n+        -100,\n+        -446,\n+        -32768,\n+        -32768,\n+        -400,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -24,\n+        -32768,\n+        -591,\n+        -46,\n+        -272,\n+        -576,\n+        -277,\n+        -37,\n+        -291,\n+        283,\n+        -138,\n+        -402,\n+        -350,\n+        -488,\n+        369,\n+        578,\n+        -203,\n+        -174,\n+        -491,\n+        -587,\n+        -100,\n+        -483,\n+        -32768,\n+        -32768,\n+        -400,\n+        -32768,\n+        -32768,\n+        -32768,\n+        7,\n+        -32768,\n+        -616,\n+        241,\n+        528,\n+        -606,\n+        -496,\n+        -367,\n+        -230,\n+        196,\n+        -343,\n+        -105,\n+        -331,\n+        -454,\n+        43,\n+        272,\n+        -194,\n+        -392,\n+        -502,\n+        -619,\n+        -100,\n+        -523,\n+        -32768,\n+        -32768,\n+        -400,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -446,\n+        -32768,\n+        -500,\n+        -647,\n+        -538,\n+        178,\n+        -619,\n+        -325,\n+        -106,\n+        -485,\n+        -175,\n+        921,\n+        -550,\n+        -602,\n+        -404,\n+        -481,\n+       
 -486,\n+        -420,\n+        -288,\n+        -277,\n+        -100,\n+        627,\n+        -32768,\n+        -32768,\n+        -400,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -165,\n+        -32768,\n+        -534,\n+        -503,\n+        -358,\n+        -453,\n+        -555,\n+        -452,\n+        -144,\n+        482,\n+        287,\n+        -280,\n+        -440,\n+        -508,\n+        -91,\n+        361,\n+        -409,\n+        -396,\n+        122,\n+        -575,\n+        -100,\n+        -472,\n+        -32768,\n+        -32768,\n+        -400,\n+        -32768,\n+        -32768,\n+        -32768,\n+        149,\n+        -32768,\n+        -567,\n+        385,\n+        307,\n+        -606,\n+        -460,\n+        -383,\n+        -292,\n+        51,\n+        -396,\n+        -460,\n+        -145,\n+        -458,\n+        207,\n+        304,\n+        -90,\n+        -107,\n+        -277,\n+        -625,\n+        -100,\n+        -526,\n+        -32768,\n+        -32768,\n+        -400,\n+        -32768,\n+        -32768,\n+        -32768,\n+        176,\n+        -32768,\n+        31,\n+        -605,\n+        -520,\n+        -357,\n+        -572,\n+        -562,\n+        483,\n+        -473,\n+        369,\n+        -197,\n+        -576,\n+        -553,\n+        -44,\n+        -69,\n+        -433,\n+        -185,\n+        8,\n+        -549,\n+        -100,\n+        -456,\n+        -32768,\n+        -32768,\n+        -400,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -154,\n+        -32768,\n+        387,\n+        -646,\n+        -594,\n+        -334,\n+        -636,\n+        -593,\n+        485,\n+        -545,\n+        343,\n+        529,\n+        -604,\n+        -586,\n+        -513,\n+        -556,\n+        -479,\n+        -56,\n+        41,\n+        -533,\n+        -100,\n+        -450,\n+        -32768,\n+        -32768,\n+        -400,\n+        -32768,\n+        -32768\n+      },\n+    
  lambda { 267, 10, -3 },\n+      kappa { 695502437462053, 10, -16 },\n+      h { 14, 10, -2 },\n+      scalingFactor 100,\n+      lambdaUngapped { 315181590957692, 10, -15 },\n+      kappaUngapped { 22723615854819, 10, -14 },\n+      hUngapped { 852942415611443, 10, -15 }\n+    }\n+  },\n+  params {\n+    pseudocount 10,\n+    rpsdbparams {\n+      matrixName "BLOSUM62"\n+    }\n+  }\n+}\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.aux
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cd00003_and_cd00008.aux Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,12 @@
+BLOSUM62
+11
+1
+0.000000e+00
+0.000000e+00
+0
+0
+100.000000
+234
+6.955024e-02
+160
+4.862535e-02
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.freq
b
Binary file test-data/cd00003_and_cd00008.freq has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.loo
b
Binary file test-data/cd00003_and_cd00008.loo has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.phr
b
Binary file test-data/cd00003_and_cd00008.phr has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.pin
b
Binary file test-data/cd00003_and_cd00008.pin has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.psd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cd00003_and_cd00008.psd Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,2 @@
+gnl|cdd|1890191
+gnl|cdd|2379770
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.psi
b
Binary file test-data/cd00003_and_cd00008.psi has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.psq
b
Binary file test-data/cd00003_and_cd00008.psq has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.rps
b
Binary file test-data/cd00003_and_cd00008.rps has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/cd00008.smp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cd00008.smp Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,13679 @@\n+PssmWithParameters ::= {\n+  pssm {\n+    isProtein TRUE,\n+    numRows 28,\n+    numColumns 160,\n+    byRow FALSE,\n+    query seq {\n+      id {\n+        general {\n+          db "CDD",\n+          tag id 189019\n+        }\n+      },\n+      descr {\n+        title "cd00008, PIN_53EXO-like, PIN domains of the 5\'-3\' exonucleases\n+ of DNA polymerase I, bacteriophage T4 RNase H and T5-5\' nucleases, and\n+ homologs.  PIN (PilT N terminus) domains of the 5\'-3\' exonucleases (53EXO) of\n+ mutli-domain DNA polymerase I and single domain protein homologs, as well as,\n+ the PIN domains of bacteriophage T5-5\'nuclease (T5FEN or 5\'-3\'exonuclease),\n+ bacteriophage T4 RNase H (T4FEN), bacteriophage T3 (T3 phage\n+ exodeoxyribonuclease) and other similar  nucleases are included in this\n+ family.  The 53EXO of DNA polymerase I recognizes and endonucleolytically\n+ cleaves a structure-specific DNA substrate that has a bifurcated downstream\n+ duplex and an upstream template-primer duplex that overlaps the downstream\n+ duplex by 1 bp.  The T5-5\'nuclease is a 5\'-3\'exodeoxyribonuclease that also\n+ exhibits endonucleolytic activity on flap structures (branched duplex DNA\n+ containing a free single-stranded 5\'end). T4 RNase H, which removes the RNA\n+ primers that initiate lagging strand fragments, has 5\'- 3\'exonuclease\n+ activity on DNA/DNA and RNA/DNA duplexes and has endonuclease activity on\n+ flap or forked DNA structures. These nucleases are members of the\n+ structure-specific, 5\' nuclease family that catalyzes hydrolysis of DNA\n+ duplex-containing nucleic acid structures during DNA replication, repair, and\n+ recombination. They contain a PIN domain with a helical arch/clamp region (I\n+ domain) of variable length (approximately 16 to 30 residues in 53EXO-like PIN\n+ domains) and a H3TH (helix-3-turn-helix) domain, an atypical\n+ helix-hairpin-helix-2-like region.  
Both the H3TH domain (not included here)\n+ and the helical arch/clamp region are involved in DNA binding. The active\n+ site of the 53EXO of Taq DNA polymerase I includes a set of conserved acidic\n+ residues that are essential for binding three divalent metal ions (two Mn2+\n+ ions and one Zn2+ ion) required for nuclease activity. T5-5\'nuclease requires\n+ at least two bound divalent metal ions for nuclease activity and is reported\n+ to be able to use Mg2+, Mn2+ or Co2+ as co-factors."\n+      },\n+      inst {\n+        repr raw,\n+        mol aa,\n+        length 160,\n+        seq-data ncbieaa "LMLVDGTNLAFRTKHNNSKKKEKINLSPFASSYVSSIQSLAKSYSARTTIVL\n+GDKGKSVFRLEHLPEYKGNRDEKYAEEKALDEQFFEYLKDAFELCKATTFPTFTIRGYEADDMAAYLVKKIGHEGDHV\n+WIISTDGDWDQLLTDKVSRFSPTTRREYHL"\n+      }\n+    },\n+    intermediateData {\n+      weightedResFreqsPerPos {\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 338020833333333, 10, -15 },\n+        { 0, 10, 0 },\n+        { 328645833333333, 10, -15 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 333333333333333, 10, -15 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 338020833333333, 10, -15 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 333333333333333, 10, -15 },\n+        { 328645833333333, 10, -15 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 
10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0, 10, 0 },\n+        { 0'..b'     -307,\n+        704,\n+        -250,\n+        -309,\n+        -192,\n+        -207,\n+        -249,\n+        -197,\n+        -100,\n+        -12,\n+        -32768,\n+        -32768,\n+        -399,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -64,\n+        -32768,\n+        -122,\n+        -145,\n+        -113,\n+        -102,\n+        -157,\n+        -140,\n+        -20,\n+        -97,\n+        17,\n+        497,\n+        -93,\n+        -116,\n+        -65,\n+        -112,\n+        -12,\n+        327,\n+        -19,\n+        -149,\n+        -100,\n+        -121,\n+        -32768,\n+        -32768,\n+        -399,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -142,\n+        -32768,\n+        -302,\n+        -51,\n+        332,\n+        -282,\n+        -264,\n+        602,\n+        -290,\n+        -70,\n+        -294,\n+        -205,\n+        -45,\n+        -211,\n+        22,\n+        -107,\n+        -28,\n+        303,\n+        -228,\n+        -332,\n+        -100,\n+        -69,\n+        -32768,\n+        -32768,\n+        -399,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -121,\n+        -32768,\n+        -379,\n+        -186,\n+        -84,\n+        -383,\n+        340,\n+        -157,\n+        -394,\n+        351,\n+        -349,\n+        -246,\n+        -95,\n+        -242,\n+        -9,\n+        403,\n+        -102,\n+        -177,\n+        -341,\n+        -352,\n+        -100,\n+        -289,\n+        -32768,\n+        -32768,\n+        -399,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -196,\n+        -32768,\n+        -375,\n+        -234,\n+        -82,\n+        -9,\n+        -299,\n+        -9,\n+        -301,\n+        349,\n+        -256,\n+        -192,\n+    
    -143,\n+        -264,\n+        -2,\n+        405,\n+        -146,\n+        -178,\n+        -268,\n+        -72,\n+        -100,\n+        490,\n+        -32768,\n+        -32768,\n+        -399,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -226,\n+        -32768,\n+        -301,\n+        -157,\n+        297,\n+        -71,\n+        -349,\n+        -222,\n+        -123,\n+        -172,\n+        207,\n+        -46,\n+        -277,\n+        -314,\n+        -71,\n+        -214,\n+        -226,\n+        -213,\n+        -167,\n+        856,\n+        -100,\n+        -25,\n+        -32768,\n+        -32768,\n+        -399,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -167,\n+        -32768,\n+        -210,\n+        -396,\n+        -331,\n+        65,\n+        -415,\n+        -122,\n+        350,\n+        -300,\n+        9,\n+        -12,\n+        -350,\n+        -347,\n+        -284,\n+        -313,\n+        -261,\n+        -135,\n+        320,\n+        -71,\n+        -100,\n+        490,\n+        -32768,\n+        -32768,\n+        -399,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -136,\n+        -32768,\n+        -287,\n+        -161,\n+        -56,\n+        -281,\n+        -243,\n+        591,\n+        -274,\n+        318,\n+        -281,\n+        -184,\n+        -43,\n+        -206,\n+        -7,\n+        7,\n+        -29,\n+        308,\n+        -222,\n+        -337,\n+        -100,\n+        -69,\n+        -32768,\n+        -32768,\n+        -399,\n+        -32768,\n+        -32768,\n+        -32768,\n+        -187,\n+        -32768,\n+        -240,\n+        -339,\n+        -301,\n+        -129,\n+        -391,\n+        -351,\n+        323,\n+        -269,\n+        251,\n+        24,\n+        -362,\n+        495,\n+        -275,\n+        -318,\n+        -249,\n+        -165,\n+        48,\n+        -331,\n+        -100,\n+        -238,\n+        -32768,\n+        -32768,\n+        
-399,\n+        -32768,\n+        -32768\n+      },\n+      lambda { 267, 10, -3 },\n+      kappa { 486253485452101, 10, -16 },\n+      h { 14, 10, -2 },\n+      scalingFactor 100,\n+      lambdaUngapped { 318588052238909, 10, -15 },\n+      kappaUngapped { 158869858915243, 10, -15 },\n+      hUngapped { 43477934178065, 10, -14 }\n+    }\n+  },\n+  params {\n+    pseudocount 10,\n+    rpsdbparams {\n+      matrixName "BLOSUM62"\n+    }\n+  }\n+}\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/chimera.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chimera.fasta Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,168 @@\n+>chimera chunks of AB011145 plus M10051 plus BC112106\n+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC\n+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA\n+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC\n+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT\n+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA\n+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA\n+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA\n+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC\n+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA\n+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT\n+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG\n+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA\n+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA\n+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG\n+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA\n+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC\n+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT\n+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA\n+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC\n+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA\n+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG\n+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC\n+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC\n+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA\n+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT\n+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA\n+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT\n+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT\n+GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA\n+TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA\n+CTTTGAAGTGGAGGAGAGTCATAAGACAGAAC
ATTTGGCAGGAATTGTCCTTATGAAACA\n+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG\n+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG\n+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT\n+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT\n+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT\n+TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG\n+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA\n+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT\n+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC\n+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT\n+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT\n+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT\n+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT\n+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT\n+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA\n+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA\n+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA\n+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT\n+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT\n+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT\n+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT\n+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA\n+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT\n+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT\n+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT\n+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT\n+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG\n+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC\n+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT\n+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA\n+GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT\n+TATTTTTATTTTGAGC
TTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT\n+GAGAGGGAATAATCTGAGC'..b'GGGGACAAGGCATCC\n+TGTGAAAATGAGTTACTTAAATTTTCTTACATTCGGACATCTTTTGACAAGATCTTGCTG\n+AGATGGGAGCCGTACTGGCCCCCCGACTTCCGAGACCTCTTGGGGTTCATGCTGTTCTAC\n+AAAGAGGCCCCTTATCAGAATGTGACGGAGTTCGATGGGCAGGATGCGTGTGGTTCCAAC\n+AGTTGGACGGTGGTAGACATTGACCCACCCCTGAGGTCCAACGACCCCAAATCACAGAAC\n+CACCCAGGGTGGCTGATGCGGGGTCTCAAGCCCTGGACCCAGTATGCCATCTTTGTGAAG\n+ACCCTGGTCACCTTTTCGGATGAACGCCGGACCTATGGGGCCAAGAGTGACATCATTTAT\n+GTCCAGACAGATGCCACCAACCCCTCTGTGCCCCTGGATCCAATCTCAGTGTCTAACTCA\n+TCATCCCAGATTATTCTGAAGTGGAAACCACCCTCCGACCCCAATGGCAACATCACCCAC\n+TACCTGGTTTTCTGGGAGAGGCAGGCGGAAGACAGTGAGCTGTTCGAGCTGGATTATTGC\n+CTCAAAGGGCTGAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT\n+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG\n+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT\n+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC\n+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG\n+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG\n+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC\n+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC\n+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT\n+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG\n+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG\n+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT\n+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC\n+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT\n+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT\n+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT\n+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC\n+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG\n+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG\n+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC\n+AAGGGCCAGCCCACGCTGGTG
GTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC\n+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA\n+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG\n+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA\n+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC\n+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC\n+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA\n+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT\n+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA\n+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTG\n+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT\n+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC\n+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC\n+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG\n+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT\n+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT\n+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG\n+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC\n+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT\n+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC\n+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT\n+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG\n+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC\n+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA\n+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC\n+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC\n+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG\n+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC\n+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG\n+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA\n+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG\n+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT\n+GAGGC
CTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG\n+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA\n+GCCATCCCACCAG\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/convert2blastmask_four_human_masked.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert2blastmask_four_human_masked.maskinfo-asn1 Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,158 @@
+Blast-db-mask-info ::= {
+  algo-id 0,
+  algo-program seg,
+  algo-options "window=12; locut=2.2; hicut=2.5",
+  masks {
+    masks {
+      int {
+        from 6,
+        to 18,
+        id swissprot {
+          name "ERP44_HUMAN",
+          accession "Q9BS26",
+          release "reviewed"
+        }
+      },
+      packed-int {
+        {
+          from 11,
+          to 46,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 325,
+          to 332,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 421,
+          to 496,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 501,
+          to 516,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 536,
+          to 558,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 636,
+          to 648,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 737,
+          to 762,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 789,
+          to 806,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 970,
+          to 983,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 999,
+          to 1010,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        }
+      },
+      packed-int {
+        {
+          from 3,
+          to 26,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        },
+        {
+          from 372,
+          to 390,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        },
+        {
+          from 766,
+          to 791,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        },
+        {
+          from 1312,
+          to 1324,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        }
+      },
+      int {
+        from 230,
+        to 246,
+        id swissprot {
+          name "OPSD_HUMAN",
+          accession "P08100",
+          release "reviewed"
+        }
+      }
+    },
+    more FALSE
+  }
+}
b
diff -r 000000000000 -r 432ea9614cc9 test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary
b
Binary file test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/dustmasker_three_human.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dustmasker_three_human.fasta Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,183 @@\n+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds\n+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG\n+CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC\n+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC\n+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA\n+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC\n+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT\n+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA\n+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA\n+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA\n+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC\n+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA\n+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT\n+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG\n+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA\n+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA\n+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG\n+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA\n+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC\n+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT\n+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA\n+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC\n+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA\n+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG\n+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC\n+TATGTGTatttttattttgaataaacagaaagaaattttgggtttttaatttttttCTCC\n+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTaaaaaaaaaaaaaCCTGCTA\n+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT\n+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA\n+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT\n+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT\n+GGGAGGGGA
ATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA\n+TTGCTTATGAAAATTCCATAGTGGTAtttttttGGATTCTTAATGTGTAACTTAAACATA\n+CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA\n+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG\n+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG\n+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT\n+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT\n+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACtt\n+tttttCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG\n+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA\n+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT\n+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC\n+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT\n+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT\n+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT\n+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT\n+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT\n+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA\n+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA\n+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA\n+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT\n+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT\n+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT\n+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT\n+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA\n+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT\n+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT\n+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT\n+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT\n+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG\n+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC\n+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGT
TGTT\n+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTA'..b'GAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT\n+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG\n+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT\n+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC\n+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG\n+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG\n+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC\n+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC\n+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT\n+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG\n+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG\n+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT\n+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC\n+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT\n+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT\n+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT\n+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC\n+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG\n+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG\n+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC\n+AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC\n+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA\n+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG\n+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA\n+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC\n+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC\n+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA\n+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT\n+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA\n+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAG
GACGACCTG\n+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT\n+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC\n+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC\n+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG\n+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT\n+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT\n+ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC\n+TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA\n+GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT\n+TTTTTCGTTccccccacccgcccccAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT\n+CttttttttttttttttttttttttttttGCTGGTGTCTGAGCTTCAGTATAAAAGACAA\n+AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA\n+>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds\n+CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC\n+CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG\n+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG\n+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC\n+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT\n+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC\n+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT\n+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG\n+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC\n+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA\n+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC\n+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC\n+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG\n+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC\n+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG\n+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA\n+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG\n+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT\n+GAGGCCT
CTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG\n+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA\n+GCCATCCCACCAG\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/dustmasker_three_human.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dustmasker_three_human.maskinfo-asn1 Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,64 @@
+Blast-db-mask-info ::= {
+  algo-id 2,
+  algo-program dust,
+  algo-options "window=64; level=20; linker=1",
+  masks {
+    masks {
+      packed-int {
+        {
+          from 1447,
+          to 1495,
+          id local id 1
+        },
+        {
+          from 1540,
+          to 1552,
+          id local id 1
+        },
+        {
+          from 1886,
+          to 1892,
+          id local id 1
+        },
+        {
+          from 2278,
+          to 2284,
+          id local id 1
+        },
+        {
+          from 4409,
+          to 4415,
+          id local id 1
+        },
+        {
+          from 4635,
+          to 4653,
+          id local id 1
+        },
+        {
+          from 4726,
+          to 4734,
+          id local id 1
+        }
+      },
+      packed-int {
+        {
+          from 139,
+          to 219,
+          id local id 2
+        },
+        {
+          from 4569,
+          to 4584,
+          id local id 2
+        },
+        {
+          from 4621,
+          to 4648,
+          id local id 2
+        }
+      }
+    },
+    more FALSE
+  }
+}
b
diff -r 000000000000 -r 432ea9614cc9 test-data/dustmasker_three_human.maskinfo-asn1-binary
b
Binary file test-data/dustmasker_three_human.maskinfo-asn1-binary has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.dbinfo.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.dbinfo.txt Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,7 @@
+Database: Just 4 human proteins
+ 4 sequences; 3,297 total residues
+
+Date: Feb 10, 2014  6:40 PM Longest sequence: 1,382 residues
+
+Volumes:
+ /mnt/galaxy/galaxy_blast/test-data/four_human_proteins_taxid.fasta
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,48 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFP
+NENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSK
+RNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPV
+IAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLL
+RDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEGGFSTVFLVRTHGGIRCALKR
+MYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFC
+DTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQVSYFAFKFAK
+KDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDTIGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLA
+PGEFGNHRPKGALRPGNGPEILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ
+QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQYQQAFFQQQMLAQHQPSQQQA
+SPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSVADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEEL
+LDREFDLLRSNRLEERASSDKNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPENLGHRPLLMDSEDEEEEEKH
+SSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSAQLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNL
+PQHRFPAAGLEQEEFDVFTKAPFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARRHKKVGRRDSQSSNEFLTISD
+SKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLSWHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKM
+DDFGAVPFTELVVQSITPHQSQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPK
+LIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDW
+SRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYT
+MNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRS
+YALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVD
+IDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIIL
+KWKPPSDPNGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPFEK
+VVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGL
+IVLYEVSYRRYGDEELHLCVSRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSREKITLLRELGQGSFGMVYEG
+NARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKGFTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRP
+EAENNPGRPPPTLQEMIQMAAEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDNCPERVTDLMRMCWQFNPKMR
+PTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEMEFEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIP
+YTHMNGGKKNGRILTLPRSNPS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA
+VADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFT
+WVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES
+ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTT
+ICCGKNPLGDDEASATVSKTETSQVAPA
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta.log.txt Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,5 @@
+New DB title:  Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.phd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta.phd Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,4 @@
+11117184492
+29249033410
+36665887501
+5392473183
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.phi
b
Binary file test-data/four_human_proteins.fasta.phi has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.phr
b
Binary file test-data/four_human_proteins.fasta.phr has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.pin
b
Binary file test-data/four_human_proteins.fasta.pin has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.pog
b
Binary file test-data/four_human_proteins.fasta.pog has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.psd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta.psd Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,4 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
+gnl|bl_ord_id|33
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.psi
b
Binary file test-data/four_human_proteins.fasta.psi has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.psq
b
Binary file test-data/four_human_proteins.fasta.psq has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_masked.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_masked.fasta Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq
+qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy
+qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE
+NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf
+pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme
+fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes
+attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.log.txt Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,5 @@
+New DB title:  Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.phd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.phd Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,4 @@
+11117184492
+29249033410
+36665887501
+5392473183
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.phi
b
Binary file test-data/four_human_proteins_taxid.fasta.phi has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.phr
b
Binary file test-data/four_human_proteins_taxid.fasta.phr has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.pin
b
Binary file test-data/four_human_proteins_taxid.fasta.pin has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.pog
b
Binary file test-data/four_human_proteins_taxid.fasta.pog has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.psd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.psd Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,4 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
+gnl|bl_ord_id|33
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.psi
b
Binary file test-data/four_human_proteins_taxid.fasta.psi has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.psq
b
Binary file test-data/four_human_proteins_taxid.fasta.psq has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/rhodopsin_nucs.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rhodopsin_nucs.fasta Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,161 @@\n+>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA\n+ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCCTTCTCCAACAAAACGGGTGTGGTACGCAGCCCCT\n+TCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTCCTGCT\n+CATCGTGCTTGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACGGTCCAGCACAAGAAGCTGCGCACG\n+CCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCTGACCTCTTCATGGTCTTCGGTGGCTTCACCACCA\n+CCCTCTACACCTCTCTGCATGGATACTTTGTCTTTGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGC\n+CACACTGGGCGGTGAAATTGCCCTGTGGTCTTTGGTGGTCCTGGCCATTGAGCGGTACGTGGTGGTGTGT\n+AAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCATGCCATAATGGGCGTCGCTTTCACCTGGGTCATGG\n+CACTGGCCTGCGCTGCACCCCCCCTCGTTGGTTGGTCCAGGTACATCCCTGAAGGCATGCAGTGTTCATG\n+CGGGATCGACTACTACACACTCAAGCCAGAAGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTC\n+CACTTCACCATCCCCATGATCGTCATCTTCTTTTGCTACGGGCAGCTTGTCTTCACAGTCAAGGAGGCGG\n+CAGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCTGAGAAGGAGGTCACTCGCATGGTCATCATCAT\n+GGTCATTGCTTTCCTGATCTGTTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGG\n+TCCAACTTTGGCCCCATCTTCATGACACTCCCGGCGTTCTTCGCAAAGTCCTCCTCCATCTACAACCCTG\n+TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCC\n+ACTGGGTGATGACGAGGCTTCCACAACCGGTTCCAAGACGGAGACCAGCCAGGTGGCACCGGCCTAA\n+\n+>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete 
cds\n+TCTTTCTAGTTTGGGGGGGGGGACTTTAAAGAGCCGCCAATATGAACGGAACAGAAGGCCCAAACTTTTA\n+CATACCCATGTCCAACAAGACTGGGGTGGTGCGAAGCCCCTTTGAATACCCTCAGTATTACCTGGCAGAG\n+CCATGGCAATATTCCATTCTGTGCGCGTACATGTTCCTGCTCATTCTACTTGGGTTCCCAATCAACTTCA\n+TGACCTTGTACGTCACCATCCAGCACAAGAAGCTCCGGACACCCTTAAACTATATCCTGCTGAATTTGGC\n+CTTTGCCAACCACTTCATGGTCCTGTGTGGATTCACGGTGACAATGTACTCCTCAATGAACGGATACTTC\n+ATCCTCGGAGCCACCGGTTGCTATGTTGAAGGCTTCTTCGCTACCCTTGGTGGTGAAATCGCCCTTTGGT\n+CCCTGGTGGTCTTGGCCATTGAACGATACGTGGTCGTCTGTAAGCCCATGAGCAACTTCCGATTTAGTGA\n+GAACCATGCCGTCATGGGCGTAGCGTTCACCTGGATAATGGCTTTGTCCTGTGCTGTTCCTCCACTCCTT\n+GGATGGTCCAGGTACATCCCCGAGGGCATGCAGTGCTCCTGCGGAGTCGACTACTACACCCTGAAGCCCG\n+AGGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTCGTCCACTTCACCATCCCCCTGATTATCATTTT\n+CTTCTGCTATGGCCGCCTGGTGTGCACTGTGAAAGAGGCTGCAGCTCAACAGCAAGAGTCCGCCACCACC\n+CAGAAGGCCGAGAAAGAGGTGACCAGGATGGTGATCATCATGGTGGTCTTCTTCCTTATCTGTTGGGTCC\n+CCTACGCCTCTGTCGCTTTCTTCATCTTCAGCAATCAGGGCTCTGAGTTCGGCCCCATCTTCATGACCGT\n+CCCAGCTTTCTTTGCCAAGAGTTCTTCCATCTACAACCCCGTCATCTACATCATGCTCAACAAGCAGTTC\n+CGTAACTGCATGATCACCACCCTGTGCTGCGGCAAGAATCCCTTTGGAGAAGACGATGCCTCCTCTGCCG\n+CCACCTCCAAGACAGAGGCTTCTTCTGTTTCTTCCAGCCAGGTGTCTCCTGCATAAGACCTTCCACCAGG\n+CCTGTCTCAGGGTCCGCTGCCTCACACAGCTCCCACCGCCCCAACTCCGTCTCCTGCTCGCTAAGGCGGC\n+GAAGTTCCCCTTCCATTACATAAAACGTATCTGTTCAAGAAAGGCGACGACGAAGGAGAAGAAGAGGAGC\n+CCCCCCGAACCCCTTCGCTGCTGCTGAAAACGACTTGATTGCTTCTGCAACGCAACGGGGCCTTACGGCA\n+GCGAAGGGGTTGTCATCCGGACGCGCCAAGAATTCCTTCGAGACTGTAAATATCTTAAAGGAACCGTCCT\n+GCTAGTTACCGACGCCGCTCCTGTAGCCGCCGTTCCCCCGCACTCCGGCCGGTTCATACCTCTTATTTTT\n+TTGCAATGCAACAGAAAATAATATTTTTGTTCCCACGGCTTTTCCCGGTCAGGTCTGGTAGTGGCGGAGA\n+TTGGCCGACCCCTCGCACCTGTAATAAAGCGCAG\n+\n+>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial 
cds\n+GTGCCCTTCTCCAACAAGACAGGCGTGGTGCGCAGTCCCTTCGAGCATCCACAGTACTACCTGGCCGAGC\n+CATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTCGGCTTCCCCATCAACTTCCT\n+CACGCTCTATGTCACGGTTCAGCACAAGAAGCTGCGTACGCCTCTCAACTACATCCTGCTCAACCTGGCC\n+GTGGCCGACCTCTTCATGGTCTTCGGAGGCTTCACCACCACCCTCTACACCTCCCTGCATGGATACTTTG\n+TCTTCGGGCCTACGGGATGCAATCTGGAGGGCTTTTTTGCCACCCTGGGAGGTATGAGCTGAGATGCGGG\n+TAAGGAGGAGGCATAGAGGCATCTGGGAACAGTCCCAAGCTTGGGGTGAAGGCTAAGAGGCCTTCTTCCT\n+TGTTCTGTCATTGGCGTCGTCCGAAGCCCTCACTTAATCAACAAACAGTTTGGTGGTGAGGCGCTGAGCT\n+CCATTTGGAGAGGGCAGGTATCGAGCACTGTTTTATCCCCCCTGGAGTGGTGCCATTGCCTTGCTTTACA\n+GCAAAGAAACTGAGGATGAGAGGAGTCGAGGGTCTTGCCAGGTCACATCATGGCAGAGACAGAGCTGAGT\n+TTCAACCCTGCATCTATGTGCAGTTTCCCTTGGAGCAGCTATGTTAGGTCAGACCCACGGTGGGCACTGG\n+GGAGAGAGCTGCACAAGACAGGTCCCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTCCTGATTGCCA\n+GGAGTGATGTGCAGCGCAAATGTCTGAATTCCATTATTATGTGCTCCTTCTTCCTCTGAGCCAAACATCC\n+ATCTTCATGGCTCCTAGAATTGGGTCCCACCCACATGAGCAGGTCATTTTGTTTCCCTAGAGGGGAGAGG\n+TCACT'..b'CTTCAGAGGGTCAGATTTGGGATGAGAGTGGAGGCTGCGAGGGCCTGAGTG\n+GGAAGGGATTGGAGGCAAATCTCACCAACCATGTCAGTTTGCTACACACACTTTGGGTGGACCCTGACCC\n+TGACTCATGCTTCTTGCCTTCCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCCAC\n+TGGGTGACGATGAGGCCTCCACCACTGCCTC\n+\n+>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial 
cds\n+GTGCCCTTCTCCAACAAGACGGGTGTGGTGCGCAGCCCCTTCGAGTACCCGCAGTACTACCTGGCTGAGC\n+CCTGGCAGTTCTCCATGCTGGCTGCCTACATGTTTCTGCTGATCGTGCTCGGATTCCCCATCAACTTCCT\n+CACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCT\n+GTGGCCAACCTCTTCATGGTCTTTGGAGGCTTCACCACCACCCTGTATACCTCTATGCATGGATACTTCG\n+TCTTCGGGGCCACGGGATGCAATCTGGAGGGCTTCTTTGCCACGCTGGGCGGTGAAATCGCCCTGTGGTC\n+CCTGGTGGTCCTGGCCATCGAGCGGTATGTGGTGGTCTGCAAGCCCATGAGCAACTTCCGCTTTGGGGAG\n+AACCACGCCATCATGGGCCTCGCCTTCACGTGGGTCATGGCACTGGCCTGCGCTGCACCCCCACTAGCCG\n+GCTGGTCCAGGTACATCCCAGAGGGCATGCAGTGCTCGTGTGGGATTGACTACTACACGCTCAAACCGGA\n+GGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTGTCATTTTC\n+TTCTGCTACGGACAGCTGGTGTTCACAGTGAAGGAGGCGGCTGCCCAGCAGCAGGAGTCAGCCACCACCC\n+AGAAGGCCGAGAAGGAAGTCACGCGCATGGTCATCATCATGGTCGTTGCGTTCCTAATCTGTTGGCTGCC\n+CTACGCCAGCGTGGCATTCTACATCTTTACCCACCAGGGCTCTAACTTTGGCCCTGTCTTCATGACCATC\n+CCGGCATTCTTCGCCAAGTCATCCTCCATCTACAACCCGGTCATCTATATCATGATGAACAAGCAGTTCC\n+GGAACTGCATGCTCACCACCCTCTGCTGTGGCAAGAACCCACTGGGTGATGACGAAGCATCCACCACTGC\n+CTC\n+\n+>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete 
cds\n+ATGAACGGGACCGAGGGCCCAAACTTCTACGTGCCTTTCTCCAACAAGACGGGCGTCGTACGCAGCCCCT\n+TCGAGGCGCCGCAGTACTACCTGGCTGAGCCATGGCAGTTCAGCATGCTGGCCGCCTACATGTTCCTGCT\n+GATCATGCTTGGCTTCCCCATCAACTTCCTCACGCTGTACGTCACAGTCCAGCACAAGAAGCTGAGGACC\n+CCCCTCAACTACATCCTGCTCAACCTGGCCGTGGCAGATCTCTTCATGGTGTTCGGGGGCTTCACCACCA\n+CCCTGTATACCTCTCTGCACGGGTACTTCGTGTTCGGTCCGACGGGCTGCAACCTCGAGGGCTTCTTTGC\n+CACCTTAGGCGGTGAAATTGCACTGTGGTCCTTGGTGGTGCTAGCCATCGAGCGGTACGTAGTGGTGTGC\n+AAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCGTCGCATTCACCTGGGTCATGG\n+CTCTGGCCTGTGCGGCCCCCCCCCTCGTCGGCTGGTCTAGATACATCCCGGAGGGGATGCAGTGCTCGTG\n+CGGGATCGATTACTACACGCCCCACGAGGAGACCAACAATGAGTCGTTCGTCATCTACATGTTCGTTGTA\n+CACTTCATCATCCCCCTGATTGTCATATTCTTCTGCTACGGGCAGCTGGTCTTCACCGTCAAGGAGGCTG\n+CAGCCCAGCAGCAGGAGTCGGCCACCACTCAGAAGGCCGAGAAGGAGGTCACGCGTATGGTCATCATCAT\n+GGTCATCGCTTTCCTCATATGCTGGCTGCCCTACGCAGGTGTGGCGTTCTACATCTTCACCCATCAGGGA\n+TCCGACTTTGGCCCCATCTTCATGACCATCCCGGCTTTCTTTGCCAAGACGTCTGCCGTCTATAACCCCG\n+TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGGTCACCACTCTCTGCTGTGGCAAGAACCC\n+CCTAGGTGACGACGAGGCCTCCACGACCGTGTCCAAGACAGAGACCAGCCAAGTGGCCCCTGCCTAA\n+\n+>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete 
cds\n+CCGCTACTGACGAACCGCAACCATGAACGGCACTGAGGGACCTAACTTCTACATCCCCATGTCAAACGCC\n+ACTGGTGTAGTGAGGAGTCCATTTGAATACCCGCAGTACTACCTTGCAGAACCATGGGCTTTCTCAGCTC\n+TGTCTGCCTACATGTTCTTCCTGATTATCGCCGGATTCCCCATCAACTTCCTCACCCTGTATGTCACCAT\n+CGAACATAAGAAACTGAGGACCCCACTGAACTACATTCTGCTGAACCTGGCCGTGGCCGACCTCTTCATG\n+GTGTTTGGCGGATTCACCACCACGATGTACACCTCCATGCACGGCTACTTTGTCTTCGGCCCCACCGGCT\n+GCAACATCGAAGGGTTCTTCGCCACCCTCGGCGGCGAGATTGCCCTCTGGTGCCTCGTTGTCCTGGCCAT\n+TGAAAGGTGGATGGTCGTCTGCAAGCCAGTGACCAATTTCCGCTTCGGTGAGAGCCATGCCATCATGGGT\n+GTCATGGTGACCTGGACCATGGCATTGGCCTGTGCCCTCCCCCCTCTCTTCGGCTGGTCTCGGTACATTC\n+CGGAAGGTCTGCAGTGCTCGTGCGGGATCGACTACTATACCCGGGCGCCTGGGATCAACAATGAGTCCTT\n+TGTGATCTACATGTTTACCTGCCACTTCTCCATCCCACTCGCCGTCATCTCTTTCTGCTACGGCCGACTG\n+GTGTGCACCGTCAAAGAGGCCGCTGCCCAGCAACAGGAGTCCGAGACCACCCAGAGGGCTGAGCGGGAGG\n+TCACCCGCATGGTCGTCATCATGGTCATCTCCTTCCTGGTCTGCTGGGTGCCCTATGCCAGTGTGGCCTG\n+GTACATCTTTACCCACCAGGGAAGCACTTTTGGGCCCATCTTCATGACCATTCCATCCTTCTTTGCCAAG\n+AGTTCAGCCCTCTACAACCCCATGATCTACATCTGCATGAACAAGCAGTTCCGCCATTGCATGATCACCA\n+CCCTCTGCTGTGGGAAGAACCCCTTCGAGGAGGAGGATGGAGCGTCCGCCACTAGCTCTAAAACTGAGGC\n+TTCATCCGTGTCCTCCAGCTCTGTCTCCCCGGCATAAACCTTGTTTGACCGAACACCACGCATCAACACA\n+AAGACCAAGAATGCTGACTAAATGCTAACATTTCAGGGAAATCCAAAGACTTTTTACTATTTTTTTACAC\n+AACCATATAGGTTGCAAACAGAGGTTTAGCCCTGTTTACAGGTTGTCATCAATGTGATGTCAGTATGTAC\n+AATATAGTCAACTTGATAGCAAGTTGTTGGCTTATTTCAGATTGTATGGGCAATGTAATCAACCATATGT\n+GAAATAAATTGCAA\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/rhodopsin_proteins.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rhodopsin_proteins.fasta Fri Jan 30 08:27:28 2015 -0500
[
@@ -0,0 +1,43 @@
+>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]
+MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT
+PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC
+KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVV
+HFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQG
+SNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA
+
+>gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin
+MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRT
+PLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVC
+KPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVV
+HFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQG
+SEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQ
+VSPA
+
+>gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
+VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA
+VADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE
+NHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF
+FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTL
+PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS
+
+>gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus]
+VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA
+VANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE
+NHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF
+FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTI
+PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS
+
+>gi|223523|prf||0811197A rhodopsin [Bos taurus]
+MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRT
+PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC
+KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVH
+FIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGS
+DFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA
+
+>gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster]
+MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRT
+PLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVC
+KPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTC
+HFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQG
+STFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSS
+VSPA
b
diff -r 000000000000 -r 432ea9614cc9 test-data/segmasker_four_human.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/segmasker_four_human.fasta Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq
+qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy
+qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE
+NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf
+pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme
+fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes
+attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
b
diff -r 000000000000 -r 432ea9614cc9 test-data/segmasker_four_human.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/segmasker_four_human.maskinfo-asn1 Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,114 @@
+Blast-db-mask-info ::= {
+  algo-id 1,
+  algo-program seg,
+  algo-options "window=12; locut=2.2; hicut=2.5",
+  masks {
+    masks {
+      int {
+        from 6,
+        to 18,
+        id local id 1
+      },
+      packed-int {
+        {
+          from 11,
+          to 46,
+          id local id 2
+        },
+        {
+          from 325,
+          to 332,
+          id local id 2
+        },
+        {
+          from 421,
+          to 443,
+          id local id 2
+        },
+        {
+          from 437,
+          to 450,
+          id local id 2
+        },
+        {
+          from 447,
+          to 496,
+          id local id 2
+        },
+        {
+          from 501,
+          to 516,
+          id local id 2
+        },
+        {
+          from 536,
+          to 554,
+          id local id 2
+        },
+        {
+          from 545,
+          to 558,
+          id local id 2
+        },
+        {
+          from 636,
+          to 648,
+          id local id 2
+        },
+        {
+          from 737,
+          to 762,
+          id local id 2
+        },
+        {
+          from 789,
+          to 806,
+          id local id 2
+        },
+        {
+          from 970,
+          to 983,
+          id local id 2
+        },
+        {
+          from 999,
+          to 1010,
+          id local id 2
+        }
+      },
+      packed-int {
+        {
+          from 3,
+          to 26,
+          id local id 3
+        },
+        {
+          from 372,
+          to 390,
+          id local id 3
+        },
+        {
+          from 766,
+          to 782,
+          id local id 3
+        },
+        {
+          from 780,
+          to 791,
+          id local id 3
+        },
+        {
+          from 1312,
+          to 1324,
+          id local id 3
+        }
+      },
+      int {
+        from 230,
+        to 246,
+        id local id 4
+      }
+    },
+    more FALSE
+  }
+}
b
diff -r 000000000000 -r 432ea9614cc9 test-data/segmasker_four_human.maskinfo-asn1-binary
b
Binary file test-data/segmasker_four_human.maskinfo-asn1-binary has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/tblastn_four_human_vs_rhodopsin.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastn_four_human_vs_rhodopsin.html Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,787 @@\n+<HTML>\n+<TITLE>BLAST Search Results</TITLE>\n+<BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">\n+<PRE>\n+\n+<b>TBLASTN 2.2.30+</b>\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA\n+\n+Length=1047\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n+\n+Gapped\n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete\n+cds\n+\n+Length=1574\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n+\n+Gapped\n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434\n+rhodopsin (RHO) gene, exons 1 through 5 and partial cds\n+\n+Length=4301\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n+\n+Gapped\n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens 
GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin\n+(RHO) mRNA, partial cds\n+\n+Length=983\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n+\n+Gapped\n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for\n+rhodopsin, complete cds\n+\n+Length=1047\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n+\n+Gapped\n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh\n+water form rod opsin, complete cds\n+\n+Length=1344\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n+\n+Gapped\n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens\n+GN=BMP2K PE=1 SV=2\n+\n+Length=1161\n+\n+<b>Subject=</b> gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA\n+\n+Length=1047\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda      K        H        a         alpha\n+   0.334    0.170    0.615    0.522     
1.92 \n+\n+Gapped\n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n+\n+Effective search space used: 370988\n+\n+\n+<b>Query'..b'  711 bits (1640),  Expect = 0.0, Method: Compositional matrix adjust.\n+ Identities = 325/348 (93%), Positives = 337/348 (97%), Gaps = 0/348 (0%)\n+ Frame = +1\n+\n+Query  1     MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY  60\n+             MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLY\n+Sbjct  1     MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLY  180\n+\n+Query  61    VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG  120\n+             VTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLG\n+Sbjct  181   VTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLG  360\n+\n+Query  121   GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP  180\n+             GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIP\n+Sbjct  361   GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIP  540\n+\n+Query  181   EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES  240\n+             EG+QCSCGIDYYT   E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQES\n+Sbjct  541   EGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQES  720\n+\n+Query  241   ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI  300\n+             ATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+\n+Sbjct  721   ATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAV  900\n+\n+Query  301   YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA  348\n+             YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA\n+Sbjct  901   YNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA  1044\n+\n+\n+\n+Lambda      K        H        a         alpha\n+   0.351    0.182    0.707    0.522     1.92 \n+\n+Gapped\n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 
\n+\n+Effective search space used: 109230\n+\n+\n+<b>Query=</b> sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1\n+\n+Length=348\n+\n+<b>Subject=</b> gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh\n+water form rod opsin, complete cds\n+\n+Length=1344\n+\n+\n+ Score =   626 bits (1444),  Expect = 0.0, Method: Compositional matrix adjust.\n+ Identities = 281/342 (82%), Positives = 311/342 (91%), Gaps = 1/342 (0%)\n+ Frame = +2\n+\n+Query  1     MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY  60\n+             MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLY\n+Sbjct  23    MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLY  202\n+\n+Query  61    VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG  120\n+             VT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLG\n+Sbjct  203   VTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLG  382\n+\n+Query  121   GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP  180\n+             GEIALW LVVLAIER++VVCKP++NFRFGE HAIMGV  TW MALACA PPL GWSRYIP\n+Sbjct  383   GEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIP  562\n+\n+Query  181   EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES  240\n+             EGLQCSCGIDYYT  P +NNESFVIYMF  HF+IP+ +I FCYG+LV TVKEAAAQQQES\n+Sbjct  563   EGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQES  742\n+\n+Query  241   ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI  300\n+              TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA YIFTHQGS FGPIFMTIP+FFAKS+A+\n+Sbjct  743   ETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSAL  922\n+\n+Query  301   YNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE  341\n+             YNP+IYI MNKQFR CM+TT+CCGKNP   +D ASAT SKTE\n+Sbjct  923   YNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE  1048\n+\n+\n+\n+Lambda      K        H        a         alpha\n+   0.351    0.182    0.707    0.522     1.92 \n+\n+Gapped\n+Lambda      K        H     
   a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n+\n+Effective search space used: 109230\n+\n+\n+\n+\n+Matrix: BLOSUM80\n+Gap Penalties: Existence: 10, Extension: 1\n+Neighboring words threshold: 14\n+Window for multiple hits: 25\n+</PRE>\n+</BODY>\n+</HTML>\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/tblastn_four_human_vs_rhodopsin.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastn_four_human_vs_rhodopsin.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,10 @@
+sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0   732
+sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0   646
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72   151
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72   126
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67   229
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32   122
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7
+sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0   658
+sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0   711
+sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0   626
b
diff -r 000000000000 -r 432ea9614cc9 test-data/tblastn_four_human_vs_rhodopsin.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastn_four_human_vs_rhodopsin.xml Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,741 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+  <BlastOutput_program>tblastn</BlastOutput_program>\n+  <BlastOutput_version>TBLASTN 2.2.30+</BlastOutput_version>\n+  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+  <BlastOutput_db></BlastOutput_db>\n+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+  <BlastOutput_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def>\n+  <BlastOutput_query-len>406</BlastOutput_query-len>\n+  <BlastOutput_param>\n+    <Parameters>\n+      <Parameters_matrix>BLOSUM80</Parameters_matrix>\n+      <Parameters_expect>1e-10</Parameters_expect>\n+      <Parameters_gap-open>10</Parameters_gap-open>\n+      <Parameters_gap-extend>1</Parameters_gap-extend>\n+      <Parameters_filter>F</Parameters_filter>\n+    </Parameters>\n+  </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+  <Iteration_iter-num>1</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+  <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>19</Statistics_hsp-len>\n+      <Statistics_eff-space>127710</Statistics_eff-space>\n+      <Statistics_kappa>0.071</Statistics_kappa>\n+      
<Statistics_lambda>0.299</Statistics_lambda>\n+      <Statistics_entropy>0.27</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>2</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+  <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>19</Statistics_hsp-len>\n+      <Statistics_eff-space>127710</Statistics_eff-space>\n+      <Statistics_kappa>0.071</Statistics_kappa>\n+      <Statistics_lambda>0.299</Statistics_lambda>\n+      <Statistics_entropy>0.27</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>3</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+  <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>19</Statistics_hsp-len>\n+      <Statistics_eff-space>127710</Statistics_eff-space>\n+      <Statistics_kappa>0.071</Statistics_kappa>\n+      <Statistics_lambda>0.299</Statistics_lambda>\n+      <Statistics_entropy>0.27</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  
<Iteration_iter-num>4</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmi'..b'YYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>\n+      <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT   E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>18</Statistics_hsp-len>\n+      <Statistics_eff-space>109230</Statistics_eff-space>\n+      <Statistics_kappa>0.071</Statistics_kappa>\n+      <Statistics_lambda>0.299</Statistics_lambda>\n+      <Statistics_entropy>0.27</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>24</Iteration_iter-num>\n+  <Iteration_query-ID>Query_4</Iteration_query-ID>\n+  <Iteration_query-def>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>\n+  <Iteration_query-len>348</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+  <Hit_num>1</Hit_num>\n+  <Hit_id>Subject_6</Hit_id>\n+  <Hit_def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Hit_def>\n+  <Hit_accession>Subject_6</Hit_accession>\n+  <Hit_len>1344</Hit_len>\n+  <Hit_hsps>\n+    <Hsp>\n+      
<Hsp_num>1</Hsp_num>\n+      <Hsp_bit-score>626.708</Hsp_bit-score>\n+      <Hsp_score>1444</Hsp_score>\n+      <Hsp_evalue>0</Hsp_evalue>\n+      <Hsp_query-from>1</Hsp_query-from>\n+      <Hsp_query-to>341</Hsp_query-to>\n+      <Hsp_hit-from>23</Hsp_hit-from>\n+      <Hsp_hit-to>1048</Hsp_hit-to>\n+      <Hsp_query-frame>0</Hsp_query-frame>\n+      <Hsp_hit-frame>2</Hsp_hit-frame>\n+      <Hsp_identity>281</Hsp_identity>\n+      <Hsp_positive>311</Hsp_positive>\n+      <Hsp_gaps>1</Hsp_gaps>\n+      <Hsp_align-len>342</Hsp_align-len>\n+      <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE</Hsp_qseq>\n+      <Hsp_hseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE</Hsp_hseq>\n+      <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE HAIMGV  TW MALACA PPL GWSRYIPEGLQCSCGIDYYT  P +NNESFVIYMF  HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR CM+TT+CCGKNP   +D ASAT SKTE</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>18</Statistics_hsp-len>\n+      <Statistics_eff-space>109230</Statistics_eff-space>\n+      
<Statistics_kappa>0.071</Statistics_kappa>\n+      <Statistics_lambda>0.299</Statistics_lambda>\n+      <Statistics_entropy>0.27</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/tblastn_four_human_vs_rhodopsin_ext.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,10 @@
+sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0   732 gi|57163782|ref|NM_001009242.1| 1689 336 343 0 98.56 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 1047 N/A
+sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0   646 gi|2734705|gb|U59921.1|BBU59921 1489 290 320 1 93.57 0 3 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 1574 N/A
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72   151 gi|283855845|gb|GQ290303.1| 342 69 73 0 98.65 0 3 ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ 348 4301 N/A
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72   126 gi|283855845|gb|GQ290303.1| 284 54 57 0 96.61 0 2 RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS 348 4301 N/A
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67   229 gi|283855845|gb|GQ290303.1| 523 107 109 0 98.20 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 348 4301 N/A
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32   122 gi|283855845|gb|GQ290303.1| 276 55 56 0 94.92 0 3 LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR 348 4301 N/A
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7 gi|283855845|gb|GQ290303.1| 125 23 24 0 92.31 0 1 QFRNCMLTTICCGKNPLGDDEASATV QFRNCMLTTLCCGKNPLGDDEASTTA 348 4301 N/A
+sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0   658 gi|283855822|gb|GQ290312.1| 1517 310 322 0 98.77 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT 348 983 N/A
+sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0   711 gi|18148870|dbj|AB062417.1| 1640 325 337 0 96.84 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 1047 N/A
+sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0   626 gi|12583664|dbj|AB043817.1| 1444 281 311 1 90.94 0 2 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 1344 N/A
b
diff -r 000000000000 -r 432ea9614cc9 test-data/tblastx_rhodopsin_vs_three_human.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastx_rhodopsin_vs_three_human.tabular Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,117 @@\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t97.39\t230\t6\t0\t1\t690\t88\t777\t0.0\t  559\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.12\t102\t6\t0\t742\t1047\t829\t1134\t0.0\t  236\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t91.22\t148\t13\t0\t1046\t603\t1133\t690\t0.0\t  308\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.32\t88\t5\t0\t566\t303\t653\t390\t0.0\t  207\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t96.34\t82\t3\t0\t248\t3\t335\t90\t0.0\t  182\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t83.33\t204\t34\t0\t18\t629\t105\t716\t4e-158\t  404\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t75.28\t89\t22\t0\t780\t1046\t867\t1133\t4e-158\t  161\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t81.28\t203\t38\t0\t609\t1\t696\t88\t5e-153\t  360\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t80.60\t67\t13\t0\t916\t716\t1003\t803\t5e-153\t  135\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t70.27\t37\t11\t0\t1047\t937\t1134\t1024\t5e-153\t64.2\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t100.00\t7\t0\t0\t646\t626\t733\t713\t5e-153\t24.0\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t89.23\t65\t7\t0\t460\t266\t547\t353\t4e-105\t  167\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t89.58\t48\t5\t0\t184\t41\t271\t128\t4e-105\t  104\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t77.78\t45\t10\t0\t882\t748\t969\t835\t4e-105\t93.9\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t67.86\t28\t9\t0\t1045\t962\t1132\t1049\t4e-105\t51.9\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t59.09\t22\t9\t0\t586\t521\t673\t608\t4e-105\t33.1\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t81.40\t86\t16\t0\t296\t553\t383\t640\t2e-87\t  
185\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t84.38\t32\t5\t0\t11\t106\t98\t193\t2e-87\t74.8\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t71.43\t35\t10\t0\t941\t1045\t1028\t1132\t2e-87\t61.6\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.44\t18\t1\t0\t794\t847\t881\t934\t2e-87\t50.1\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t83.61\t238\t39\t0\t18\t731\t64\t777\t0.0\t  507\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t82.35\t85\t15\t0\t783\t1037\t829\t1083\t0.0\t  188\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t70.96\t303\t88\t0\t925\t17\t971\t63\t2e-130\t  435\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t72.22\t18\t5\t0\t1027\t974\t1073\t1020\t2e-130\t35.0\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t55.32\t188\t84\t0\t605\t42\t651\t88\t7e-89\t  245\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t61.11\t72\t28\t0\t1037\t822\t1083\t868\t7e-89\t91.3\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t49.02\t204\t104\t0\t29\t640\t75\t686\t4e-78\t  197\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t66.04\t53\t18\t0\t860\t1018\t906\t1064\t4e-78\t85.8\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t44.44\t27\t15\t0\t689\t769\t735\t815\t4e-78\t32.2\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t47.47\t198\t104\t0\t633\t40\t679\t86\t4e-65\t  177\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t68.09\t47\t15\t0\t1017\t877\t1063\t923\t4e-65\t80.3\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t57.89\t114\t48\t0\t265\t606\t311\t652\t3e-46\t  137\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t46.30\t54\t29\t0\t19\t180\t65\t226\t3e-46\t52.4\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t96.40\t111\t4\t0\t1\t333\t118\t450\t0.0\t  264\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t92.31\t65\t5\t0\t3174\t3368\t829\t1023\t0.0\t  
151\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t96.43\t56\t2\t0\t2855\t3022\t616\t783\t0.0\t  141\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t93.22\t59\t4\t0\t1404\t1580\t442\t618\t0.0\t  138\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t92.00\t25\t2\t0\t4222\t4296\t1021\t1095\t0.0\t64.3\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t88.89\t9\t1\t0\t3128\t3154\t783\t809\t0.0\t22.6\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t78.38\t111\t24\t0\t333\t1\t450\t118\t7e-171\t  212\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t88.75\t80\t9\t0\t3367'..b'319\t72\t0\t3\t959\t120\t1076\t4e-174\t  593\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t79.07\t129\t27\t0\t558\t172\t675\t289\t2e-133\t  248\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t76.83\t82\t19\t0\t963\t718\t1080\t835\t2e-133\t  159\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t84.09\t44\t7\t0\t133\t2\t250\t119\t2e-133\t97.3\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t73.08\t78\t21\t0\t433\t200\t550\t317\t6e-102\t  145\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t70.15\t67\t20\t0\t799\t599\t916\t716\t6e-102\t  
106\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t80.49\t41\t8\t0\t123\t1\t240\t118\t6e-102\t84.5\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t77.78\t27\t6\t0\t553\t473\t670\t590\t6e-102\t51.9\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t71.43\t14\t4\t0\t889\t848\t1006\t965\t6e-102\t32.7\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t81.82\t11\t2\t0\t958\t926\t1075\t1043\t6e-102\t28.6\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t84.85\t33\t5\t0\t239\t337\t356\t454\t4e-48\t72.5\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t86.67\t30\t4\t0\t2\t91\t119\t208\t4e-48\t71.2\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t81.40\t43\t8\t0\t404\t532\t521\t649\t4e-48\t47.3\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t88.89\t18\t2\t0\t764\t817\t881\t934\t4e-48\t44.6\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t87.50\t8\t1\t0\t935\t958\t1052\t1075\t4e-48\t21.7\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t93.91\t230\t14\t0\t1\t690\t88\t777\t0.0\t  538\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t91.18\t102\t9\t0\t742\t1047\t829\t1134\t0.0\t  233\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t88.83\t188\t21\t0\t566\t3\t653\t90\t0.0\t  394\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t84.06\t138\t22\t0\t1046\t633\t1133\t720\t0.0\t  260\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t67.11\t228\t75\t0\t684\t1\t771\t88\t7e-132\t  333\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t67.27\t110\t36\t0\t1045\t716\t1132\t803\t7e-132\t  141\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t70.20\t151\t45\t0\t3\t455\t90\t542\t1e-128\t  236\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t64.04\t89\t32\t0\t780\t1046\t867\t1133\t1e-128\t  136\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.22\t74\t25\t0\t510\t731\t597\t818\t1e-128\t  
111\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.04\t106\t36\t0\t242\t559\t329\t646\t2e-58\t  161\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t85.71\t21\t3\t0\t92\t154\t179\t241\t2e-58\t53.8\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t73.68\t19\t5\t0\t791\t847\t878\t934\t2e-58\t39.1\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t61.29\t62\t24\t0\t424\t239\t511\t326\t4e-55\t81.3\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t74.36\t39\t10\t0\t133\t17\t220\t104\t4e-55\t69.8\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t65.71\t35\t12\t0\t882\t778\t969\t865\t4e-55\t56.3\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t58.14\t43\t18\t0\t649\t521\t736\t608\t4e-55\t50.6\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.67\t12\t4\t0\t972\t937\t1059\t1024\t4e-55\t23.9\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t82.13\t235\t42\t0\t11\t715\t76\t780\t0.0\t  498\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t78.31\t83\t18\t0\t770\t1018\t835\t1083\t0.0\t  177\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t72.29\t332\t92\t0\t1017\t22\t1082\t87\t1e-150\t  516\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t48.30\t147\t76\t0\t712\t272\t777\t337\t2e-98\t  169\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t54.17\t72\t33\t0\t1030\t815\t1095\t880\t2e-98\t  
103\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t47.83\t69\t36\t0\t220\t14\t285\t79\t2e-98\t83.5\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t72.00\t25\t7\t0\t782\t708\t847\t773\t2e-98\t45.1\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t56.00\t75\t33\t0\t532\t756\t597\t821\t5e-65\t87.7\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t39.42\t104\t63\t0\t19\t330\t84\t395\t5e-65\t86.8\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t56.90\t58\t25\t0\t829\t1002\t894\t1067\t5e-65\t81.3\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t50.00\t30\t15\t0\t388\t477\t453\t542\t5e-65\t33.6\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.dbinfo.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.dbinfo.txt Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,7 @@
+Database: Just 3 human mRNA sequences
+ 3 sequences; 10,732 total bases
+
+Date: Dec 26, 2014  5:54 AM Longest sequence: 4,796 bases
+
+Volumes:
+ /mnt/galaxy/galaxy_blast/test-data/three_human_mRNA.fasta
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,183 @@\n+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.\n+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG\n+CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC\n+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC\n+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA\n+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC\n+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT\n+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA\n+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA\n+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA\n+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC\n+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA\n+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT\n+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG\n+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA\n+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA\n+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG\n+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA\n+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC\n+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT\n+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA\n+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC\n+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA\n+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG\n+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC\n+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC\n+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA\n+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT\n+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA\n+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT\n+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT\n+GGGAGGGG
AATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA\n+TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA\n+CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA\n+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG\n+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG\n+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT\n+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT\n+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT\n+TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG\n+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA\n+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT\n+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC\n+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT\n+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT\n+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT\n+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT\n+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT\n+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA\n+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA\n+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA\n+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT\n+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT\n+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT\n+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT\n+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA\n+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT\n+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT\n+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT\n+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT\n+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG\n+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC\n+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAG
TTGTT\n+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGT'..b'AAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT\n+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG\n+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT\n+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC\n+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG\n+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG\n+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC\n+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC\n+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT\n+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG\n+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG\n+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT\n+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC\n+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT\n+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT\n+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT\n+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC\n+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG\n+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG\n+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC\n+AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC\n+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA\n+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG\n+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA\n+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC\n+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC\n+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA\n+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT\n+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA\n+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGG
ACGACCTG\n+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT\n+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC\n+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC\n+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG\n+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT\n+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT\n+ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC\n+TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA\n+GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT\n+TTTTTCGTTCCCCCCACCCGCCCCCAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT\n+CTTTTTTTTTTTTTTTTTTTTTTTTTTTTGCTGGTGTCTGAGCTTCAGTATAAAAGACAA\n+AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA\n+>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.\n+CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC\n+CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG\n+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG\n+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC\n+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT\n+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC\n+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT\n+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG\n+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC\n+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA\n+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC\n+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC\n+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG\n+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC\n+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG\n+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA\n+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG\n+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT\n+GAGGCCT
CTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG\n+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA\n+GCCATCCCACCAG\n'
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta.log.txt Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,5 @@
+New DB title:  Just 3 human mRNA sequences
+Sequence type: Nucleotide
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nhd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta.nhd Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,3 @@
+12956943350
+13082197871
+19180330422
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nhi
b
Binary file test-data/three_human_mRNA.fasta.nhi has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nhr
b
Binary file test-data/three_human_mRNA.fasta.nhr has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nin
b
Binary file test-data/three_human_mRNA.fasta.nin has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nog
b
Binary file test-data/three_human_mRNA.fasta.nog has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nsd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta.nsd Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,3 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nsi
b
Binary file test-data/three_human_mRNA.fasta.nsi has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nsq
b
Binary file test-data/three_human_mRNA.fasta.nsq has changed
b
diff -r 000000000000 -r 432ea9614cc9 test-data/tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tool_data_table_conf.xml.test Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,15 @@
+<tables>
+    <!-- test files! -->
+    <table name="blastdb" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/blastdb.loc" />
+    </table>
+    <table name="blastdb_p" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/blastdb_p.loc" />
+    </table>
+    <table name="blastdb_d" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/blastdb_d.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 432ea9614cc9 tool-data/blastdb.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/blastdb.loc.sample Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,39 @@
+#This is a sample file distributed with Galaxy that is used to define a
+#list of nucleotide BLAST databases, using three columns tab separated
+#(the longer runs of whitespace are TAB characters):
+#
+#<unique_id> <database_caption> <base_name_path>
+#
+#The captions typically contain spaces and might end with the build date.
+#It is important that the actual database name does not have a space in
+#it, and that there are only two tabs on each line.
+#
+#So, for example, if your database is nt and the path to your base name 
+#is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry 
+#would look like this:
+#
+#nt_02_Dec_2009      nt 02 Dec 2009      /depot/data2/galaxy/blastdb/nt/nt.chunk
+#
+#and your /depot/data2/galaxy/blastdb/nt directory would contain all of 
+#your "base names" (e.g.):
+#
+#-rw-r--r--  1 wychung galaxy  23437408 2008-04-09 11:26 nt.chunk.00.nhr
+#-rw-r--r--  1 wychung galaxy   3689920 2008-04-09 11:26 nt.chunk.00.nin
+#-rw-r--r--  1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq
+#...etc...
+#
+#Your blastdb.loc file should include an entry per line for each "base name" 
+#you have stored.  For example:
+#
+#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk
+#wgs_30_Nov_2009 wgs 30 Nov 2009 /depot/data2/galaxy/blastdb/wgs/wgs.chunk
+#test_20_Sep_2008 test 20 Sep 2008 /depot/data2/galaxy/blastdb/test/test
+#...etc...
+#
+#You can download the NCBI provided nucleotide databases like NT from here:
+#ftp://ftp.ncbi.nlm.nih.gov/blast/db/
+#
+#See also blastdb_p.loc which is for any protein BLAST database, and
+#blastdb_d.loc which is for any protein domain database (like CDD).
+
+
b
diff -r 000000000000 -r 432ea9614cc9 tool-data/blastdb_d.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/blastdb_d.loc.sample Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,35 @@
+#This is a sample file distributed with Galaxy that is used to define a
+#list of protein domain databases, using three columns tab separated
+#(the longer runs of whitespace are TAB characters):
+#
+#<unique_id> <database_caption> <base_name_path>
+#
+#The captions typically contain spaces and might end with the build date.
+#It is important that the actual database name does not have a space in it,
+#and that there are only two tabs on each line.
+#
+#You can download the NCBI provided databases as tar-balls from here:
+#ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/
+#
+#So, for example, if your database is CDD and the path to your base name
+#is /data/blastdb/Cdd, then the blastdb_d.loc entry would look like this:
+#
+#Cdd{tab}NCBI Conserved Domains Database (CDD){tab}/data/blastdb/Cdd
+#
+#and your /data/blastdb directory would contain all of the files associated
+#with the database, /data/blastdb/Cdd.*.
+#
+#Your blastdb_d.loc file should include an entry per line for each "base name"
+#you have stored. For example:
+#
+#Cdd NCBI CDD /data/blastdb/domains/Cdd
+#Kog KOG (eukaryotes) /data/blastdb/domains/Kog
+#Cog COG (prokaryotes) /data/blastdb/domains/Cog
+#Pfam Pfam-A /data/blastdb/domains/Pfam
+#Smart SMART /data/blastdb/domains/Smart
+#Tigr TIGR /data/blastdb/domains/Tigr
+#Prk Protein Clusters database /data/blastdb/domains/Prk
+#...etc...
+#
+#See also blastdb.loc which is for any nucleotide BLAST database, and
+#blastdb_p.loc which is for any protein BLAST database.
b
diff -r 000000000000 -r 432ea9614cc9 tool-data/blastdb_p.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/blastdb_p.loc.sample Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,30 @@
+#This is a sample file distributed with Galaxy that is used to define a
+#list of protein BLAST databases, using three columns tab separated
+#(the longer runs of whitespace are TAB characters):
+#
+#<unique_id> <database_caption> <base_name_path>
+#
+#The captions typically contain spaces and might end with the build date.
+#It is important that the actual database name does not have a space in
+#it, and that there are only two tabs on each line.
+#
+#So, for example, if your database is NR and the path to your base name
+#is /data/blastdb/nr, then the blastdb_p.loc entry would look like this:
+#
+#nr{tab}NCBI NR (non redundant){tab}/data/blastdb/nr
+#
+#and your /data/blastdb directory would contain all of the files associated
+#with the database, /data/blastdb/nr.*.
+#
+#Your blastdb_p.loc file should include an entry per line for each "base name"
+#you have stored. For example:
+#
+#nr_05Jun2010 NCBI NR (non redundant) 05 Jun 2010 /data/blastdb/05Jun2010/nr
+#nr_15Aug2010 NCBI NR (non redundant) 15 Aug 2010 /data/blastdb/15Aug2010/nr
+#...etc...
+#
+#You can download the NCBI provided protein databases like NR from here:
+#ftp://ftp.ncbi.nlm.nih.gov/blast/db/
+#
+#See also blastdb.loc which is for any nucleotide BLAST database, and
+#blastdb_d.loc which is for any protein domain database (like CDD).
b
diff -r 000000000000 -r 432ea9614cc9 tool-data/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/tool_data_table_conf.xml.sample Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,14 @@
+<tables>
+    <table name="blastdb" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/blastdb.loc" />
+    </table>
+    <table name="blastdb_p" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/blastdb_p.loc" />
+    </table>
+    <table name="blastdb_d" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/blastdb_d.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/README.rst Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,266 @@\n+Galaxy wrappers for NCBI BLAST+ suite\n+=====================================\n+\n+These wrappers are copyright 2010-2014 by Peter Cock (The James Hutton Institute,\n+UK) and additional contributors including Edward Kirton, John Chilton,\n+Nicola Soranzo, Jim Johnson, and Bjoern Gruening.\n+\n+See the licence text below.\n+\n+Currently tested with NCBI BLAST 2.2.30+ (i.e. version 2.2.30 of BLAST+),\n+and does not work with the NCBI \'legacy\' BLAST suite (e.g. ``blastall``).\n+\n+Note that these wrappers (and the associated datatypes) were originally\n+distributed as part of the main Galaxy repository, but as of August 2012\n+moved to the Galaxy Tool Shed as ``ncbi_blast_plus`` (and ``blast_datatypes``).\n+My thanks to Dannon Baker from the Galaxy development team for his assistance\n+with this.\n+\n+These wrappers are available from the Galaxy Tool Shed at:\n+http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+\n+\n+Citation\n+========\n+\n+Please cite the following paper (currently available as a preprint):\n+\n+NCBI BLAST+ integrated into Galaxy.\n+P.J.A. Cock, J.M. Chilton, B. Gruening, J.E. Johnson, N. Soranzo\n+bioRxiv DOI: http://dx.doi.org/10.1101/014043 (preprint)\n+\n+You should also cite the NCBI BLAST+ tools:\n+\n+BLAST+: architecture and applications.\n+C. Camacho et al. BMC Bioinformatics 2009, 10:421.\n+DOI: http://dx.doi.org/10.1186/1471-2105-10-421\n+\n+\n+Automated Installation\n+======================\n+\n+Galaxy should be able to automatically install the dependencies, i.e. 
the\n+BLAST+ binaries and the ``blast_datatypes`` repository which defines the\n+BLAST XML file format (``blastxml``), protein and nucleotide BLAST databases\n+(``blastdbp`` and ``blastdbn``), and so on.\n+\n+See the configuration notes below.\n+\n+Manual Installation\n+===================\n+\n+For those not using Galaxy\'s automated installation from the Tool Shed, put\n+the XML and Python files in the ``tools/ncbi_blast_plus/`` folder and add the\n+XML files to your ``tool_conf.xml`` as normal.  For example, use::\n+\n+  <section name="NCBI BLAST+" id="ncbi_blast_plus_tools">\n+    <tool file="ncbi_blast_plus/ncbi_blastn_wrapper.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_blastp_wrapper.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_blastx_wrapper.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_tblastn_wrapper.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_tblastx_wrapper.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_makeblastdb.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_dustmasker_wrapper.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_blastdbcmd_info.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_rpsblast_wrapper.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml" />\n+    <tool file="ncbi_blast_plus/ncbi_makeprofiledb.xml" />\n+    <tool file="ncbi_blast_plus/blastxml_to_tabular.xml" />\n+  </section>\n+\n+You will also need to install ``blast_datatypes`` from the Tool Shed. This\n+defines the BLAST XML file format (``blastxml``), BLAST databases, etc:\n+\n+* http://toolshed.g2.bx.psu.edu/view/devteam/blast_datatypes\n+\n+As described above for an automated installation, you must also tell Galaxy\n+about any system level BLAST databases using the ``tool-data/blastdb*.loc``\n+files. 
Also merge the ``tool-data/tool_data_table_conf.xml.sample`` contents\n+into your ``tool_data_table_conf.xml`` file.\n+\n+You must install the NCBI BLAST+ standalone tools somewhere on the system\n+path. Currently the unit tests are written using BLAST+ 2.2.30.\n+\n+Run the functional tests (adjusting the section identifier to match your\n+``tool_conf.xml.sample`` file)::\n+\n+    ./run_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools\n+\n+Configuration\n+=============\n+\n+You must tell Galaxy about any system level BLAST databases using configuration\n+files ``blastdb.loc`` (nucleotide databases like NT) and ``blastdb_p.loc``\n+(protein databases like NR), and ``blastdb_d.loc`` (protein domain databases\n+like CDD or SMART) which ar'..b'STP task option.\n+        - Wrappers for segmasker, dustmasker and convert2blastmask\n+          (contribution from Bjoern Gruening).\n+        - Supports using maskinfo with ``makeblastdb`` wrapper.\n+        - Supports setting a taxonomy ID in ``makeblastdb`` wrapper.\n+        - Subtle changes like new conditional settings will require some old\n+          workflows be updated to cope.\n+v0.1.01 - Requires ``blastdbd`` datatype (``blast_datatypes`` v0.0.19).\n+        - Wrapper for makeprofiledb added to create protein domain databases\n+          (based on contribution from Bjoern Gruening).\n+        - The RPS-BLAST and RPS-TBLASTN wrappers support using a protein\n+          domain database from the user\'s history.\n+        - Tool definitions now embed citation information (by John Chilton).\n+        - BLAST tools support GI and SeqID filters (added by Bjoern Gruening).\n+v0.1.02 - Now depends on ``package_blast_plus_2_2_30`` in ToolShed.\n+        - Tests updated for BLAST+ 2.2.30 instead of BLAST+ 2.2.29.\n+        - New tasks ``blastp-fast``, ``blastx-fast`` and ``tblastn-fast``.\n+        - New minimum query HSP coverage option, ``-qcov_hsp_perc``.\n+        - Removed ``-word_size`` from RPS-BLAST and RPS-TBLASTN 
wrappers, this\n+          is set during database construction and should not have been offered\n+          as a command line option in releases prior to BLAST+ 2.2.30.\n+        - BLAST database ``blastdb*.loc`` files now accessed via the XML\n+          table definitions in Galaxy\'s ``tool_data_table_conf.xml`` file,\n+          setup via ``tool-data/tool_data_table_conf.xml.sample``\n+        - Replace ``.extra_files_path`` with ``.files_path`` (internal change,\n+\t  thanks to Bjoern Gruening and John Chilton).\n+        - Added "NCBI BLAST+ integrated into Galaxy" preprint citation.\n+======= ======================================================================\n+\n+\n+Bug Reports\n+===========\n+\n+You can file an issue here https://github.com/peterjc/galaxy_blast/issues or ask\n+us on the Galaxy development list http://lists.bx.psu.edu/listinfo/galaxy-dev\n+\n+\n+Developers\n+==========\n+\n+This script and related tools were originally developed on the \'tools\' branch\n+of the following Mercurial repository:\n+https://bitbucket.org/peterjc/galaxy-central/\n+\n+As of July 2013, development is continuing on a dedicated GitHub repository:\n+https://github.com/peterjc/galaxy_blast\n+\n+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use\n+the following command from the GitHub repository root folder::\n+\n+    $ tools/ncbi_blast_plus/make_ncbi_blast_plus.sh\n+\n+This simplifies ensuring a consistent set of files is bundled each time,\n+including all the relevant test files.\n+\n+When updating the version of BLAST+, many of the sample data files used for\n+the unit tests must be regenerated. 
This script automates that task::\n+\n+    $ tools/ncbi_blast_plus/update_test_files.sh\n+\n+\n+Licence (MIT)\n+=============\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n+THE SOFTWARE.\n'
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/blastxml_to_tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Fri Jan 30 08:27:28 2015 -0500
[
b'@@ -0,0 +1,344 @@\n+#!/usr/bin/env python\n+"""Convert a BLAST XML file to tabular output.\n+\n+Takes three command line options, input BLAST XML filename, output tabular\n+BLAST filename, output format (std for standard 12 columns, or ext for the\n+extended 24 columns offered in the BLAST+ wrappers).\n+\n+The 12 columns output are \'qseqid sseqid pident length mismatch gapopen qstart\n+qend sstart send evalue bitscore\' or \'std\' at the BLAST+ command line, which\n+mean:\n+   \n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+     1 qseqid    Query Seq-id (ID of your sequence)\n+     2 sseqid    Subject Seq-id (ID of the database hit)\n+     3 pident    Percentage of identical matches\n+     4 length    Alignment length\n+     5 mismatch  Number of mismatches\n+     6 gapopen   Number of gap openings\n+     7 qstart    Start of alignment in query\n+     8 qend      End of alignment in query\n+     9 sstart    Start of alignment in subject (database hit)\n+    10 send      End of alignment in subject (database hit)\n+    11 evalue    Expectation value (E-value)\n+    12 bitscore  Bit score\n+====== ========= ============================================\n+\n+The additional columns offered in the Galaxy BLAST+ wrappers are:\n+\n+====== ============= ===========================================\n+Column NCBI name     Description\n+------ ------------- -------------------------------------------\n+    13 sallseqid     All subject Seq-id(s), separated by \';\'\n+    14 score         Raw score\n+    15 nident        Number of identical matches\n+    16 positive      Number of positive-scoring matches\n+    17 gaps          Total number of gaps\n+    18 ppos          Percentage of positive-scoring matches\n+    19 qframe        Query frame\n+    20 sframe        Subject frame\n+    21 qseq          Aligned part of query sequence\n+    22 sseq          Aligned 
part of subject sequence\n+    23 qlen          Query sequence length\n+    24 slen          Subject sequence length\n+    25 salltitles    All subject titles, separated by \'&lt;&gt;\'\n+====== ============= ===========================================\n+\n+Most of these fields are given explicitly in the XML file, others some like\n+the percentage identity and the number of gap openings must be calculated.\n+\n+Be aware that the sequence in the extended tabular output or XML direct from\n+BLAST+ may or may not use XXXX masking on regions of low complexity. This\n+can throw the off the calculation of percentage identity and gap openings.\n+[In fact, both BLAST 2.2.24+ and 2.2.25+ have a subtle bug in this regard,\n+with these numbers changing depending on whether or not the low complexity\n+filter is used.]\n+\n+This script attempts to produce identical output to what BLAST+ would have done.\n+However, check this with "diff -b ..." since BLAST+ sometimes includes an extra\n+space character (probably a bug).\n+"""\n+import sys\n+import re\n+import os\n+from optparse import OptionParser\n+\n+if "-v" in sys.argv or "--version" in sys.argv:\n+    print "v0.1.04"\n+    sys.exit(0)\n+\n+if sys.version_info[:2] >= ( 2, 5 ):\n+    try:\n+        from xml.etree import cElementTree as ElementTree\n+    except ImportError:\n+        from xml.etree import ElementTree as ElementTree\n+else:\n+    from galaxy import eggs\n+    import pkg_resources; pkg_resources.require( "elementtree" )\n+    from elementtree import ElementTree\n+\n+def stop_err( msg ):\n+    sys.stderr.write("%s\\n" % msg)\n+    sys.exit(1)\n+\n+if len(sys.argv) == 4 and sys.argv[3] in ["std", "x22", "ext"]:\n+    #False positive if user really has a BLAST XML file called \'std\' or \'ext\'...\n+    stop_err("""ERROR: The script API has changed, sorry.\n+\n+Instead of the old style:\n+\n+$ python blastxml_to_tabular.py input.xml output.tabular std\n+\n+Please use:\n+\n+$ python blastxml_to_tabular.py -o 
output.tabular -c std input.xml\n+\n+For more information, use:\n+\n+$ python blastxml_to_tabular.py -h\n+""")\n+\n+usage = """'..b'X")))\n+\n+\n+                    evalue = hsp.findtext("Hsp_evalue")\n+                    if evalue == "0":\n+                        evalue = "0.0"\n+                    else:\n+                        evalue = "%0.0e" % float(evalue)\n+                \n+                    bitscore = float(hsp.findtext("Hsp_bit-score"))\n+                    if bitscore < 100:\n+                        #Seems to show one decimal place for lower scores\n+                        bitscore = "%0.1f" % bitscore\n+                    else:\n+                        #Note BLAST does not round to nearest int, it truncates\n+                        bitscore = "%i" % bitscore\n+\n+                    values = [qseqid,\n+                              sseqid,\n+                              pident,\n+                              length, #hsp.findtext("Hsp_align-len")\n+                              str(mismatch),\n+                              gapopen,\n+                              hsp.findtext("Hsp_query-from"), #qstart,\n+                              hsp.findtext("Hsp_query-to"), #qend,\n+                              hsp.findtext("Hsp_hit-from"), #sstart,\n+                              hsp.findtext("Hsp_hit-to"), #send,\n+                              evalue, #hsp.findtext("Hsp_evalue") in scientific notation\n+                              bitscore, #hsp.findtext("Hsp_bit-score") rounded\n+                              ]\n+\n+                    if extended:\n+                        try:\n+                            sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >"))\n+                            salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >"))\n+                        except IndexError as e:\n+                            stop_err("Problem splitting multuple hits?\\n%r\\n--> %s" % (hit_def, 
e))\n+                        #print hit_def, "-->", sallseqid\n+                        positive = hsp.findtext("Hsp_positive")\n+                        ppos = "%0.2f" % (100*float(positive)/float(length))\n+                        qframe = hsp.findtext("Hsp_query-frame")\n+                        sframe = hsp.findtext("Hsp_hit-frame")\n+                        if blast_program == "blastp":\n+                            #Probably a bug in BLASTP that they use 0 or 1 depending on format\n+                            if qframe == "0": qframe = "1"\n+                            if sframe == "0": sframe = "1"\n+                        slen = int(hit.findtext("Hit_len"))\n+                        values.extend([sallseqid,\n+                                       hsp.findtext("Hsp_score"), #score,\n+                                       nident,\n+                                       positive,\n+                                       hsp.findtext("Hsp_gaps"), #gaps,\n+                                       ppos,\n+                                       qframe,\n+                                       sframe,\n+                                       #NOTE - for blastp, XML shows original seq, tabular uses XXX masking\n+                                       q_seq,\n+                                       h_seq,\n+                                       str(qlen),\n+                                       str(slen),\n+                                       salltitles,\n+                                       ])\n+                    if cols:\n+                        #Only a subset of the columns are needed\n+                        values = [values[colnames.index(c)] for c in cols]\n+                    #print "\\t".join(values) \n+                    output_handle.write("\\t".join(values) + "\\n")\n+            # prevents ElementTree from growing large datastructure\n+            root.clear()\n+            elem.clear()\n+\n+\n+if options.output:\n+    outfile = 
open(options.output, "w")\n+else:\n+    outfile = sys.stdout\n+\n+for in_file in args:\n+    blast_program = None\n+    convert(in_file, outfile)\n+\n+if options.output:\n+    outfile.close()\n+else:\n+    #Using stdout\n+    pass\n+\n'
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/blastxml_to_tabular.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,215 @@\n+<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.1.04">\n+    <description>Convert BLAST XML output to tabular</description>\n+    <version_command interpreter="python">blastxml_to_tabular.py --version</version_command>\n+    <command interpreter="python">\n+blastxml_to_tabular.py -o "$tabular_file"\n+#if $output.out_format == "cols":\n+#set cols = (str($output.std_cols)+","+str($output.ext_cols)).replace("None", " ").replace(",,", ",").replace(",", " ")\n+-c "$cols"\n+#else\n+-c "$output.out_format"\n+#end if\n+#for i in $blastxml_file#"${i}" #end for#\n+    </command>\n+    <stdio>\n+        <!-- Anything other than zero is an error -->\n+        <exit_code range="1:" />\n+        <exit_code range=":-1" />\n+    </stdio>\n+    <inputs>\n+        <param name="blastxml_file" type="data" format="blastxml" multiple="true" label="BLAST results as XML"/>\n+        <conditional name="output">\n+          <param name="out_format" type="select" label="Output format">\n+            <option value="std" selected="True">Tabular (standard 12 columns)</option>\n+            <option value="ext">Tabular (extended 25 columns)</option>\n+            <option value="cols">Tabular (select columns to output)</option>\n+          </param>\n+          <when value="std"/>\n+          <when value="ext"/>\n+          <when value="cols">\n+            <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns">\n+              <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option>\n+              <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option>\n+              <option selected="true" value="pident">pident = Percentage of identical matches</option>\n+              <option selected="true" value="length">length = Alignment length</option>\n+              <option selected="true" value="mismatch">mismatch = Number of 
mismatches</option>\n+              <option selected="true" value="gapopen">gapopen = Number of gap openings</option>\n+              <option selected="true" value="qstart">qstart = Start of alignment in query</option>\n+              <option selected="true" value="qend">qend = End of alignment in query</option>\n+              <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option>\n+              <option selected="true" value="send">send = End of alignment in subject (database hit)</option>\n+              <option selected="true" value="evalue">evalue = Expectation value (E-value)</option>\n+              <option selected="true" value="bitscore">bitscore = Bit score</option>\n+            </param>\n+            <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns">\n+              <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a \';\'</option>\n+              <option value="score">score = Raw score</option>\n+              <option value="nident">nident = Number of identical matches</option>\n+              <option value="positive">positive = Number of positive-scoring matches</option>\n+              <option value="gaps">gaps = Total number of gaps</option>\n+              <option value="ppos">ppos = Percentage of positive-scoring matches</option>\n+              <option value="qframe">qframe = Query frame</option>\n+              <option value="sframe">sframe = Subject frame</option>\n+              <option value="qseq">qseq = Aligned part of query sequence</option>\n+              <option value="sseq">sseq = Aligned part of subject sequence</option>\n+              <option value="qlen">qlen = Query sequence length</option>\n+              <option value="slen">slen = Subject sequence length</option>\n+              <option value="salltitles">salltitles = All subject title(s), separated by a \'&lt;&gt;\'</option>\n+            </param>\n+          
</when>\n+        </conditional>\n+    </inputs>\n+    <outputs>\n+        <data nam'..b'ame="out_format" value="std" />\n+            <output name="tabular_file" file="blastn_rhodopsin_vs_three_human_converted.tabular" ftype="tabular" />\n+        </test>\n+        <test>\n+            <param name="blastxml_file" value="blastn_rhodopsin_vs_three_human.xml" ftype="blastxml" />\n+            <param name="out_format" value="cols" />\n+            <param name="std_cols" value="qseqid,sseqid,pident" />\n+            <param name="ext_cols" value="qlen,slen" />\n+            <output name="tabular_file" file="blastn_rhodopsin_vs_three_human.columns.tabular" ftype="tabular" />\n+        </test>\n+    </tests>\n+    <help>\n+    \n+**What it does**\n+\n+NCBI BLAST+ (and the older NCBI \'legacy\' BLAST) can output in a range of\n+formats including tabular and a more detailed XML format. A complex workflow\n+may need both the XML and the tabular output - but running BLAST twice is\n+slow and wasteful.\n+\n+This tool takes the BLAST XML output and can convert it into the\n+standard 12 column tabular equivalent:\n+\n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+     1 qseqid    Query Seq-id (ID of your sequence)\n+     2 sseqid    Subject Seq-id (ID of the database hit)\n+     3 pident    Percentage of identical matches\n+     4 length    Alignment length\n+     5 mismatch  Number of mismatches\n+     6 gapopen   Number of gap openings\n+     7 qstart    Start of alignment in query\n+     8 qend      End of alignment in query\n+     9 sstart    Start of alignment in subject (database hit)\n+    10 send      End of alignment in subject (database hit)\n+    11 evalue    Expectation value (E-value)\n+    12 bitscore  Bit score\n+====== ========= ============================================\n+\n+The BLAST+ tools can optionally output additional columns of 
information,\n+but this takes longer to calculate. Most (but not all) of these columns are\n+included by selecting the extended tabular output. The extra columns are\n+included *after* the standard 12 columns. This is so that you can write\n+workflow filtering steps that accept either the 12 or 25 column tabular\n+BLAST output. This tool now uses this extended 25 column output by default.\n+\n+====== ============= ===========================================\n+Column NCBI name     Description\n+------ ------------- -------------------------------------------\n+    13 sallseqid     All subject Seq-id(s), separated by a \';\'\n+    14 score         Raw score\n+    15 nident        Number of identical matches\n+    16 positive      Number of positive-scoring matches\n+    17 gaps          Total number of gaps\n+    18 ppos          Percentage of positive-scoring matches\n+    19 qframe        Query frame\n+    20 sframe        Subject frame\n+    21 qseq          Aligned part of query sequence\n+    22 sseq          Aligned part of subject sequence\n+    23 qlen          Query sequence length\n+    24 slen          Subject sequence length\n+    25 salltitles    All subject title(s), separated by a \'&lt;&gt;\'\n+====== ============= ===========================================\n+\n+Beware that the XML file (and thus the conversion) and the tabular output\n+direct from BLAST+ may differ in the presence of XXXX masking on regions\n+low complexity (columns 21 and 22), and thus also calculated figures like\n+the percentage identity (column 3).\n+\n+**References**\n+\n+If you use this Galaxy tool in work leading to a scientific publication please\n+cite:\n+\n+Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n+Galaxy tools and workflows for sequence analysis with applications\n+in molecular plant pathology. 
PeerJ 1:e167\n+http://dx.doi.org/10.7717/peerj.167\n+\n+This wrapper is available to install into other Galaxy Instances via the Galaxy\n+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+    </help>\n+    <citations>\n+      <citation type="doi">10.7717/peerj.167</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/check_no_duplicates.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/check_no_duplicates.py Fri Jan 30 08:27:28 2015 -0500
[
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+"""Check for duplicate sequence identifiers in FASTA files.
+
+This is run as a pre-check before makeblastdb, in order to avoid
+a regression bug in BLAST+ 2.2.28 which fails to catch this. See:
+http://blastedbio.blogspot.co.uk/2012/10/my-ids-not-good-enough-for-ncbi-blast.html
+
+This script takes one or more FASTA filenames as input, and
+will return a non-zero error if any duplicate identifiers
+are found.
+
+Exit status: 0 on success, 1 for a repeated identifier, 2 for a
+missing input file, 3 if no filenames were given, and 4 for a
+FASTA header line with no identifier.
+"""
+import sys
+import os
+
+if "-v" in sys.argv or "--version" in sys.argv:
+    print("v0.0.22")
+    sys.exit(0)
+
+
+def stop_err(msg, error=1):
+    """Write msg to stderr and exit with status error."""
+    sys.stderr.write("%s\n" % msg)
+    sys.exit(error)
+
+
+identifiers = set()
+files = 0
+for filename in sys.argv[1:]:
+    if not os.path.isfile(filename):
+        stop_err("Missing FASTA file %r" % filename, 2)
+    files += 1
+    # The with-block closes the handle on every exit path, including
+    # the SystemExit raised by stop_err.
+    with open(filename) as handle:
+        for line in handle:
+            if not line.startswith(">"):
+                continue
+            # Splitting on whitespace also drops the new line character,
+            # e.g. ">test\n" and ">test description here\n" both give "test"
+            fields = line[1:].split(None, 1)
+            if not fields:
+                # A bare ">" header would otherwise raise IndexError
+                stop_err("Missing identifier in FASTA file %r" % filename, 4)
+            seq_id = fields[0]
+            if seq_id in identifiers:
+                stop_err("Repeated identifiers, e.g. %r" % seq_id, 1)
+            identifiers.add(seq_id)
+if not files:
+    stop_err("No FASTA files given to check for duplicates", 3)
+elif files == 1:
+    print("%i sequences" % len(identifiers))
+else:
+    print("%i sequences in %i FASTA files" % (len(identifiers), files))
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,48 @@
+<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.1.02">
+    <description>Show BLAST database information from blastdbcmd</description>
+    <macros>
+        <token name="@BINARY@">blastdbcmd</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out "$info"
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <expand macro="input_conditional_choose_db_type" />
+    </inputs>
+    <outputs>
+        <data name="info" format="txt" label="${db_opts.database.fields.name} info" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="db_opts|db_type" value="prot" />
+            <param name="db_opts|database" value="four_human_proteins" />
+            <output name="info" file="four_human_proteins.dbinfo.txt" ftype="txt" lines_diff="4" />
+        </test>
+        <test>
+            <param name="db_opts|db_type" value="nucl" />
+            <param name="db_opts|database" value="three_human_mRNA" />
+            <output name="info" file="three_human_mRNA.dbinfo.txt" ftype="txt" lines_diff="4" />
+        </test>
+    </tests>
+    <help>
+    
+**What it does**
+
+Calls the NCBI BLAST+ blastdbcmd command line tool with the -info
+switch to give summary information about a BLAST database, such as
+the size (number of sequences and total length) and date.
+
+-------
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
[
@@ -0,0 +1,118 @@
+<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.1.02">
+    <description>Extract sequence(s) from BLAST database</description>
+    <macros>
+        <token name="@BINARY@">blastdbcmd</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}"
+
+##TODO: What about -ctrl_a and -target_only as advanced options?
+
+#if $id_opts.id_type=="file":
+-entry_batch "$id_opts.entries"
+#else:
+##Perform some simple search/replaces to remove whitespace
+##and make it comma separated, and escape any pipe characters
+-entry "$id_opts.entries.replace('\r',',').replace('\n',',').replace(' ','').replace(',,',',').replace(',,',',').strip(',').replace('|','\|')"
+#end if
+
+##When building a BLAST database, to ensure unique IDs makeblastdb will
+##do things like turning a FASTA entry with ID of ERP44 into lcl|ERP44
+##(if using -parse_seqids) or simply assign it an ID using the record
+##number like gnl|BL_ORD_ID|123 (to cope with duplicate IDs in the FASTA
+##file). In -parse_seqids mode, a duplicate FASTA ID gives an error.
+##
+##The BLAST plain text and XML output will contain these BLAST IDs, but
+##the tabular output does not (at least, not in BLAST 2.2.25+).
+##Therefore in general, Galaxy users won't care about the (internal)
+##BLAST identifiers.
+##
+##The blastdbcmd FASTA output will also contain these IDs, but in the
+##context of the BLAST tabular output they are not helpful. Therefore
+##to recover the original ID as used in the FASTA file for makeblastdb
+##we need a little post processing.
+##
+##We remove the NCBI's lcl|... or gnl|BL_ORD_ID|123 prefixes
+##using sed, however the exact syntax differs for Mac OS X's sed
+
+#if str($outfmt)=="blastid":
+-out "$seq"
+#else if sys.platform == "darwin":
+| sed -E 's/^>(lcl\||gnl\|BL_ORD_ID\|[0-9]* )/>/1' > "$seq"
+#else:
+| sed 's/>\(lcl|\|gnl|BL_ORD_ID|[0-9]* \)/>/1' > "$seq"
+#end if
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <expand macro="input_conditional_choose_db_type" />
+        <conditional name="id_opts">
+            <param name="id_type" type="select" label="Type of identifier list">
+              <option value="file">From file</option>
+              <option value="prompt">User entered</option>
+            </param>
+            <when value="file">
+                <param name="entries" type="data" format="txt,tabular" label="Sequence identifier(s)" help="Plain text file with one ID per line (i.e. single column tabular file)"/>
+            </when>
+            <when value="prompt">
+                <param name="entries" type="text" label="Sequence identifier(s)" help="Comma or new line separated list." optional="False" area="True" size="10x30"/>
+            </when>
+        </conditional>
+        <param name="outfmt" type="select" label="Output format">
+          <option value="original">FASTA with original identifiers</option>
+          <option value="blastid">FASTA with BLAST assigned identifiers</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="seq" format="fasta" label="Sequences from ${db_opts.database.fields.name}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="db_opts|db_type" value="prot" />
+            <param name="db_opts|database" value="four_human_proteins" />
+            <param name="id_opts|id_type" value="prompt" />
+            <param name="id_opts|entries" value="all" />
+            <param name="outfmt" value="original" />
+            <output name="seq" file="four_human_proteins.fasta" ftype="fasta" />
+        </test>
+    </tests>
+    <help>
+    
+**What it does**
+
+Extracts FASTA formatted sequences from a BLAST database
+using the NCBI BLAST+ blastdbcmd command line tool.
+
+.. class:: warningmark
+
+**BLAST assigned identifiers**
+
+When a BLAST database is constructed from a FASTA file, the
+original identifiers can be replaced with BLAST assigned
+identifiers, partly to ensure uniqueness. For example, sometimes
+a prefix of 'lcl|' is added (lcl is short for local),
+or an arbitrary name starting 'gnl|BL_ORD_ID|' is created.
+
+If you are using the tabular output from BLAST, it will contain
+the original identifiers - not the BLAST assigned identifiers
+suitable for use with the blastdbcmd tool.
+
+If you are using the XML or plain text output, this will also
+contain the BLAST assigned identifiers. However, this means
+getting a list of BLAST assigned identifiers isn't straightforward.
+
+-------
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />    
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,156 @@
+<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.1.02">
+    <description>Search nucleotide database with nucleotide query sequence(s)</description>
+    <!-- If job splitting is enabled, break up the query file into parts -->
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
+    <macros>
+        <token name="@BINARY@">blastn</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastn
+-query "$query"
+@BLAST_DB_SUBJECT@
+-task $blast_type
+-evalue $evalue_cutoff
+@BLAST_OUTPUT@
+@THREADS@
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.strand
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_WORD_SIZE@
+#if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ):
+-perc_identity $adv_opts.identity_cutoff
+#end if
+$adv_opts.ungapped
+@ADV_ID_LIST_FILTER@
+@ADV_QCOV_HSP_PERC@
+## End of advanced options:
+#end if
+    </command>
+
+    <expand macro="stdio" />
+
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+
+        <expand macro="input_conditional_nucleotide_db" />
+
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="megablast">megablast - Traditional megablast used to find very similar (e.g., intraspecies or closely related species) sequences</option>
+            <option value="blastn">blastn - Traditional BLASTN requiring an exact match of 11, for somewhat similar sequences</option>
+            <option value="blastn-short">blastn-short - BLASTN program optimized for sequences shorter than 50 bases</option>
+            <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option>
+            <!-- Using BLAST 2.2.24+ this gives an error:
+            BLAST engine error: Program type 'vecscreen' not supported
+            <option value="vecscreen">vecscreen</option>
+            In any case, vecscreen has gone in BLAST+ 2.2.28
+            -->
+            <!-- BLAST+ 2.2.28 also offers rmblastn -->
+        </param>
+        <expand macro="input_evalue" />
+        <expand macro="input_out_format" />
+        <expand macro="advanced_options">
+            <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' -->
+            <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" />
+            <expand macro="input_strand" />
+            <expand macro="input_max_hits" />
+            <param name="identity_cutoff" type="float" min="0" max="100" value="0" label="Percent identity cutoff (-perc_identity)" help="Use zero for no cutoff" />
+            
+            <!-- I'd like word_size to be optional, with minimum 4 for blastn -->
+            <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4.">
+                <validator type="in_range" min="0" />
+            </param>
+            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
+            <expand macro="input_parse_deflines" />
+            <expand macro="advanced_optional_id_files" />
+            <expand macro="input_qcov_hsp_perc" />
+        </expand>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="${blast_type.value} $query.name vs @ON_DB_SUBJECT@">
+            <expand macro="output_change_format" />
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-40" />
+            <param name="out_format" value="5" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastn_rhodopsin_vs_three_human.xml" ftype="blastxml" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-40" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastn_rhodopsin_vs_three_human.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-40" />
+            <param name="out_format" value="cols" />
+            <param name="std_cols" value="qseqid,sseqid,pident" />
+            <param name="ext_cols" value="qlen,slen" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastn_rhodopsin_vs_three_human.columns.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="chimera.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="three_human_mRNA" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="max_hits" value="1" />
+            <output name="output1" file="blastn_chimera_vs_three_human_max1.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="chimera.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="three_human_mRNA" />
+            <param name="out_format" value="0" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="max_hits" value="1" />
+            <output name="output1" file="blastn_chimera_vs_three_human_max1.txt" ftype="txt" />
+        </test>
+    </tests>
+    <help>
+    
+@SEARCH_TIME_WARNING@
+
+**What it does**
+
+Search a *nucleotide database* using a *nucleotide query*,
+using the NCBI BLAST+ blastn command line tool.
+Algorithms include blastn, megablast, and discontiguous megablast.
+
+@FASTA_WARNING@
+
+-----
+
+@OUTPUT_FORMAT@
+
+-------
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,158 @@
+<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.1.02">
+    <description>Search protein database with protein query sequence(s)</description>
+    <!-- If job splitting is enabled, break up the query file into parts -->
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
+    <macros>
+        <token name="@BINARY@">blastp</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastp
+-query "$query"
+@BLAST_DB_SUBJECT@
+-task $blast_type
+-evalue $evalue_cutoff
+@BLAST_OUTPUT@
+@THREADS@
+#if $adv_opts.adv_opts_selector=="advanced":
+-matrix $adv_opts.matrix
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_WORD_SIZE@
+##Ungapped disabled for now - see comments below
+##$adv_opts.ungapped
+@ADV_ID_LIST_FILTER@
+@ADV_QCOV_HSP_PERC@
+## End of advanced options:
+#end if
+    </command>
+
+    <expand macro="stdio" />
+
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
+
+        <expand macro="input_conditional_protein_db" />
+
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="blastp">blastp - Traditional BLASTP to compare a protein query to a protein database</option>
+            <option value="blastp-fast">blastp-fast - Use longer words for seeding, faster but less accurate</option>
+            <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option>
+        </param>
+        <expand macro="input_evalue" />
+        <expand macro="input_out_format" />
+        <expand macro="advanced_options">
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_false" />
+            <expand macro="input_scoring_matrix" />
+            <expand macro="input_max_hits" />
+            <expand macro="input_word_size" />
+            <!--
+            Can't use '-ungapped' on its own, error back is:
+            Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
+            Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.'
+            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
+            -->
+            <expand macro="input_parse_deflines" />
+            <expand macro="advanced_optional_id_files" />
+            <expand macro="input_qcov_hsp_perc" />
+        </expand>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="${blast_type.value} $query.name vs @ON_DB_SUBJECT@">
+            <expand macro="output_change_format" />
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="5" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="False" />
+            <param name="matrix" value="BLOSUM62" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="True" />
+            <param name="qcov_hsp_perc" value="25" />
+            <output name="output1" file="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="False" />
+            <param name="matrix" value="BLOSUM62" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="True" />
+            <param name="qcov_hsp_perc" value="25" />
+            <output name="output1" file="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="ext" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="False" />
+            <param name="matrix" value="BLOSUM62" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="True" />
+            <param name="qcov_hsp_perc" value="25" />
+            <output name="output1" file="blastp_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastp_rhodopsin_vs_four_human.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+    
+@SEARCH_TIME_WARNING@
+
+**What it does**
+
+Search a *protein database* using a *protein query*,
+using the NCBI BLAST+ blastp command line tool.
+
+@FASTA_WARNING@
+
+-----
+
+@OUTPUT_FORMAT@
+
+-------
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />    
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,138 @@
+<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.1.02">
+    <description>Search protein database with translated nucleotide query sequence(s)</description>
+    <!-- If job splitting is enabled, break up the query file into parts -->
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
+    <macros>
+        <token name="@BINARY@">blastx</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastx
+-query "$query"
+@BLAST_DB_SUBJECT@
+-query_gencode $query_gencode
+-task $blast_type
+-evalue $evalue_cutoff
+@BLAST_OUTPUT@
+@THREADS@
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.strand
+-matrix $adv_opts.matrix
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_WORD_SIZE@
+$adv_opts.ungapped
+@ADV_ID_LIST_FILTER@
+@ADV_QCOV_HSP_PERC@
+## End of advanced options:
+#end if
+    </command>
+
+    <expand macro="stdio" />
+
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+
+        <expand macro="input_conditional_protein_db" />
+        <expand macro="input_query_gencode" />
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="blastx">blastx - Traditional BLASTX to compare translated nucleotide query to protein database</option>
+            <option value="blastx-fast">blastx-fast - Use longer words for seeding, faster but less accurate</option>
+        </param>
+        <expand macro="input_evalue" />
+
+        <expand macro="input_out_format" />
+        <expand macro="advanced_options">
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_true" />
+            <expand macro="input_strand" />
+            <expand macro="input_scoring_matrix" />
+            <expand macro="input_max_hits" />
+            <expand macro="input_word_size" />
+            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
+            <expand macro="input_parse_deflines" />
+            <expand macro="advanced_optional_id_files" />
+            <expand macro="input_qcov_hsp_perc" />
+        </expand>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="blastx $query.name vs @ON_DB_SUBJECT@">
+            <expand macro="output_change_format" />
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="5" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastx_rhodopsin_vs_four_human.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="ext" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastx_rhodopsin_vs_four_human_ext.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="cols" />
+            <param name="std_cols" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore" />
+            <param name="ext_cols" value="sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" />
+            <param name="ids_cols" value="qgi,qacc,qaccver,sallseqid,sgi,sallgi,sacc,saccver,sallacc,stitle" />
+            <param name="misc_cols" value="sstrand,frames,btop,qcovs,qcovhsp" />
+            <param name="tax_cols" value="staxids,sscinames,scomnames,sblastnames,sskingdoms" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastx_rhodopsin_vs_four_human_all.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+    
+@SEARCH_TIME_WARNING@
+
+**What it does**
+
+Search a *protein database* using a *translated nucleotide query*,
+using the NCBI BLAST+ blastx command line tool.
+
+@FASTA_WARNING@
+
+-----
+
+@OUTPUT_FORMAT@
+
+-------
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,88 @@
+<tool id="ncbi_convert2blastmask_wrapper" name="NCBI BLAST+ convert2blastmask" version="0.1.02">
+    <description>Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb</description>
+    <macros>
+        <token name="@BINARY@">convert2blastmask</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## Cheetah template: lines starting hash hash are comments, Galaxy turns newlines into spaces.
+## masking_options is free text, so it is single-quoted and its sanitizer strips single quotes.
+convert2blastmask
+-in "$infile"
+-masking_algorithm "$masking_algorithm"
+-masking_options '$masking_options'
+$parse_seqids
+-out "$outfile"
+-outfmt $outformat
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="fasta" label="masked FASTA file"/>
+        <param name="masking_algorithm" type="select" label="Used masking algorithm">
+            <option value="dust">DUST</option>
+            <option value="seg" selected="true">SEG</option>
+            <option value="windowmasker">windowmasker</option>
+            <option value="repeat">repeat</option>
+            <option value="other">other</option>
+        </param>
+        <param name="masking_options" type="text" value="" size="20" label="Masking algorithm options to create the masked input"
+            help="free text to describe the options used to create the masking files. (-masking_options)">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable"><remove value="&apos;" /></valid>
+            </sanitizer>
+        </param>
+        <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="true" label="Parse Seq-ids in FASTA input" help="(-parse_seqids)" />
+        <param name="outformat" type="select" label="Output format">
+            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
+            <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option>
+            <option value="maskinfo_xml">maskinfo_xml</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="maskinfo-asn1" label="convert2blastmask on ${on_string}">
+            <change_format>
+                <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+                <!-- No <when> is needed for maskinfo_asn1_text: that choice keeps the
+                     default format="maskinfo-asn1" declared on the <data> element above.
+                -->
+                <when input="outformat" value="maskinfo_xml" format="xml" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" />
+            <param name="masking_algorithm" value="seg" />
+            <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" />
+            <param name="parse_seqids" value="True" />
+            <param name="outformat" value="maskinfo_asn1_bin" />
+            <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1-binary" />
+        </test>
+        <test>
+            <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" />
+            <param name="masking_algorithm" value="seg" />
+            <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" />
+            <param name="parse_seqids" value="True" />
+            <param name="outformat" value="maskinfo_asn1_text" />
+            <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb.
+
+More information about convert2blastmask can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,100 @@
+<tool id="ncbi_dustmasker_wrapper" name="NCBI BLAST+ dustmasker" version="0.1.02">
+    <!-- dustmasker wrapper from Edward Kirton and Nicola Soranzo -->
+    <description>masks low complexity regions</description>
+    <macros>
+        <token name="@BINARY@">dustmasker</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## Cheetah template: lines starting hash hash are comments, Galaxy turns newlines into spaces.
+## The FASTA subject is addressed through the db_opts conditional, like database and histdb.
+dustmasker
+#if $db_opts.db_opts_selector == "db":
+  -in "${db_opts.database.fields.path}" -infmt blastdb
+#elif $db_opts.db_opts_selector == "histdb":
+  -in "${os.path.join($db_opts.histdb.files_path, 'blastdb')}" -infmt blastdb
+#else:
+  -in "${db_opts.subject}" -infmt fasta
+#end if
+-out "$outfile"
+-window $window -level $level -linker $linker -outfmt $outformat
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <expand macro="input_conditional_nucleotide_db" />
+        <param name="window" type="integer" value="64" label="DUST window length" />
+        <param name="level" type="integer" value="20" label="DUST level" help="Score threshold for subwindows" />
+        <param name="linker" type="integer" value="1" label="DUST linker" help="How close masked intervals should be to get merged together" />
+        <param name="outformat" type="select" label="Output format">
+            <!-- seqloc_* formats are not very useful
+                 and what BLAST+ calls 'interval' is not what Galaxy calls interval format
+            -->
+            <option value="fasta">FASTA</option>
+            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
+            <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option>
+            <option value="maskinfo_xml">maskinfo_xml</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="maskinfo-asn1" label="DUST Masked File">
+            <change_format>
+                <when input="outformat" value="fasta" format="fasta" />
+                <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+                <!-- No <when> is needed for maskinfo_asn1_text: that choice keeps the
+                     default format="maskinfo-asn1" declared on the <data> element above.
+                -->
+                <when input="outformat" value="maskinfo_xml" format="xml" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="window" value="64" />
+            <param name="level" value="20" />
+            <param name="linker" value="1" />
+            <param name="outformat" value="fasta" />
+            <output name="outfile" file="dustmasker_three_human.fasta" />
+        </test>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="window" value="64" />
+            <param name="level" value="20" />
+            <param name="linker" value="1" />
+            <param name="outformat" value="maskinfo_asn1_bin" />
+            <output name="outfile" file="dustmasker_three_human.maskinfo-asn1-binary" />
+        </test>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="window" value="64" />
+            <param name="level" value="20" />
+            <param name="linker" value="1" />
+            <param name="outformat" value="maskinfo_asn1_text" />
+            <output name="outfile" file="dustmasker_three_human.maskinfo-asn1" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST_ algorithm.
+
+If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.
+
+More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+.. _DUST: http://www.ncbi.nlm.nih.gov/pubmed/16796549
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,504 @@\n+<macros>\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="binary">@BINARY@</requirement>\n+            <requirement type="package" version="2.2.30">blast+</requirement>\n+        </requirements>\n+        <version_command>@BINARY@ -version</version_command>\n+    </xml>\n+    <xml name="output_change_format">\n+        <change_format>\n+            <when input="output.out_format" value="0" format="txt"/>\n+            <when input="output.out_format" value="0 -html" format="html"/>\n+            <when input="output.out_format" value="2" format="txt"/>\n+            <when input="output.out_format" value="2 -html" format="html"/>\n+            <when input="output.out_format" value="4" format="txt"/>\n+            <when input="output.out_format" value="4 -html" format="html"/>\n+            <when input="output.out_format" value="5" format="blastxml"/>\n+        </change_format>\n+    </xml>\n+    <xml name="input_out_format">\n+        <conditional name="output">\n+            <param name="out_format" type="select" label="Output format">\n+                <option value="6">Tabular (standard 12 columns)</option>\n+                <option value="ext" selected="True">Tabular (extended 25 columns)</option>\n+                <option value="cols">Tabular (select which columns)</option>\n+                <option value="5">BLAST XML</option>\n+                <option value="0">Pairwise text</option>\n+                <option value="0 -html">Pairwise HTML</option>\n+                <option value="2">Query-anchored text</option>\n+                <option value="2 -html">Query-anchored HTML</option>\n+                <option value="4">Flat query-anchored text</option>\n+                <option value="4 -html">Flat query-anchored HTML</option>\n+                <!--\n+                <option value="-outfmt 11">BLAST archive format (ASN.1)</option>\n+                -->\n+            </param>\n+            <when 
value="6"/>\n+            <when value="ext"/>\n+            <when value="cols">\n+                <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns">\n+                    <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option>\n+                    <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option>\n+                    <option selected="true" value="pident">pident = Percentage of identical matches</option>\n+                    <option selected="true" value="length">length = Alignment length</option>\n+                    <option selected="true" value="mismatch">mismatch = Number of mismatches</option>\n+                    <option selected="true" value="gapopen">gapopen = Number of gap openings</option>\n+                    <option selected="true" value="qstart">qstart = Start of alignment in query</option>\n+                    <option selected="true" value="qend">qend = End of alignment in query</option>\n+                    <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option>\n+                    <option selected="true" value="send">send = End of alignment in subject (database hit)</option>\n+                    <option selected="true" value="evalue">evalue = Expectation value (E-value)</option>\n+                    <option selected="true" value="bitscore">bitscore = Bit score</option>\n+                </param>\n+                <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns">\n+                    <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a \';\'</option>\n+                    <option value="score">score = Raw score</option>\n+                    <option value="nident">nident = Number of identical matches</option>\n+                    <option value="positive">positive = Number of 
positive-scoring matches</option>\n+                    <option value="gaps">gaps = To'..b'\n+        </citations>\n+    </xml>\n+    <token name="@OUTPUT_FORMAT@">**Output format**\n+\n+Because Galaxy focuses on processing tabular data, the default output of this\n+tool is tabular. The standard BLAST+ tabular output contains 12 columns:\n+\n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+     1 qseqid    Query Seq-id (ID of your sequence)\n+     2 sseqid    Subject Seq-id (ID of the database hit)\n+     3 pident    Percentage of identical matches\n+     4 length    Alignment length\n+     5 mismatch  Number of mismatches\n+     6 gapopen   Number of gap openings\n+     7 qstart    Start of alignment in query\n+     8 qend      End of alignment in query\n+     9 sstart    Start of alignment in subject (database hit)\n+    10 send      End of alignment in subject (database hit)\n+    11 evalue    Expectation value (E-value)\n+    12 bitscore  Bit score\n+====== ========= ============================================\n+\n+The BLAST+ tools can optionally output additional columns of information,\n+but this takes longer to calculate. Many commonly used extra columns are\n+included by selecting the extended tabular output. The extra columns are\n+included *after* the standard 12 columns. This is so that you can write\n+workflow filtering steps that accept either the 12 or 25 column tabular\n+BLAST output. 
Galaxy now uses this extended 25 column output by default.\n+\n+====== ============= ===========================================\n+Column NCBI name     Description\n+------ ------------- -------------------------------------------\n+    13 sallseqid     All subject Seq-id(s), separated by a \';\'\n+    14 score         Raw score\n+    15 nident        Number of identical matches\n+    16 positive      Number of positive-scoring matches\n+    17 gaps          Total number of gaps\n+    18 ppos          Percentage of positive-scoring matches\n+    19 qframe        Query frame\n+    20 sframe        Subject frame\n+    21 qseq          Aligned part of query sequence\n+    22 sseq          Aligned part of subject sequence\n+    23 qlen          Query sequence length\n+    24 slen          Subject sequence length\n+    25 salltitles    All subject title(s), separated by a \'&lt;&gt;\'\n+====== ============= ===========================================\n+\n+The third option is to customise the tabular output by selecting which\n+columns you want, from the standard set of 12, the default set of 25,\n+or any of the additional columns BLAST+ offers (including species name).\n+\n+The fourth option is BLAST XML output, which is designed to be parsed by\n+another program, and is understood by some Galaxy tools.\n+\n+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+    </token>\n+    <token name="@FASTA_WARNING@">.. 
class:: warningmark\n+\n+You can also search against a FASTA file of subject (target)\n+sequences. This is *not* advised because it is slower (only one\n+CPU is used), but more importantly gives e-values for pairwise\n+searches (very small e-values which will look overly significant).\n+In most cases you should instead turn the other FASTA file into a\n+database first using *makeblastdb* and search against that.\n+    </token>\n+    <token name="@SEARCH_TIME_WARNING@">.. class:: warningmark\n+\n+**Note**. Database searches may take a substantial amount of time.\n+For large input datasets it is advisable to allow overnight processing.  \n+\n+-----\n+    </token>\n+</macros>\n'
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Fri Jan 30 08:27:28 2015 -0500
b
b'@@ -0,0 +1,204 @@\n+<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.1.02">\n+    <description>Make BLAST database</description>\n+    <macros>\n+        <token name="@BINARY@">makeblastdb</token>\n+        <import>ncbi_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements" />\n+    <command interpreter="python">check_no_duplicates.py\n+##First check for duplicates (since BLAST+ 2.2.28 fails to do so)\n+##and abort (via the ampersand ampersand trick) if any are found.\n+#for i in $input_file#"${i}" #end for#\n+&amp;&amp;\n+makeblastdb -out "${os.path.join($outfile.files_path,\'blastdb\')}"\n+$parse_seqids\n+$hash_index\n+## Single call to -in with multiple filenames space separated with outer quotes\n+## (presumably any filenames with spaces would be a problem). Note this gives\n+## some extra spaces, e.g. -in "file1 file2 file3 " but BLAST seems happy:\n+-in "#for i in $input_file#${i} #end for#"\n+#if $title:\n+-title "$title"\n+#else:\n+##Would default to being based on the cryptic Galaxy filenames, which is unhelpful\n+-title "BLAST Database"\n+#end if\n+-dbtype $dbtype\n+## --------------------------------------------------------------------\n+## Masking\n+## --------------------------------------------------------------------\n+## HACK: If no mask files, evaluates as a list with just None in it:\n+## See Trello issue https://trello.com/c/lp5YmA1O\n+#if \' \'.join( map(str, $mask_data_file) ) != \'None\':\n+#for i in $mask_data_file:\n+-mask_data "${i}"\n+#end for\n+#end if\n+## --------------------------------------------------------------------\n+## Taxonomy\n+## --------------------------------------------------------------------\n+#if $tax.taxselect == \'id\':\n+-taxid $tax.taxid\n+## TODO - Can we use a tabular file for the taxonomy mapping?\n+## #else if $tax.taxselect == \'map\':\n+## -taxid_map $tax.taxmap\n+#end if\n+## --------------------------------------------------------------------\n+## Capture the stdout 
log information to the primary file (plain text):\n+&gt; "$outfile"\n+    </command>\n+    <expand macro="stdio" />\n+    <inputs>\n+        <param name="dbtype" type="select" display="radio" label="Molecule type of input">\n+            <option value="prot">protein</option>\n+            <option value="nucl">nucleotide</option>\n+        </param>\n+        <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)?\n+             NOTE Double check the new database would be self contained first\n+        -->\n+        <!-- Note this is a mandatory parameter - default should be most recent FASTA file -->\n+        <param name="input_file" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" />\n+        <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />\n+        <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe \'|\' symbols" />\n+        <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />\n+        <!-- SEQUENCE MASKING OPTIONS -->\n+        <!-- Note this is an optional parameter - default should be NO files -->\n+        <param name="mask_data_file" type="data" multiple="true" optional="true" value="" format="maskinfo-asn1,maskinfo-asn1-binary" label="Optional ASN.1 file(s) containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />\n+        <!-- TODO - Option to create GI indexed masking data? via -gi_mask and -gi_mask_name? 
-->\n+        <!-- TAXONOMY OPTIONS -->\n+        <conditional name="tax">\n+            <param '..b'axid.fasta.phd" name="blastdb.phd" />\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.phi" name="blastdb.phi" />\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.psd" name="blastdb.psd" />\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.psi" name="blastdb.psi" />\n+            </output>\n+        </test>\n+        <test>\n+            <param name="dbtype" value="prot" />\n+            <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n+            <param name="title" value="Just 4 human proteins" />\n+            <param name="parse_seqids" value="" />\n+            <param name="hash_index" value="true" />\n+            <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" />\n+            <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">\n+                <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />\n+                <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />\n+                <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />\n+                <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />\n+                <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />\n+                <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" />\n+                <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" />\n+                <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" />\n+            </output>\n+        </test>\n+        <test>\n+            <param 
name="dbtype" value="nucl" />\n+            <param name="input_file" value="three_human_mRNA.fasta" ftype="fasta" />\n+            <param name="title" value="Just 3 human mRNA sequences" />\n+            <param name="parse_seqids" value="" />\n+            <param name="hash_index" value="true" />\n+            <param name="taxselect" value="id" />\n+            <param name="taxid" value="9606" />\n+            <output name="out_file" compare="contains" file="three_human_mRNA.fasta.log.txt" ftype="blastdbn">\n+                <extra_files type="file" value="three_human_mRNA.fasta.nhr" name="blastdb.nhr" />\n+                <extra_files type="file" value="three_human_mRNA.fasta.nin" name="blastdb.nin" lines_diff="2" />\n+                <extra_files type="file" value="three_human_mRNA.fasta.nsq" name="blastdb.nsq" />\n+                <extra_files type="file" value="three_human_mRNA.fasta.nog" name="blastdb.nog" />\n+                <extra_files type="file" value="three_human_mRNA.fasta.nhd" name="blastdb.nhd" />\n+                <extra_files type="file" value="three_human_mRNA.fasta.nhi" name="blastdb.nhi" />\n+                <extra_files type="file" value="three_human_mRNA.fasta.nsd" name="blastdb.nsd" />\n+                <extra_files type="file" value="three_human_mRNA.fasta.nsi" name="blastdb.nsi" />\n+            </output>\n+        </test>\n+    </tests>\n+    <help>\n+**What it does**\n+\n+Make BLAST database from one or more FASTA files and/or BLAST databases.\n+\n+This is a wrapper for the NCBI BLAST+ tool \'makeblastdb\', which is the\n+replacement for the \'formatdb\' tool in the NCBI \'legacy\' BLAST suite.\n+\n+<!--\n+Applying masks to an existing BLAST database will not change the original database; a new database will be created.\n+For this reason, it\'s best to apply all masks at once to minimize the number of unnecessary intermediate 
databases.\n+-->\n+\n+**Documentation**\n+\n+http://www.ncbi.nlm.nih.gov/books/NBK1763/\n+\n+**References**\n+\n+If you use this Galaxy tool in work leading to a scientific publication please\n+cite the following papers:\n+\n+@REFERENCES@\n+    </help>\n+    <expand macro="blast_citations" />\n+</tool>\n'
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_makeprofiledb.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,128 @@
+<tool id="ncbi_makeprofiledb" name="NCBI BLAST+ makeprofiledb" version="0.1.02">
+    <description>Make profile database</description>
+    <macros>
+        <token name="@BINARY@">makeprofiledb</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+##Unlike makeblastdb, makeprofiledb needs directory to exist already:
+mkdir -p "$outfile.files_path" &amp;&amp;
+makeprofiledb -out "${os.path.join($outfile.files_path,'blastdb')}"
+
+##We turn $input_file into $infiles with a configfile entry defined below
+-in "$infiles"
+
+#if $title:
+-title "$title"
+#else:
+##Would default to being based on the cryptic Galaxy filenames, which is unhelpful
+-title "Profile Database"
+#end if
+
+-threshold $threshold
+
+#if str($contain_pssm_scores.contain_pssm_scores_type) == 'no':
+    -gapopen $contain_pssm_scores.gapopen
+    -gapextend $contain_pssm_scores.gapextend
+    -scale $contain_pssm_scores.scale
+    -matrix $contain_pssm_scores.matrix
+#end if
+
+-obsr_threshold $obsr_threshold
+-exclude_invalid $exclude_invalid
+
+-logfile "$outfile"
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="input_file" type="data" multiple="true" optional="false" format="pssm-asn1"
+        label="Input PSSM file(s)"
+        help="One or more NCBI PSSM ASN.1 format scoremat files (often named *.smp)" />
+        <!-- Not referenced by the command or the configfile (both use input_file), so optional. -->
+        <param name="infile_list" type="data" multiple="true" optional="true" format="pssm-asn1" />
+        <param name="title" type="text" value="" label="Title for the profile database" help="This is the database name shown in BLAST search output" />
+        <param name="threshold" type="float" size="5" value="9.82" label="Minimum word score to add a word to the lookup table" />
+
+        <!-- output options -->
+        <!-- Initially we're only offering the default, RPS databases for use with rpsblast and rpstblastn
+        <param name="dbtype" type="select" display="radio" label="Type of database">
+            <option value="cobalt">Cobalt</option>
+            <option value="delta">Delta</option>
+            <option value="rps" selected="true">RPS</option>
+        </param>
+        -->
+
+        <conditional name="contain_pssm_scores">
+            <param name="contain_pssm_scores_type" type="select" label="Does your input file contain PSSM scores?">
+              <option value="yes" selected="True">Yes</option>
+              <option value="no">No</option>
+            </param>
+            <when value="yes" />
+            <when value="no">
+                <param name="gapopen" type="integer" size="5" value="" label="Cost to open a gap" />
+                <param name="gapextend" type="integer" size="5" value="" label="Cost to extend a gap" />
+                <param name="scale" type="float" size="5" value="" label="PSSM scale factor" />
+                <expand macro="input_scoring_matrix" />
+            </when>
+        </conditional>
+
+        <!--  Delta Blast Options -->
+        <param name="exclude_invalid" type="boolean" truevalue="true" falsevalue="false" checked="true"
+            label="Exclude invalid domains?"
+            help="Exclude domains that do not pass validation test" />
+        <param name="obsr_threshold" type="float" size="5" value="6.0"
+            label="Observation threshold"
+            help="Exclude domains with maximum number of independent observations below this threshold" />
+    </inputs>
+    <configfiles>
+        <configfile name="infiles">
+#for $infile in $input_file
+${infile}
+#end for
+        </configfile>
+    </configfiles>
+    <outputs>
+        <data name="outfile" format="blastdbd" label="RPS database from ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_file" value="cd00003.smp,cd00008.smp" ftype="pssm-asn1" />
+            <param name="title" value="Just 2 PSSM matrices" />
+            <param name="contain_pssm_scores_type" value="yes" />
+            <output name="outfile" file="empty_file.dat" ftype="blastdbd">
+                <extra_files type="file" value="cd00003_and_cd00008.phr" name="blastdb.phr" />
+                <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" lines_diff="2" />
+                <extra_files type="file" value="cd00003_and_cd00008.psq" name="blastdb.psq" />
+                <extra_files type="file" value="cd00003_and_cd00008.freq" name="blastdb.freq" />
+                <extra_files type="file" value="cd00003_and_cd00008.loo" name="blastdb.loo" />
+                <extra_files type="file" value="cd00003_and_cd00008.psd" name="blastdb.psd" />
+                <extra_files type="file" value="cd00003_and_cd00008.psi" name="blastdb.psi" />
+                <extra_files type="file" value="cd00003_and_cd00008.rps" name="blastdb.rps" />
+                <extra_files type="file" value="cd00003_and_cd00008.aux" name="blastdb.aux" />
+            </output>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Make a protein domain profile database (for use with RPS-BLAST or RPS-TBLASTN)
+from one or more Position Specific Scoring Matrices (PSSM) files in the NCBI
+"scoremat" ASN.1 format (usually named ``*.smp``).
+
+This is a wrapper for the NCBI BLAST+ tool 'makeprofiledb'.
+
+More information about makeprofiledb can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,120 @@
+<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.1.02">
+    <description>Search protein domain database (PSSMs) with protein query sequence(s)</description>
+    <!-- If job splitting is enabled, break up the query file into parts -->
+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
+    <!-- The BINARY token must name this wrapper's own executable (rpsblast, as run in the command section); presumably it is consumed by shared macros in ncbi_macros.xml - confirm there. -->
+    <macros>
+        <token name="@BINARY@">rpsblast</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Builds the rpsblast command line: the query file, the domain database (a configured entry's path, or the 'blastdb' prefix inside a history dataset's files_path), the e-value cutoff, then macro-supplied output, thread and advanced options. NOTE(review): the advanced inputs expand input_parse_deflines, but nothing visible here references $adv_opts.parse_deflines; confirm one of the macro tokens emits it, otherwise that option is silently ignored. -->
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+rpsblast
+-query "$query"
+#if $db_opts.db_opts_selector == "db":
+  -db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+  -db "${os.path.join($db_opts.histdb.files_path,'blastdb')}"
+#end if
+-evalue $evalue_cutoff
+@BLAST_OUTPUT@
+@THREADS@
+#if $adv_opts.adv_opts_selector=="advanced":
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_QCOV_HSP_PERC@
+## End of advanced options:
+#end if
+    </command>
+
+    <expand macro="stdio" />
+
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
+
+        <expand macro="input_conditional_pssm" />
+
+        <expand macro="input_evalue" />
+
+        <expand macro="input_out_format" />
+
+        <expand macro="advanced_options">
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_false" />
+            <expand macro="input_max_hits" />
+            <expand macro="input_parse_deflines" />
+            <expand macro="input_qcov_hsp_perc" />
+        </expand>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="rpsblast on ${on_string}">
+
+            <expand macro="output_change_format" />
+
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="cd00003_and_cd00008" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="out_format" value="6" />
+            <output name="output1" file="empty_file.dat" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+    
+@SEARCH_TIME_WARNING@
+
+**What it does**
+
+Search a *protein domain database* using a *protein query*,
+using the NCBI BLAST+ rpsblast command line tool.
+
+The protein domain databases use position-specific scoring matrices
+(PSSMs) and are available for a number of domain collections including:
+
+*CDD* - NCBI curated meta-collection of domains, see
+http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains
+
+*Kog* - PSSMs from automatically aligned sequences and sequence
+fragments classified in the KOGs resource, the eukaryotic 
+counterpart to COGs, see http://www.ncbi.nlm.nih.gov/COG/new/
+
+*Cog* - PSSMs from automatically aligned sequences and sequence
+fragments classified in the COGs resource, which focuses primarily
+on prokaryotes, see http://www.ncbi.nlm.nih.gov/COG/new/
+
+*Pfam* - PSSMs from Pfam-A seed alignment database, see
+http://pfam.sanger.ac.uk/
+
+*Smart* - PSSMs from SMART domain alignment database, see
+http://smart.embl-heidelberg.de/
+
+*Tigr* - PSSMs from TIGRFAM database of protein families, see
+http://www.jcvi.org/cms/research/projects/tigrfams/overview/
+
+*Prk* - PSSMs from automatically aligned stable clusters in the
+Protein Clusters database, see
+http://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&amp;db=proteinclusters
+
+The exact list of domain databases offered will depend on how your
+local Galaxy has been configured.
+
+-----
+
+@OUTPUT_FORMAT@
+
+-------
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,118 @@
+<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.1.02">
+    <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description>
+    <!-- When Galaxy job splitting is enabled, the query input is divided into parts and the per-part output1 results are merged -->
+    <parallelism merge_outputs="output1" method="multi" split_inputs="query" split_mode="to_size" split_size="1000" />
+    <macros>
+        <token name="@BINARY@">rpstblastn</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Builds the rpstblastn command line: the query file, the domain database (a configured entry's path, or the 'blastdb' prefix inside a history dataset's files_path), the e-value cutoff, then macro-supplied output and advanced options; no thread option is passed. NOTE(review): the advanced inputs expand input_parse_deflines, but nothing visible here references $adv_opts.parse_deflines; confirm one of the macro tokens emits it, otherwise that option is silently ignored. -->
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+rpstblastn
+-query "$query"
+#if $db_opts.db_opts_selector == "db":
+  -db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+  -db "${os.path.join($db_opts.histdb.files_path,'blastdb')}"
+#end if
+-evalue $evalue_cutoff
+@BLAST_OUTPUT@
+## rpstblastn does not support multiple threads up to release 2.2.27+. Added in BLAST 2.2.28+.
+##-num_threads 8
+#if $adv_opts.adv_opts_selector=="advanced":
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_QCOV_HSP_PERC@
+## End of advanced options:
+#end if
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+
+        <expand macro="input_conditional_pssm" />
+
+        <expand macro="input_evalue" />
+
+        <expand macro="input_out_format" />
+
+        <expand macro="advanced_options">
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_false" />
+            <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+            <expand macro="input_max_hits" />
+            <expand macro="input_parse_deflines" />
+            <expand macro="input_qcov_hsp_perc" />
+        </expand>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="rpstblastn on ${on_string}">
+            <expand macro="output_change_format" />
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="db" />
+            <param name="database" value="cd00003_and_cd00008" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="out_format" value="6" />
+            <output name="output1" file="empty_file.dat" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+    
+@SEARCH_TIME_WARNING@
+
+**What it does**
+
+Search a *protein domain database* using a *nucleotide query*,
+using the NCBI BLAST+ rpstblastn command line tool.
+
+The protein domain databases use position-specific scoring matrices
+(PSSMs) and are available for a number of domain collections including:
+
+*CDD* - NCBI curated meta-collection of domains, see
+http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains
+
+*Kog* - PSSMs from automatically aligned sequences and sequence
+fragments classified in the KOGs resource, the eukaryotic 
+counterpart to COGs, see http://www.ncbi.nlm.nih.gov/COG/new/
+
+*Cog* - PSSMs from automatically aligned sequences and sequence
+fragments classified in the COGs resource, which focuses primarily
+on prokaryotes, see http://www.ncbi.nlm.nih.gov/COG/new/
+
+*Pfam* - PSSMs from Pfam-A seed alignment database, see
+http://pfam.sanger.ac.uk/
+
+*Smart* - PSSMs from SMART domain alignment database, see
+http://smart.embl-heidelberg.de/
+
+*Tigr* - PSSMs from TIGRFAM database of protein families, see
+http://www.jcvi.org/cms/research/projects/tigrfams/overview/
+
+*Prk* - PSSMs from automatically aligned stable clusters in the
+Protein Clusters database, see
+http://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&amp;db=proteinclusters
+
+The exact list of domain databases offered will depend on how your
+local Galaxy has been configured.
+
+-----
+
+@OUTPUT_FORMAT@
+
+-------
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,102 @@
+<tool id="ncbi_segmasker_wrapper" name="NCBI BLAST+ segmasker" version="0.1.02">
+    <description>low-complexity regions in protein sequences</description>
+    <macros>
+        <token name="@BINARY@">segmasker</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Runs segmasker on either a BLAST database (a configured entry or a history dataset) or a FASTA file, passing the SEG window/locut/hicut settings and the chosen output format. -->
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+segmasker
+#if $db_opts.db_opts_selector == "db":
+  -in "${db_opts.database.fields.path}" -infmt blastdb
+#elif $db_opts.db_opts_selector == "histdb":
+  -in "${os.path.join($db_opts.histdb.files_path, 'blastdb')}" -infmt blastdb
+#else:
+## The FASTA input belongs to the db_opts conditional (like database and histdb above),
+## so it has to be addressed through db_opts rather than as a top-level name.
+  -in "$db_opts.subject" -infmt fasta
+#end if
+-out "$outfile"
+-window $window
+-locut $locut
+-hicut $hicut
+-outfmt $outformat
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <expand macro="input_conditional_protein_db" />
+        <param name="window" type="integer" value="12" label="SEG window length" help="(-window)" />
+        <param name="locut" type="float" value="2.2" label="SEG low cutoff" help="(-locut)" />
+        <param name="hicut" type="float" value="2.5" label="SEG high cutoff" help="(-hicut)" />
+        <param name="outformat" type="select" label="Output format">
+            <!-- seqloc_* formats are not very useful
+                 and what BLAST+ calls 'interval' is not what Galaxy calls interval format
+            -->
+            <option value="fasta">FASTA</option>
+            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
+            <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option>
+            <option value="maskinfo_xml">maskinfo_xml</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="maskinfo-asn1" label="SEG Masked File">
+            <change_format>
+                <when input="outformat" value="fasta" format="fasta" />
+                <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+ <!-- Presumably left disabled because maskinfo_asn1_text would map to maskinfo-asn1, which is already the default format declared on the data element:
+                <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" />
+ -->
+                <when input="outformat" value="maskinfo_xml" format="xml" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="window" value="12" />
+            <param name="locut" value="2.2" />
+            <param name="hicut" value="2.5" />
+            <param name="outformat" value="fasta" />
+            <output name="outfile" file="segmasker_four_human.fasta" />
+        </test>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="window" value="12" />
+            <param name="locut" value="2.2" />
+            <param name="hicut" value="2.5" />
+            <param name="outformat" value="maskinfo_asn1_bin" />
+            <output name="outfile" file="segmasker_four_human.maskinfo-asn1-binary" />
+        </test>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="window" value="12" />
+            <param name="locut" value="2.2" />
+            <param name="hicut" value="2.5" />
+            <param name="outformat" value="maskinfo_asn1_text" />
+            <output name="outfile" file="segmasker_four_human.maskinfo-asn1" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+This tool identifies and masks out low complexity regions of a protein database (or proteins in FASTA format) by using the SEG_ algorithm.
+
+If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.
+
+More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+.. _SEG: http://www.ncbi.nlm.nih.gov/pubmed/8743706
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,173 @@
+<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.1.02">
+    <description>Search translated nucleotide database with protein query sequence(s)</description>
+    <!-- When Galaxy job splitting is enabled, the query input is divided into parts and the per-part output1 results are merged -->
+    <parallelism merge_outputs="output1" method="multi" split_inputs="query" split_mode="to_size" split_size="1000" />
+    <macros>
+        <token name="@BINARY@">tblastn</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Builds the tblastn command line: the query file, the macro-supplied database/subject selection, the task, the e-value cutoff, output and thread options, then (in advanced mode) genetic code, scoring matrix and further macro-supplied filters. NOTE(review): the advanced inputs expand input_parse_deflines, but nothing visible here references $adv_opts.parse_deflines; confirm one of the macro tokens emits it, otherwise that option is silently ignored. -->
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+tblastn
+-query "$query"
+@BLAST_DB_SUBJECT@
+-task $blast_type
+-evalue $evalue_cutoff
+@BLAST_OUTPUT@
+@THREADS@
+#if $adv_opts.adv_opts_selector=="advanced":
+-db_gencode $adv_opts.db_gencode
+-matrix $adv_opts.matrix
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_WORD_SIZE@
+##Ungapped disabled for now - see comments below
+##$adv_opts.ungapped
+@ADV_ID_LIST_FILTER@
+@ADV_QCOV_HSP_PERC@
+## End of advanced options:
+#end if
+    </command>
+
+    <expand macro="stdio" />
+
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
+
+        <expand macro="input_conditional_nucleotide_db" />
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="tblastn">tblastn - Traditional TBLASTN to compare protein query to translated nucleotide database</option>
+            <option value="tblastn-fast">tblastn-fast - Use longer words for seeding, faster but less accurate</option>
+        </param>
+        <expand macro="input_evalue" />
+        <expand macro="input_out_format" />
+        <expand macro="advanced_options">
+            <expand macro="input_db_gencode" />
+
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_true" />
+            <expand macro="input_scoring_matrix" />
+            <expand macro="input_max_hits" />
+            <expand macro="input_word_size" />
+            <!--
+            Can't use '-ungapped' on its own, error back is:
+            Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
+            Tried using '-ungapped -comp_based_stats F' and tblastn crashed with 'Attempt to access NULL pointer.'
+            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
+            -->
+            <expand macro="input_parse_deflines" />
+            <expand macro="advanced_optional_id_files" />
+            <expand macro="input_qcov_hsp_perc" />
+        </expand>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="tblastn $query.name vs @ON_DB_SUBJECT@">
+            <expand macro="output_change_format" />
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="5" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="false" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin.xml" ftype="blastxml" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="ext" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="false" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="false" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <!-- Same as above, but parse deflines - on BLAST 2.2.25+ - 2.2.27+ makes no difference -->
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="true" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="0 -html" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="false" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin.html" ftype="html" />
+        </test>
+    </tests>
+    <help>
+    
+@SEARCH_TIME_WARNING@
+
+**What it does**
+
+Search a *translated nucleotide database* using a *protein query*,
+using the NCBI BLAST+ tblastn command line tool.
+
+@FASTA_WARNING@
+
+-----
+
+@OUTPUT_FORMAT@
+
+-------
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,100 @@
+<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.1.02">
+    <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>
+    <!-- When Galaxy job splitting is enabled, the query input is divided into parts and the per-part output1 results are merged -->
+    <parallelism merge_outputs="output1" method="multi" split_inputs="query" split_mode="to_size" split_size="1000" />
+    <macros>
+        <token name="@BINARY@">tblastx</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Builds the tblastx command line: the query file, the macro-supplied database/subject selection, the query genetic code, the e-value cutoff, output and thread options, then (in advanced mode) database genetic code, strand, scoring matrix and further macro-supplied filters. NOTE(review): the advanced inputs expand input_parse_deflines, but nothing visible here references $adv_opts.parse_deflines; confirm one of the macro tokens emits it, otherwise that option is silently ignored. -->
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+tblastx
+-query "$query"
+@BLAST_DB_SUBJECT@
+-query_gencode $query_gencode
+-evalue $evalue_cutoff
+@BLAST_OUTPUT@
+@THREADS@
+#if $adv_opts.adv_opts_selector=="advanced":
+-db_gencode $adv_opts.db_gencode
+$adv_opts.strand
+-matrix $adv_opts.matrix
+@ADV_FILTER_QUERY@
+@ADV_MAX_HITS@
+@ADV_WORD_SIZE@
+@ADV_ID_LIST_FILTER@
+@ADV_QCOV_HSP_PERC@
+## End of advanced options:
+#end if
+    </command>
+
+    <expand macro="stdio" />
+
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+
+        <expand macro="input_conditional_nucleotide_db" />
+        <expand macro="input_query_gencode" />
+        <expand macro="input_evalue" />
+
+        <expand macro="input_out_format" />
+        <expand macro="advanced_options">
+            <expand macro="input_db_gencode" />
+
+            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+            <expand macro="input_filter_query_default_true" />
+            <expand macro="input_strand" />
+            <expand macro="input_scoring_matrix" />
+            <expand macro="input_max_hits" />
+            <!-- I'd like word_size to be optional, with minimum 2 for tblastx -->
+            <expand macro="input_word_size" />
+            <expand macro="input_parse_deflines" />
+            <expand macro="advanced_optional_id_files" />
+            <expand macro="input_qcov_hsp_perc" />
+        </expand>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="tblastx $query.name vs @ON_DB_SUBJECT@">
+            <expand macro="output_change_format" />
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-40" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="tblastx_rhodopsin_vs_three_human.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+    
+@SEARCH_TIME_WARNING@
+
+**What it does**
+
+Search a *translated nucleotide database* using a *translated nucleotide query*,
+using the NCBI BLAST+ tblastx command line tool.
+
+@FASTA_WARNING@
+
+-----
+
+@OUTPUT_FORMAT@
+
+-------
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+@REFERENCES@
+    </help>
+    <expand macro="blast_citations" />
+</tool>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/repository_dependencies.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<!-- Declares a dependency on the blast_datatypes repository (owner devteam), pinned to one changeset on testtoolshed.g2.bx.psu.edu. -->
+<repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format).">
+    <repository changeset_revision="236046d1d441" name="blast_datatypes" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+</repositories>
b
diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/tool_dependencies.xml Fri Jan 30 08:27:28 2015 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!-- Declares the blast+ 2.2.30 package dependency, resolved from the package_blast_plus_2_2_30 repository (owner iuc) at a pinned changeset on testtoolshed.g2.bx.psu.edu. -->
+<tool_dependency>
+    <package name="blast+" version="2.2.30">
+        <repository changeset_revision="f69b90d89b62" name="package_blast_plus_2_2_30" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>