| Next changeset 1:5e9d5e536b79 (2015-03-03) |
|
Commit message:
Uploaded v0.1.02 preview 1, using tool_data_table_conf.xml for loc files, etc |
|
added:
test-data/blastdb.loc test-data/blastdb_d.loc test-data/blastdb_p.loc test-data/blastn_arabidopsis.extended.tabular test-data/blastn_arabidopsis.standard.tabular test-data/blastn_arabidopsis.xml test-data/blastn_chimera_vs_three_human_max1.tabular test-data/blastn_chimera_vs_three_human_max1.txt test-data/blastn_rhodopsin_vs_three_human.columns.tabular test-data/blastn_rhodopsin_vs_three_human.tabular test-data/blastn_rhodopsin_vs_three_human.xml test-data/blastn_rhodopsin_vs_three_human_converted.tabular test-data/blastp_four_human_vs_rhodopsin.tabular test-data/blastp_four_human_vs_rhodopsin.xml test-data/blastp_four_human_vs_rhodopsin_converted.tabular test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular test-data/blastp_four_human_vs_rhodopsin_ext.tabular test-data/blastp_human_vs_pdb_seg_no.xml test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular test-data/blastp_human_vs_pdb_seg_no_converted_std.tabular test-data/blastp_rhodopsin_vs_four_human.tabular test-data/blastp_sample.xml test-data/blastp_sample_converted.tabular test-data/blastx_rhodopsin_vs_four_human.tabular test-data/blastx_rhodopsin_vs_four_human.xml test-data/blastx_rhodopsin_vs_four_human_all.tabular test-data/blastx_rhodopsin_vs_four_human_converted.tabular test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular test-data/blastx_rhodopsin_vs_four_human_ext.tabular test-data/blastx_sample.xml test-data/blastx_sample_converted.tabular test-data/cd00003.smp test-data/cd00003_and_cd00008.aux test-data/cd00003_and_cd00008.freq test-data/cd00003_and_cd00008.loo test-data/cd00003_and_cd00008.phr test-data/cd00003_and_cd00008.pin test-data/cd00003_and_cd00008.psd test-data/cd00003_and_cd00008.psi test-data/cd00003_and_cd00008.psq test-data/cd00003_and_cd00008.rps test-data/cd00008.smp test-data/chimera.fasta test-data/convert2blastmask_four_human_masked.maskinfo-asn1 test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary test-data/dustmasker_three_human.fasta 
test-data/dustmasker_three_human.maskinfo-asn1 test-data/dustmasker_three_human.maskinfo-asn1-binary test-data/empty_file.dat test-data/four_human_proteins.dbinfo.txt test-data/four_human_proteins.fasta test-data/four_human_proteins.fasta.log.txt test-data/four_human_proteins.fasta.phd test-data/four_human_proteins.fasta.phi test-data/four_human_proteins.fasta.phr test-data/four_human_proteins.fasta.pin test-data/four_human_proteins.fasta.pog test-data/four_human_proteins.fasta.psd test-data/four_human_proteins.fasta.psi test-data/four_human_proteins.fasta.psq test-data/four_human_proteins_masked.fasta test-data/four_human_proteins_taxid.fasta.log.txt test-data/four_human_proteins_taxid.fasta.phd test-data/four_human_proteins_taxid.fasta.phi test-data/four_human_proteins_taxid.fasta.phr test-data/four_human_proteins_taxid.fasta.pin test-data/four_human_proteins_taxid.fasta.pog test-data/four_human_proteins_taxid.fasta.psd test-data/four_human_proteins_taxid.fasta.psi test-data/four_human_proteins_taxid.fasta.psq test-data/rhodopsin_nucs.fasta test-data/rhodopsin_proteins.fasta test-data/segmasker_four_human.fasta test-data/segmasker_four_human.maskinfo-asn1 test-data/segmasker_four_human.maskinfo-asn1-binary test-data/tblastn_four_human_vs_rhodopsin.html test-data/tblastn_four_human_vs_rhodopsin.tabular test-data/tblastn_four_human_vs_rhodopsin.xml test-data/tblastn_four_human_vs_rhodopsin_ext.tabular test-data/tblastx_rhodopsin_vs_three_human.tabular test-data/three_human_mRNA.dbinfo.txt test-data/three_human_mRNA.fasta test-data/three_human_mRNA.fasta.log.txt test-data/three_human_mRNA.fasta.nhd test-data/three_human_mRNA.fasta.nhi test-data/three_human_mRNA.fasta.nhr test-data/three_human_mRNA.fasta.nin test-data/three_human_mRNA.fasta.nog test-data/three_human_mRNA.fasta.nsd test-data/three_human_mRNA.fasta.nsi test-data/three_human_mRNA.fasta.nsq test-data/tool_data_table_conf.xml.test tool-data/blastdb.loc.sample tool-data/blastdb_d.loc.sample 
tool-data/blastdb_p.loc.sample tool-data/tool_data_table_conf.xml.sample tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/blastxml_to_tabular.py tools/ncbi_blast_plus/blastxml_to_tabular.xml tools/ncbi_blast_plus/check_no_duplicates.py tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml tools/ncbi_blast_plus/ncbi_macros.xml tools/ncbi_blast_plus/ncbi_makeblastdb.xml tools/ncbi_blast_plus/ncbi_makeprofiledb.xml tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml tools/ncbi_blast_plus/repository_dependencies.xml tools/ncbi_blast_plus/tool_dependencies.xml |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastdb.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastdb.loc Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,7 @@ +# This is a test file distributed with the Galaxy BLAST+ wrapper for +# defining a list of nucleotide BLAST databases used in functional +# tests for blastn etc. +# +# See the file tool-data/blastdb.loc.sample for more information. +# +three_human_mRNA Three Human mRNAs ${__HERE__}/three_human_mRNA.fasta |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastdb_d.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastdb_d.loc Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,7 @@ +# This is a test file distributed with the Galaxy BLAST+ wrapper for +# defining a list of protein domain BLAST databases used in functional +# tests of rpsblast etc. +# +# See the file tool-data/blastdb_d.loc.sample for more information. +# +cd00003_and_cd00008 Domains CD00003 (PNPsynthase) and CD00008 (PIN_53EXO-like) ${__HERE__}/cd00003_and_cd00008 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastdb_p.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastdb_p.loc Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,8 @@ +# This is a test file distributed with the Galaxy BLAST+ wrapper for +# defining a list of protein BLAST databases used in functional tests +# for blastp etc. +# +# See the file tool-data/blastdb_p.loc.sample for more information. +# +four_human_proteins Four Human Proteins (no taxid) ${__HERE__}/four_human_proteins.fasta +four_human_proteins_taxid Four Human Proteins (with taxid) ${__HERE__}/four_human_proteins_taxid.fasta |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_arabidopsis.extended.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_arabidopsis.extended.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,1 @@ +chunk_of_plant chrIII 100.00 630 0 0 1 630 4341 4970 0.0 1164 chrIII 630 630 630 0 100.00 1 1 GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT 630 23459830 gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_arabidopsis.standard.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_arabidopsis.standard.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,1 @@ +chunk_of_plant chrIII 100.00 630 0 0 1 630 4341 4970 0.0 1164 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_arabidopsis.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_arabidopsis.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,71 @@ +<?xml version="1.0"?> +<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> +<BlastOutput> + <BlastOutput_program>blastn</BlastOutput_program> + <BlastOutput_version>BLASTN 2.2.28+</BlastOutput_version> + <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), "A greedy algorithm for aligning DNA sequences", J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference> + <BlastOutput_db>/mnt/galaxy/galaxy-central/database/files/000/dataset_857_files/blastdb</BlastOutput_db> + <BlastOutput_query-ID>Query_1</BlastOutput_query-ID> + <BlastOutput_query-def>chunk_of_plant</BlastOutput_query-def> + <BlastOutput_query-len>630</BlastOutput_query-len> + <BlastOutput_param> + <Parameters> + <Parameters_expect>0.001</Parameters_expect> + <Parameters_sc-match>1</Parameters_sc-match> + <Parameters_sc-mismatch>-2</Parameters_sc-mismatch> + <Parameters_gap-open>0</Parameters_gap-open> + <Parameters_gap-extend>0</Parameters_gap-extend> + <Parameters_filter>L;m;</Parameters_filter> + </Parameters> + </BlastOutput_param> +<BlastOutput_iterations> +<Iteration> + <Iteration_iter-num>1</Iteration_iter-num> + <Iteration_query-ID>Query_1</Iteration_query-ID> + <Iteration_query-def>chunk_of_plant</Iteration_query-def> + <Iteration_query-len>630</Iteration_query-len> +<Iteration_hits> +<Hit> + <Hit_num>1</Hit_num> + <Hit_id>gnl|BL_ORD_ID|2</Hit_id> + <Hit_def>chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence</Hit_def> + <Hit_accession>2</Hit_accession> + <Hit_len>23459830</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>1164.51</Hsp_bit-score> + <Hsp_score>630</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>630</Hsp_query-to> + <Hsp_hit-from>4341</Hsp_hit-from> + <Hsp_hit-to>4970</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + 
<Hsp_identity>630</Hsp_identity> + <Hsp_positive>630</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>630</Hsp_align-len> + <Hsp_qseq>GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT</Hsp_qseq> + <Hsp_hseq>GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT</Hsp_hseq> + 
<Hsp_midline>||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||</Hsp_midline> + </Hsp> + </Hit_hsps> +</Hit> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>5</Statistics_db-num> + <Statistics_db-len>119146348</Statistics_db-len> + <Statistics_hsp-len>26</Statistics_hsp-len> + <Statistics_eff-space>71964315672</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> +</Iteration> +</BlastOutput_iterations> +</BlastOutput> + |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_chimera_vs_three_human_max1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_chimera_vs_three_human_max1.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,1 @@ +chimera ENA|AB011145|AB011145.1 100.00 4560 0 0 1 4560 121 4680 0.0 8421 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_chimera_vs_three_human_max1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_chimera_vs_three_human_max1.txt Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,356 @@\n+BLASTN 2.2.30+\n+\n+\n+Reference: Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb\n+Miller (2000), "A greedy algorithm for aligning DNA sequences", J\n+Comput Biol 2000; 7(1-2):203-14.\n+\n+\n+\n+Database: Just 3 human mRNA sequences\n+ 3 sequences; 10,732 total letters\n+\n+\n+\n+Query= chimera chunks of AB011145 plus M10051 plus BC112106\n+\n+Length=9973\n+ Score E\n+Sequences producing significant alignments: (Bits) Value\n+\n+ ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein,... 8421 0.0 \n+\n+\n+> ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, \n+partial cds.\n+Length=4796\n+\n+ Score = 8421 bits (4560), Expect = 0.0\n+ Identities = 4560/4560 (100%), Gaps = 0/4560 (0%)\n+ Strand=Plus/Plus\n+\n+Query 1 GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC 60\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 121 GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC 180\n+\n+Query 61 CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA 120\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 181 CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA 240\n+\n+Query 121 TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC 180\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 241 TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC 300\n+\n+Query 181 TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT 240\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 301 TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT 360\n+\n+Query 241 TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA 300\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 361 TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA 420\n+\n+Query 301 CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA 360\n+ 
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 421 CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA 480\n+\n+Query 361 TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA 420\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 481 TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA 540\n+\n+Query 421 CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC 480\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 541 CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC 600\n+\n+Query 481 TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA 540\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 601 TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA 660\n+\n+Query 541 TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT 600\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 661 TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT 720\n+\n+Query 601 TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG 660\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 721 TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG 780\n+\n+Query 661 GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA 720\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 781 GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA 840\n+\n+Query 721 TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA 780\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 841 TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA 900\n+\n+Query 781 ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAA'..b'3780\n+\n+Query 3661 GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT 3720\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 3781 GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT 
3840\n+\n+Query 3721 TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT 3780\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 3841 TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT 3900\n+\n+Query 3781 GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT 3840\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 3901 GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT 3960\n+\n+Query 3841 TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG 3900\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 3961 TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG 4020\n+\n+Query 3901 GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA 3960\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4021 GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA 4080\n+\n+Query 3961 CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA 4020\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4081 CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA 4140\n+\n+Query 4021 CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA 4080\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4141 CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA 4200\n+\n+Query 4081 GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA 4140\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4201 GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA 4260\n+\n+Query 4141 GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT 4200\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4261 GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT 4320\n+\n+Query 4201 GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG 4260\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4321 
GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG 4380\n+\n+Query 4261 GAAACTTTTAGATGACATTCTACAAATTAtttttttCTTTAAATTAAAAGAACCTAGCCA 4320\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4381 GAAACTTTTAGATGACATTCTACAAATTATTTTTTTCTTTAAATTAAAAGAACCTAGCCA 4440\n+\n+Query 4321 ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA 4380\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4441 ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA 4500\n+\n+Query 4381 AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA 4440\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4501 AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA 4560\n+\n+Query 4441 ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA 4500\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4561 ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA 4620\n+\n+Query 4501 GGTTGTCAAGAAGGCttttttttttttcttttttAAACCTGAGGGCAAAAAGGAATGGAT 4560\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+Sbjct 4621 GGTTGTCAAGAAGGCTTTTTTTTTTTTCTTTTTTAAACCTGAGGGCAAAAAGGAATGGAT 4680\n+\n+\n+\n+Lambda K H\n+ 1.33 0.621 1.12 \n+\n+Gapped\n+Lambda K H\n+ 1.28 0.460 0.850 \n+\n+Effective search space used: 106299490\n+\n+\n+ Database: Just 3 human mRNA sequences\n+ Posted date: Dec 26, 2014 5:54 AM\n+ Number of letters in database: 10,732\n+ Number of sequences in database: 3\n+\n+\n+\n+Matrix: blastn matrix 1 -2\n+Gap Penalties: Existence: 0, Extension: 2.5\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_rhodopsin_vs_three_human.columns.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human.columns.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,7 @@ +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 4301 1213 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 983 1213 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1047 1213 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_rhodopsin_vs_three_human.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,7 @@ +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132 460 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93 331 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73 265 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68 248 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_rhodopsin_vs_three_human.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,549 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>blastn</BlastOutput_program>\n+ <BlastOutput_version>BLASTN 2.2.30+</BlastOutput_version>\n+ <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), "A greedy algorithm for aligning DNA sequences", J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference>\n+ <BlastOutput_db></BlastOutput_db>\n+ <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+ <BlastOutput_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</BlastOutput_query-def>\n+ <BlastOutput_query-len>1047</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_expect>1e-40</Parameters_expect>\n+ <Parameters_sc-match>1</Parameters_sc-match>\n+ <Parameters_sc-mismatch>-2</Parameters_sc-mismatch>\n+ <Parameters_gap-open>0</Parameters_gap-open>\n+ <Parameters_gap-extend>0</Parameters_gap-extend>\n+ <Parameters_filter>L;m;</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+ <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>4933992</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ 
<Iteration_iter-num>2</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+ <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>4933992</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>3</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+ <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>Subject_3</Hit_id>\n+ <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def>\n+ <Hit_accession>Subject_3</Hit_accession>\n+ <Hit_len>1213</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>1474.75</Hsp_bit-score>\n+ <Hsp_score>798</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>1047</Hsp_query-to>\n+ <Hsp_hit-from>88</Hsp_hit-from>\n+ <Hsp_hit-to>1134</Hsp_hit-to>\n+ <Hsp_query-frame>1</Hsp_query-frame>\n+ <Hsp_hit-frame>1</Hsp_hit-frame>\n+ <Hsp_identity>964</Hsp_identity>\n+ <Hsp_positive>964</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>1047</Hsp_align-len>\n+ <Hsp_qseq>ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCC'..b'|||||||| |||||||||||||||||| | || || ||||||||||||||||||||||| |||||||| || ||||||||||| | || |||||||||| |||||| || 
||||||||||| || |||||||| ||||| || || || ||||| | |||||||||||||||||| | |||||||||||||| ||||||||||||||||| || ||||||||||||||||| |||||||| ||||||||||||||||||||||||||||||||||| |||||||||||||| || ||||||||||||||||| |||||||| || || ||||| |||| ||||||||| || |||||||| ||||| ||||||||||||| || ||||| |||||||||| | | |||| |||||| ||||| || ||||||||||||||||| || ||||||| ||||||| ||||| |||| || |||||||| |||||||| |||||||||||||||||||| || ||||||||||||||||| |||||||| |||||||| |||||||||||||| || ||||||||||||||||||||||||||||| || |||||| |||||||||| | ||||| ||||||||||||||||| ||||| ||| |||| || |||||||||||||||||||| || ||||||||||||| || | ||| |||| ||||| |||||||| ||||||||||||||||||||||||||||||||| ||||||| ||||||| ||||||||||| || |||||||| |||||||| | |||||||||||||| ||||| ||||| |||||||| ||||||</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>4933992</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>16</Iteration_iter-num>\n+ <Iteration_query-ID>Query_6</Iteration_query-ID>\n+ <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+ <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>6353949</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ 
<Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>17</Iteration_iter-num>\n+ <Iteration_query-ID>Query_6</Iteration_query-ID>\n+ <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+ <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>6353949</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>18</Iteration_iter-num>\n+ <Iteration_query-ID>Query_6</Iteration_query-ID>\n+ <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+ <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>15</Statistics_hsp-len>\n+ <Statistics_eff-space>6353949</Statistics_eff-space>\n+ <Statistics_kappa>0.46</Statistics_kappa>\n+ <Statistics_lambda>1.28</Statistics_lambda>\n+ <Statistics_entropy>0.85</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastn_rhodopsin_vs_three_human_converted.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human_converted.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,7 @@ +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132 460 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93 331 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73 265 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68 248 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_four_human_vs_rhodopsin.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_four_human_vs_rhodopsin.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,6 @@ +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 +sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 +sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 +sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 +sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 +sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_four_human_vs_rhodopsin.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_four_human_vs_rhodopsin.xml Fri Jan 30 08:27:28 2015 -0500 |
| [ |
| b'@@ -0,0 +1,665 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>blastp</BlastOutput_program>\n+ <BlastOutput_version>BLASTP 2.2.30+</BlastOutput_version>\n+ <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+ <BlastOutput_db></BlastOutput_db>\n+ <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID>\n+ <BlastOutput_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def>\n+ <BlastOutput_query-len>406</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+ <Parameters_expect>1e-08</Parameters_expect>\n+ <Parameters_gap-open>11</Parameters_gap-open>\n+ <Parameters_gap-extend>1</Parameters_gap-extend>\n+ <Parameters_filter>F</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+ <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>30</Statistics_hsp-len>\n+ <Statistics_eff-space>119568</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ 
<Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>2</Iteration_iter-num>\n+ <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+ <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>30</Statistics_hsp-len>\n+ <Statistics_eff-space>119568</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>3</Iteration_iter-num>\n+ <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+ <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>30</Statistics_hsp-len>\n+ <Statistics_eff-space>119568</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>4</Iteration_iter-num>\n+ <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n+ <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sa'..b'q>\n+ 
<Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>29</Statistics_hsp-len>\n+ <Statistics_eff-space>101761</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>24</Iteration_iter-num>\n+ <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>\n+ <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>348</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>gi|12583665|dbj|BAB21486.1|</Hit_id>\n+ <Hit_def>fresh water form rod opsin [Conger myriaster]</Hit_def>\n+ <Hit_accession>BAB21486</Hit_accession>\n+ <Hit_len>354</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>599.356</Hsp_bit-score>\n+ <Hsp_score>1544</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>341</Hsp_query-to>\n+ <Hsp_hit-from>1</Hsp_hit-from>\n+ 
<Hsp_hit-to>342</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>281</Hsp_identity>\n+ <Hsp_positive>314</Hsp_positive>\n+ <Hsp_gaps>1</Hsp_gaps>\n+ <Hsp_align-len>342</Hsp_align-len>\n+ <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP +D ASAT SKTE</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>29</Statistics_hsp-len>\n+ <Statistics_eff-space>101761</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_four_human_vs_rhodopsin_converted.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_four_human_vs_rhodopsin_converted.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,6 @@ +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 +sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 +sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 +sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 +sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 +sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Fri Jan 30 08:27:28 2015 -0500 |
| [ |
| @@ -0,0 +1,6 @@ +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 rhodopsin [Felis catus] +sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 RecName: Full=Rhodopsin +sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Cynopterus brachyotis] +sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Myotis pilosus] +sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 rhodopsin [Bos taurus] +sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 fresh water form rod opsin [Conger myriaster] |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_four_human_vs_rhodopsin_ext.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,6 @@ +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 N/A +sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 N/A +sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A +sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A +sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 
MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 N/A +sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 N/A |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_human_vs_pdb_seg_no.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_human_vs_pdb_seg_no.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,322 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>blastp</BlastOutput_program>\n+ <BlastOutput_version>BLASTP 2.2.24+</BlastOutput_version>\n+ <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+ <BlastOutput_db>/data/blastdb/pdbaa</BlastOutput_db>\n+ <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+ <BlastOutput_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def>\n+ <BlastOutput_query-len>406</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+ <Parameters_expect>1e-08</Parameters_expect>\n+ <Parameters_gap-open>11</Parameters_gap-open>\n+ <Parameters_gap-extend>1</Parameters_gap-extend>\n+ <Parameters_filter>F</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+ <BlastOutput_iterations>\n+ <Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>406</Iteration_query-len>\n+ <Iteration_hits>\n+ <Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>gi|193885198|pdb|2R2J|A</Hit_id>\n+ <Hit_def>Chain A, Crystal Structure Of Human Erp44</Hit_def>\n+ <Hit_accession>2R2J_A</Hit_accession>\n+ <Hit_len>382</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>768.073791748238</Hsp_bit-score>\n+ <Hsp_score>1982</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>26</Hsp_query-from>\n+ 
<Hsp_query-to>406</Hsp_query-to>\n+ <Hsp_hit-from>2</Hsp_hit-from>\n+ <Hsp_hit-to>382</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>370</Hsp_identity>\n+ <Hsp_positive>372</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>381</Hsp_align-len>\n+ <Hsp_qseq>PVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL</Hsp_qseq>\n+ <Hsp_hseq>PLGSEITSLDTENIDEILNNADVALVNFYADWCRFSQXLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGXXXKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDXVYLGAXTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHXKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHXYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL</Hsp_hseq>\n+ <Hsp_midline>P+ +EITSLDTENIDEILNNADVALVNFYADWCRFSQ LHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNG KREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPD VYLGA TNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFH KEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRH YVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+ </Hit>\n+ <Hit>\n+ <Hit_num>2</Hit_num>\n+ <Hit_id>gi|88192228|pdb|2B5E|A</Hit_id>\n+ <Hit_d'..b' 
<Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+ </Hit>\n+ <Hit>\n+ <Hit_num>2</Hit_num>\n+ <Hit_id>gi|195927458|pdb|3C9M|A</Hit_id>\n+ <Hit_def>Chain A, Structure Of A Mutant Bovine Rhodopsin In Hexagonal Crystal Form</Hit_def>\n+ <Hit_accession>3C9M_A</Hit_accession>\n+ <Hit_len>348</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>674.085095224404</Hsp_bit-score>\n+ <Hsp_score>1738</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>348</Hsp_query-to>\n+ <Hsp_hit-from>1</Hsp_hit-from>\n+ <Hsp_hit-to>348</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>324</Hsp_identity>\n+ <Hsp_positive>335</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>348</Hsp_align-len>\n+ 
<Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA</Hsp_qseq>\n+ <Hsp_hseq>MCGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSCFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>\n+ <Hsp_midline>M GTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+ </Hit>\n+ </Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>49615</Statistics_db-num>\n+ <Statistics_db-len>11554246</Statistics_db-len>\n+ <Statistics_hsp-len>0</Statistics_hsp-len>\n+ <Statistics_eff-space>1672994000</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ </Iteration>\n+ </BlastOutput_iterations>\n+</BlastOutput>\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,8 @@\n+sp|Q9BS26|ERP44_HUMAN\tgi|193885198|pdb|2R2J|A\t97.11\t381\t11\t0\t26\t406\t2\t382\t0.0\t768\tgi|193885198|pdb|2R2J|A\t1982\t370\t372\t0\t97.64\t1\t1\tPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL\tPLGSEITSLDTENIDEILNNADVALVNFYADWCRFSQXLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGXXXKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDXVYLGAXTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHXKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHXYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL\t406\t382\tChain A, Crystal Structure Of Human Erp44\n+sp|Q9BS26|ERP44_HUMAN\tgi|88192228|pdb|2B5E|A\t25.17\t290\t193\t8\t25\t306\t10\t283\t4e-20\t95.1\tgi|88192228|pdb|2B5E|A;gi|206581884|pdb|3BOA|A\t235\t73\t133\t24\t45.86\t1\t1\tTPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKR-EYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNI---IYKPPGHSAPDMVYLGA---MTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKF-RH\tAPEDSAVVKLATDSFNEYIQSHDLVLAEFFAPWCGHCKNMAPEYVKAAETLVEK-----NITLAQIDCTENQDLCMEHNIPGFPSLKIFKNSDVNNSIDYEGPRTAEAIVQFMIKQSQPAVAVVADLPAYLANETFVTPVIVQSGKIDADFNATFYSMANKHFNDYDFVSA--------ENADDDFKLSIYLPSAMDEP-VVYNGKKADIADADVFEKWLQVEALPYFGEIDGSVFAQYVESGLPLGYLFY--NDEEELEEYKPLFTELAKKNRGLMNFVSIDARKFGRH\t406\t504\tChain A, Crystal Structure Of Yeast Protein Disulfide Isomerase<>Chain A, Crystal Structure Of Yeast Protein Disulfide 
Isomerase.\n+sp|Q9NSY1|BMP2K_HUMAN\tgi|73536291|pdb|2BUJ|A\t29.39\t279\t182\t8\t40\t308\t21\t294\t1e-22\t105\tgi|73536291|pdb|2BUJ|A;gi|73536292|pdb|2BUJ|B\t262\t82\t130\t15\t46.59\t1\t1\tGVRVFAVGRHQVTLEESLAEGGFSTVFLVR-THGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMNKKLQTG--FTEPEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDG-VNVVEEEIKKYTTLSYRAPEMINLYGGKPITTKADIWALGCLLYKLCFFTLPF------GESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDI\tGHMVIIDNKHYLFIQK-LGEGGFSYVDLVEGLHDGHFYALKRILCHEQQDREEAQREAD-MHRLFNHPNILRLVAYCLRERGAKH-EAWLLLPFFKRGTLWNEIERLKDKGNFLTEDQILWLLLGICRGLEAIH--AKGYAHRDLKPTNILLGDEGQPVLMDLGSMNQACIHVEGSRQALTLQDWAAQRCTISYRAPELFSVQSHCVIDERTDVWSLGCVLYAMMFGEGPYDMVFQKGDSVALAVQNQLSIPQSPRHSSALWQLLNSMMTVDPHQRPHI\t1161\t317\tChain A, Crystal Structure Of The Human Serine-Threonine Kinase 16 In Complex With Staurosporine<>Chain B, Crystal Structure Of The Human Serine-Threonine Kinase 16 In Complex With Staurosporine\n+sp|Q9NSY1|BMP2K_HUMAN\tgi|270346335|pdb|2WQM|A\t27.21\t272\t166\t12\t53\t311\t36\t288\t6e-17\t86.3\tgi|270346335|pdb|2WQM|A;gi|270346336|pdb|2WQN|A\t212\t74\t129\t32\t47.43\t1\t1\tLEESLAEGGFSTVFLVRTH-GGIRCALKRMYVNNMPDLNV---CKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMN--KKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGGKPITTKADIWALGCLLYKLCFFTLPFGESQV---AICD----GNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQV\tIEKKIGRGQFSEVYRAACLLDGVPVALKKVQIFDLMDAKARADCIKEIDLLKQLN-HPNVIKYY---ASFIEDN--ELNIVLELADAGDLSRMIKHFKKQKRLIPERTVWKYFVQLCSALEHMHSRR--VMHRDIKPANVFITATGVVKLGDLG--LGRFFSSKTTAAHSL------VGTPYYMSPERIHENG---YNFKSDIWSLGCLLYEMAALQSPFYGDKMNLYSLCKKIEQCDYPPLPSDHYSEELRQLVNMCINPDPEKRPDVTYV\t1161\t310\tChain A, Structure Of Apo Human Nek7<>Chain A, Structure Of Adp-Bound Human 
Nek7\n+sp|P06213|INSR_HUMAN\tgi|116667097|pdb|2DTG|E\t95.91\t928\t7\t2\t28\t955\t1\t897\t0.0\t1846\tgi|116667097|pdb|2DTG|E\t4781\t890\t893\t31\t96.23\t1\t1\tHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPK'..b'83860|pdb|2I35|A;gi|157883861|pdb|2I36|A;gi|157883862|pdb|2I36|B;gi|157883863|pdb|2I36|C;gi|157883864|pdb|2I37|A;gi|157883865|pdb|2I37|B;gi|157883866|pdb|2I37|C;gi|159795066|pdb|2PED|A;gi|159795067|pdb|2PED|B;gi|192988480|pdb|3CAP|A;gi|192988481|pdb|3CAP|B;gi|195927457|pdb|3C9L|A;gi|197107530|pdb|1F88|A;gi|197107531|pdb|1F88|B;gi|206582030|pdb|3DQB|A\t1756\t325\t337\t0\t96.84\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t348\tChain A, Structure Of Bovine Rhodopsin (Dark Adapted)<>Chain A, Structure Of Bovine Rhodopsin (Metarhodopsin Ii)<>Chain A, Structure Of Bovine Rhodopsin In A Trigonal Crystal Form<>Chain B, Structure Of Bovine Rhodopsin In A Trigonal Crystal Form<>Chain A, Crystal Structure Of Bovine Rhodopsin<>Chain B, Crystal Structure Of Bovine Rhodopsin<>Chain A, Crystal Structure Of Bovine Rhodopsin At 2.6 Angstroms Resolution<>Chain B, 
Crystal Structure Of Bovine Rhodopsin At 2.6 Angstroms Resolution<>Chain A, Crystal Structure Of Bovine Rhodopsin At 2.2 Angstroms Resolution<>Chain B, Crystal Structure Of Bovine Rhodopsin At 2.2 Angstroms Resolution<>Chain A, Crystallographic Model Of Bathorhodopsin<>Chain B, Crystallographic Model Of Bathorhodopsin<>Chain A, Crystallographic Model Of Lumirhodopsin<>Chain B, Crystallographic Model Of Lumirhodopsin<>Chain A, Crystal Structure Of Rhombohedral Crystal Form Of Ground- State Rhodopsin<>Chain A, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain B, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain C, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain A, Crystal Structure Of A Photoactivated Rhodopsin<>Chain B, Crystal Structure Of A Photoactivated Rhodopsin<>Chain C, Crystal Structure Of A Photoactivated Rhodopsin<>Chain A, Crystallographic Model Of 9-Cis-Rhodopsin<>Chain B, Crystallographic Model Of 9-Cis-Rhodopsin<>Chain A, Crystal Structure Of Native Opsin: The G Protein-Coupled Receptor Rhodopsin In Its Ligand-Free State<>Chain B, Crystal Structure Of Native Opsin: The G Protein-Coupled Receptor Rhodopsin In Its Ligand-Free State<>Chain A, Structure Of Ground-State Bovine Rhodospin In A Hexagonal Crystal Form<>Chain A, Crystal Structure Of Bovine Rhodopsin<>Chain B, Crystal Structure Of Bovine Rhodopsin<>Chain A, Crystal Structure Of The Active G-Protein-Coupled Receptor Opsin In Complex With A C-Terminal Peptide Derived From The Galpha Subunit Of 
Transducin\n+sp|P08100|OPSD_HUMAN\tgi|195927458|pdb|3C9M|A\t93.10\t348\t24\t0\t1\t348\t1\t348\t0.0\t674\tgi|195927458|pdb|3C9M|A\t1738\t324\t335\t0\t96.26\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMCGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSCFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t348\tChain A, Structure Of A Mutant Bovine Rhodopsin In Hexagonal Crystal Form\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_human_vs_pdb_seg_no_converted_std.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_human_vs_pdb_seg_no_converted_std.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,8 @@ +sp|Q9BS26|ERP44_HUMAN gi|193885198|pdb|2R2J|A 97.11 381 11 0 26 406 2 382 0.0 768 +sp|Q9BS26|ERP44_HUMAN gi|88192228|pdb|2B5E|A 25.17 290 193 8 25 306 10 283 4e-20 95.1 +sp|Q9NSY1|BMP2K_HUMAN gi|73536291|pdb|2BUJ|A 29.39 279 182 8 40 308 21 294 1e-22 105 +sp|Q9NSY1|BMP2K_HUMAN gi|270346335|pdb|2WQM|A 27.21 272 166 12 53 311 36 288 6e-17 86.3 +sp|P06213|INSR_HUMAN gi|116667097|pdb|2DTG|E 95.91 928 7 2 28 955 1 897 0.0 1846 +sp|P06213|INSR_HUMAN gi|114794482|pdb|2HR7|A 99.59 485 2 0 28 512 1 485 0.0 1016 +sp|P08100|OPSD_HUMAN gi|16975387|pdb|1JFP|A 93.39 348 23 0 1 348 1 348 0.0 681 +sp|P08100|OPSD_HUMAN gi|195927458|pdb|3C9M|A 93.10 348 24 0 1 348 1 348 0.0 674 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_rhodopsin_vs_four_human.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_rhodopsin_vs_four_human.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,6 @@ +gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 348 1 348 0.0 701 +gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.33 354 53 2 1 354 1 348 0.0 605 +gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0 630 +gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0 630 +gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.10 348 23 1 1 347 1 348 0.0 651 +gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.09 349 65 1 1 349 1 348 0.0 587 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_sample.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_sample.xml Fri Jan 30 08:27:28 2015 -0500 |
| [ |
| b'@@ -0,0 +1,293 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>blastp</BlastOutput_program>\n+ <BlastOutput_version>BLASTP 2.2.24+</BlastOutput_version>\n+ <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+ <BlastOutput_db>nr</BlastOutput_db>\n+ <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+ <BlastOutput_query-def>Sample</BlastOutput_query-def>\n+ <BlastOutput_query-len>516</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+ <Parameters_expect>1e-30</Parameters_expect>\n+ <Parameters_gap-open>11</Parameters_gap-open>\n+ <Parameters_gap-extend>1</Parameters_gap-extend>\n+ <Parameters_filter>F</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+ <BlastOutput_iterations>\n+ <Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>Sample</Iteration_query-def>\n+ <Iteration_query-len>516</Iteration_query-len>\n+ <Iteration_hits>\n+ <Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>gi|119953746|ref|YP_950551.1|</Hit_id>\n+ <Hit_def>tail tape measure protein [Streptococcus phage SMP] >gi|118430558|gb|ABK91882.1| tail tape measure protein [Streptococcus suis phage SMP]</Hit_def>\n+ <Hit_accession>YP_950551</Hit_accession>\n+ <Hit_len>659</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>949.117592429394</Hsp_bit-score>\n+ <Hsp_score>2452</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>516</Hsp_query-to>\n+ <Hsp_hit-from>27</Hsp_hit-from>\n+ <Hsp_hit-to>542</Hsp_hit-to>\n+ 
<Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>500</Hsp_identity>\n+ <Hsp_positive>500</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>516</Hsp_align-len>\n+ <Hsp_qseq>FHLLNSGGSALSVMFAKLVGIIAGISAPIWXXXXXXXXXXXXXXXXYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_qseq>\n+ <Hsp_hseq>FHLLNSGGSALSVMFAKLVGIIAGISAPIWAVIGVIAALVAGFVLLYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_hseq>\n+ <Hsp_midline>FHLLNSGGSALSVMFAKLVGIIAGISAPIW YNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNN'..b'NVFNAIKNTATNVWNAIKTTISNVVQTILNF---------------------------------VTPIFNTMKNTITNIFNAIRNTASSVWNSIKTTISNIVTSVKNTVINIFNALKNSITNIFNAIRNTASTVWNSIKSTVSNIVSATVNTVKNLFNGMKNTVSSIWDGVRNTISNVVNAVKNTISNVWGGITGTVSN----IFNGVKNAIDGPMNAAKNLVKNVV----DAIKGF</Hsp_hseq>\n+ <Hsp_midline>+++V L G +V WN+ + + + ++ + VE V + +QT W++I 
AVV ++ N+ K + D KA Q + W+ +K +A +WE I V I+G + + + K+ + +W ++ V W+ IK TV++ TA+ + I +I+TT V+NAI A+N+W AI TT+ +V+ TI + VT F+ +K I+N + I+ S +WN+I T +S I +K + +K +I+N+ I++ T WN+IK+++S N N +K+ + W+ +++ IS + +K+T+SN W + TV+N I + V+ D +NAA+N + N + D I GF</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+ </Hit>\n+ <Hit>\n+ <Hit_num>9</Hit_num>\n+ <Hit_id>gi|163941333|ref|YP_001646217.1|</Hit_id>\n+ <Hit_def>prophage LambdaBa01, membrane protein, putative [Bacillus weihenstephanensis KBAB4] >gi|163863530|gb|ABY44589.1| prophage LambdaBa01, membrane protein, putative [Bacillus weihenstephanensis KBAB4]</Hit_def>\n+ <Hit_accession>YP_001646217</Hit_accession>\n+ <Hit_len>725</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>138.657684699283</Hsp_bit-score>\n+ <Hsp_score>348</Hsp_score>\n+ <Hsp_evalue>8.15996781441799e-31</Hsp_evalue>\n+ <Hsp_query-from>61</Hsp_query-from>\n+ <Hsp_query-to>480</Hsp_query-to>\n+ <Hsp_hit-from>142</Hsp_hit-from>\n+ <Hsp_hit-to>560</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>118</Hsp_identity>\n+ <Hsp_positive>203</Hsp_positive>\n+ <Hsp_gaps>29</Hsp_gaps>\n+ <Hsp_align-len>434</Hsp_align-len>\n+ <Hsp_qseq>WEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIK---AVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKT----VWS-------AAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLIN</Hsp_qseq>\n+ 
<Hsp_hseq>WDAIKQWTIDAWNAIGEFLVGIWDGIVQWASEAWNSISESTSAVWNSIKEFLIGIWNGIVEFVVT-WGT--AILETYVGIWTSIFNFCMEIWNGIVEYLTSVLQGIATFFTEIWTSISTFFQEIWNGLVAFITPVLQGIADFFAM-----------IWNGISTVIQTVWNFITQYLQAIWTAILYFATPLFESIKNFISECWNKISSTTSLVWETIKNFLVSCWNGLVSFVTPIFEKIKSWIISVWDTISSATMAVWNAVKNFLQACWNGLVSIVTPIFDAIKNWIVNVWNAISSTTSAVWNAIKSYLSSLWNSIVSTASSIFNSIKSAISTVWNMISSASSSVWNGIKSTLSSIWNGIKSTASSVWNGLKDAIMTPVRWVTSAVSGAFNGMKSAVLGVWDGIKSGIRTAINGIIRIINKFI-DGFNTPAELLN</Hsp_hseq>\n+ <Hsp_midline>W+AIK A A+ F++ +W +V W +E I ++ VWN+I+ + + ++ V T W A++ T + + ++ + +++ GI++ +V+Q I ++ W ++ IW G+ + + + G+ F +W I V+ +W++I + TA+ + SI+ WN IS+ S +W I ++S + ++ E IK+ VW A W +K A +V +VT FD IK I N W I + TS +WNAI ++LS +W I + AS+ + IK+ IS V I S + WN IK+++S+ N IKS A + WN +K AI T + + S VS +N + S V I S +RT + + FI + + +L+N</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+ </Hit>\n+ </Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>6589360</Statistics_db-num>\n+ <Statistics_db-len>-2041834015</Statistics_db-len>\n+ <Statistics_hsp-len>0</Statistics_hsp-len>\n+ <Statistics_eff-space>504129014857</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ </Iteration>\n+ </BlastOutput_iterations>\n+</BlastOutput>\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastp_sample_converted.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_sample_converted.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,9 @@ +Sample gi|119953746|ref|YP_950551.1| 96.90 516 16 0 1 516 27 542 0.0 949 +Sample gi|148986157|ref|ZP_01819143.1| 41.27 252 115 3 49 300 679 897 2e-41 174 +Sample gi|77411259|ref|ZP_00787609.1| 41.00 261 143 2 50 310 655 904 8e-39 165 +Sample gi|76786754|ref|YP_329383.1| 39.46 261 147 2 50 310 655 904 7e-37 159 +Sample gi|153811333|ref|ZP_01964001.1| 29.98 557 277 18 3 516 573 1059 2e-36 157 +Sample gi|56962696|ref|YP_174422.1| 28.79 389 228 8 48 433 123 465 3e-33 146 +Sample gi|50914476|ref|YP_060448.1| 43.82 178 100 0 50 227 655 832 5e-33 146 +Sample gi|29374987|ref|NP_814140.1| 25.46 432 244 8 73 482 545 920 7e-31 139 +Sample gi|163941333|ref|YP_001646217.1| 27.19 434 287 7 61 480 142 560 8e-31 138 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastx_rhodopsin_vs_four_human.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,10 @@ +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastx_rhodopsin_vs_four_human.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,741 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>blastx</BlastOutput_program>\n+ <BlastOutput_version>BLASTX 2.2.30+</BlastOutput_version>\n+ <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+ <BlastOutput_db></BlastOutput_db>\n+ <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+ <BlastOutput_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</BlastOutput_query-def>\n+ <BlastOutput_query-len>1047</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+ <Parameters_expect>1e-10</Parameters_expect>\n+ <Parameters_gap-open>11</Parameters_gap-open>\n+ <Parameters_gap-extend>1</Parameters_gap-extend>\n+ <Parameters_filter>L;</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+ <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>30</Statistics_hsp-len>\n+ <Statistics_eff-space>119944</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits 
found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>2</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+ <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>30</Statistics_hsp-len>\n+ <Statistics_eff-space>119944</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>3</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+ <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>30</Statistics_hsp-len>\n+ <Statistics_eff-space>119944</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>4</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+ <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+<Hi'..b'Statistics_db-len>\n+ <Statistics_hsp-len>32</Statistics_hsp-len>\n+ 
<Statistics_eff-space>155584</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>23</Iteration_iter-num>\n+ <Iteration_query-ID>Query_6</Iteration_query-ID>\n+ <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+ <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>32</Statistics_hsp-len>\n+ <Statistics_eff-space>155584</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>24</Iteration_iter-num>\n+ <Iteration_query-ID>Query_6</Iteration_query-ID>\n+ <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+ <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>Subject_4</Hit_id>\n+ <Hit_def>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Hit_def>\n+ <Hit_accession>Subject_4</Hit_accession>\n+ <Hit_len>348</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>532.717</Hsp_bit-score>\n+ <Hsp_score>1371</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>23</Hsp_query-from>\n+ <Hsp_query-to>1021</Hsp_query-to>\n+ <Hsp_hit-from>1</Hsp_hit-from>\n+ <Hsp_hit-to>333</Hsp_hit-to>\n+ 
<Hsp_query-frame>2</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>272</Hsp_identity>\n+ <Hsp_positive>307</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>333</Hsp_align-len>\n+ <Hsp_qseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP +++ </Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>32</Statistics_hsp-len>\n+ <Statistics_eff-space>155584</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human_all.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastx_rhodopsin_vs_four_human_all.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,10 @@ +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|57163782|ref|NM_001009242.1| gi|57163782|ref|NM_001009242.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA71FL4TS79VA9ML34VI71LI7SASA21LI13TA1GV11 99 33 N/A N/A N/A N/A N/A +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A 0 gi|2734705|gb|U59921.1|BBU59921 gi|2734705|gb|U59921.1|BBU59921 0 0 sp|P08100|OPSD_HUMAN 
sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 10IV1MF2KA20YF1IM1CA7LV7ML5IV17FV1NDHL4CG3VS1ML1ST1MLNH3IVLF1AP3YNVL36SG4VI7IV3SA2VA3LA9ML5VI26LM8RQ2CF30VIFA13FY2STNH3EN7VI7SASA9LM8IL2LI6FL1ED1DE 63 21 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA9HY61FL4TS28 22 3 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 18LV3GS19LI7SASA13 22 2 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 6ML34VI14VARASA 22 1 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| 
gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 1AG36LV1LF13VA4 22 1 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 9LI13TA1AV 22 1 N/A N/A N/A N/A N/A +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A 0 gi|283855822|gb|GQ290312.1| gi|283855822|gb|GQ290312.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA66ND4FL4TS5ML7AP49LV25ML34VI40VI6LV19VI11SASA21LI13TA1 99 33 N/A N/A N/A N/A N/A +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 
MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|18148870|dbj|AB062417.1| gi|18148870|dbj|AB062417.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA9AY22MV38FL4TS79VA9ML10PLHKEP1TV14IT2LM1VI47LV3GS11DN14TSSA1VI17VL2LI13TA13 99 33 N/A N/A N/A N/A N/A +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A 0 gi|12583664|dbj|AB043817.1| gi|12583664|dbj|AB043817.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 
10IV1MF22AQ2AM1SA4FL2IVAL12IVEQ23FL4TS1ML3ML12IL14CS8WYMV5VMTS6SN6MAVF2TV6LA3FA20RLAK1GEIV10TVCV2ST2LMAIVI1SF4RQ2CF12EA3RK2RK6VI4SA2VI9WF8TN9SA5SA1LI3MV3CM6HN2IL2LI6FLEGEDEDDEGA 74 25 N/A N/A N/A N/A N/A |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human_converted.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,10 @@ +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,10 @@ +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 
VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_rhodopsin_vs_four_human_ext.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,10 @@ +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 
23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_sample.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastx_sample.xml Fri Jan 30 08:27:28 2015 -0500 |
| [ |
| b'@@ -0,0 +1,758 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>blastx</BlastOutput_program>\n+ <BlastOutput_version>BLASTX 2.2.24+</BlastOutput_version>\n+ <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+ <BlastOutput_db>/share/BlastDB/nr</BlastOutput_db>\n+ <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+ <BlastOutput_query-def>phage_suis</BlastOutput_query-def>\n+ <BlastOutput_query-len>1890</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+ <Parameters_expect>0.001</Parameters_expect>\n+ <Parameters_gap-open>11</Parameters_gap-open>\n+ <Parameters_gap-extend>1</Parameters_gap-extend>\n+ <Parameters_filter>L;</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+ <BlastOutput_iterations>\n+ <Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>phage_suis</Iteration_query-def>\n+ <Iteration_query-len>1890</Iteration_query-len>\n+ <Iteration_hits>\n+ <Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>gi|119953746|ref|YP_950551.1|</Hit_id>\n+ <Hit_def>tail tape measure protein [Streptococcus phage SMP] >gi|118430558|gb|ABK91882.1| tail tape measure protein [Streptococcus phage SMP]</Hit_def>\n+ <Hit_accession>YP_950551</Hit_accession>\n+ <Hit_len>659</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>988.407949172964</Hsp_bit-score>\n+ <Hsp_score>2554</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>336</Hsp_query-from>\n+ <Hsp_query-to>1889</Hsp_query-to>\n+ <Hsp_hit-from>25</Hsp_hit-from>\n+ <Hsp_hit-to>542</Hsp_hit-to>\n+ 
<Hsp_query-frame>3</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>518</Hsp_identity>\n+ <Hsp_positive>518</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>518</Hsp_align-len>\n+ <Hsp_qseq>NWFHLLNSGGSALSVMFAKLVGIIAGISAPIWXXXXXXXXXXXXXXXXYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_qseq>\n+ <Hsp_hseq>NWFHLLNSGGSALSVMFAKLVGIIAGISAPIWAVIGVIAALVAGFVLLYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDWAKGLLGIKS</Hsp_hseq>\n+ <Hsp_midline>NWFHLLNSGGSALSVMFAKLVGIIAGISAPIWAVIGVIAALVAGFVLLYNTNEEFRTKVQAAWEAIKSAISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKT'..b'\n+ 
<Hsp_qseq>AIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVA------IDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSGIWEGIKTAASTAWEWIKTTISNVMTTIKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTNAGPRIVSAVRTGFDNAVNAARNFISNAISVGGDLINGFVEGVKGAAGRLIDAVGGAVSGAIDW-AKG</Hsp_qseq>\n+ <Hsp_hseq>AMAEVGGVLAEALAPVLELLAQLLQAVANWFSN-LPGPIQTFIVIMGGLITVVGLLLPGLLA-----LQAAAVAMGTTIGGLVVAAAPIVGTVLGIIAVITLLVVWIQELWQNNEGFRTAVI-EIWNAIYAFISVIIQEISTFIMTIWGTLTTWWTENQALIQAAVETVWNAISTVIQTVMSLIGPYLEAAWANIQLIITTAWEIIKTVVETAITVVLGIIKAIMQAITGDWSGAWETIKGVLQRVWQAIQQIVTTILSAIGQFISNTWNGIKNTFSNILSAISGIVSSIWNTIKSVISSVISSIVSFVSSGWSGIQQTISSILSGISSTVSSVWNGIKNSISNA----INGAKNVVSSAINAIKNLFNFKISWPHIPLPHF--SVSGSANPLDWLKGGLPKISIAWYAKG</Hsp_hseq>\n+ <Hsp_midline>A+ V + AL P+++ L+ AV N + + T + ++ G+I V ++ G + L+ A + I LV A + G++ + + +++ +W G AV+ IW+ I +S I + I I ++ T W I +W AIST + +V++ I Y++ I+ + + AWEIIK V + ++G++ I S AWE IK +W AI ++ I I S W IK T SN+++ I + + WN IK+ IS+ +++I S + W+ I+ IS+ + I STVS+ WN + ++++NA ++ + +A+NA +N + IS + F V G+A L GG +I W AKG</Hsp_midline>\n+ </Hsp>\n+ <Hsp>\n+ <Hsp_num>3</Hsp_num>\n+ <Hsp_bit-score>121.708903358919</Hsp_bit-score>\n+ <Hsp_score>304</Hsp_score>\n+ <Hsp_evalue>2.99798279087674e-25</Hsp_evalue>\n+ <Hsp_query-from>543</Hsp_query-from>\n+ <Hsp_query-to>1673</Hsp_query-to>\n+ <Hsp_hit-from>637</Hsp_hit-from>\n+ <Hsp_hit-to>1004</Hsp_hit-to>\n+ <Hsp_query-frame>3</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>89</Hsp_identity>\n+ <Hsp_positive>168</Hsp_positive>\n+ <Hsp_gaps>29</Hsp_gaps>\n+ <Hsp_align-len>387</Hsp_align-len>\n+ 
<Hsp_qseq>ISTAVEAVVSFVMDLWGQMVAWWNENQELIRQTAETVWNAIRTVVETVMTALIPIVQTAWDLILAVVTTVLNVIKTVVDTGLKVVLGIIKAVMQMINGDWSGAWETLKGVAGTIWEGIKSLVQVAIDGLVQIFQTGLAFLKSIWDTVWGTIMAVVGPIWDWIKTTVSNAITAVWEIIQNIMTSIQTTWDTVWNAISTVASNIWTAISTTVMSVLTTIWGYIQTYLELIKTVWSAAWEIIKAVFAAILLTIVGLVTGNFDLIKQAISNAWEIIKTKTSEIWNAITTFLSG--IWEGIK------TAASTAWEWIKTTISNVMTT--IKSNIETAWNNIKTSISNALNNIKSAAENAWNNIKSAISTAIENIKSTVSNGWNNLVSTVTN</Hsp_qseq>\n+ <Hsp_hseq>IIAVITLLVVWIQELW--------QNNEGFRTAVIEIWNAIYAFISVIIQEISTFIMTIWGTLTTWWTENQALIQAAVETVWNAISTVIQTVMSLIGPYLEAAWANIQLIITTAWEIIKTVVETAITVVLGIIKAIMQAITGDWSGAWETIKGVLQRVWQAIQQIVTTILSAIGQFISNTWNGIKNTFSNILSAISGIVSSIWNTIKSVISSVISSIVSFV-----------SSGWSGIQQTISSILSGISSTVSSVWNGIKNSISNAINGAKNVVSSAINAIKNLFNFKISWPHIPLPHFSVSGSANPLDWLKGGLPKISIAWYAKGGILTKPTAFGMNEKQLMVGGEAGKEAVLPLTKQNLAAIGEGIASTMGTGGNFINVSITD</Hsp_hseq>\n+ <Hsp_midline>I + +V ++ +LW +N E R +WNAI + ++ + + T W + T +I+ V+T + +I+ VM +I AW ++ + T WE IK++V+ AI ++ I + + + W W TI V+ +W I+ V+ ++A+ + I N I+ T+ + +AIS + S+IW I + + SV+++I ++ S+ W I+ ++IL I V+ ++ IK +ISNA K S NAI + W I + ++ +W+K + + K I T + + ++ E K ++ E I ST+ G N + ++T+</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+ </Hit>\n+ </Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>12310662</Statistics_db-num>\n+ <Statistics_db-len>-87459526</Statistics_db-len>\n+ <Statistics_hsp-len>0</Statistics_hsp-len>\n+ <Statistics_eff-space>1174893963300</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ </Iteration>\n+ </BlastOutput_iterations>\n+</BlastOutput>\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/blastx_sample_converted.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastx_sample_converted.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,33 @@ +phage_suis gi|119953746|ref|YP_950551.1| 100.00 518 0 0 336 1889 25 542 0.0 988 +phage_suis gi|289551554|ref|YP_003472458.1| 32.95 516 280 6 342 1889 657 1106 6e-66 256 +phage_suis gi|223044325|ref|ZP_03614360.1| 30.22 546 327 7 393 1889 655 1193 1e-64 252 +phage_suis gi|223044325|ref|ZP_03614360.1| 19.88 508 328 9 384 1796 844 1309 6e-28 130 +phage_suis gi|268611153|ref|ZP_06144880.1| 28.64 639 371 11 78 1847 440 1042 1e-60 239 +phage_suis gi|268611153|ref|ZP_06144880.1| 23.36 441 286 7 543 1856 547 938 4e-31 141 +phage_suis gi|268611153|ref|ZP_06144880.1| 25.27 459 266 11 522 1844 722 1121 8e-31 140 +phage_suis gi|268611153|ref|ZP_06144880.1| 24.63 406 267 8 501 1694 770 1144 3e-23 115 +phage_suis gi|268611153|ref|ZP_06144880.1| 27.80 241 145 3 492 1148 811 1044 6e-16 90.9 +phage_suis gi|268611153|ref|ZP_06144880.1| 19.76 253 168 6 1158 1883 547 775 3e-04 52.0 +phage_suis gi|268610688|ref|ZP_06144415.1| 28.95 639 369 11 78 1847 440 1042 3e-59 234 +phage_suis gi|268610688|ref|ZP_06144415.1| 24.64 491 316 9 501 1856 770 1245 4e-39 167 +phage_suis gi|268610688|ref|ZP_06144415.1| 23.79 517 319 9 492 1832 811 1322 3e-37 161 +phage_suis gi|268610688|ref|ZP_06144415.1| 21.91 493 322 11 510 1859 905 1377 1e-25 123 +phage_suis gi|268610688|ref|ZP_06144415.1| 20.55 292 197 5 486 1343 1138 1400 4e-10 71.6 +phage_suis gi|268610688|ref|ZP_06144415.1| 21.41 341 225 10 894 1883 467 775 8e-05 53.9 +phage_suis gi|153811333|ref|ZP_01964001.1| 28.34 621 364 16 108 1847 493 1073 8e-55 219 +phage_suis gi|153811333|ref|ZP_01964001.1| 29.67 428 250 9 519 1760 709 1099 2e-47 195 +phage_suis gi|153811333|ref|ZP_01964001.1| 29.41 391 226 7 498 1640 746 1096 1e-39 169 +phage_suis gi|153811333|ref|ZP_01964001.1| 26.49 268 174 3 492 1256 854 1111 3e-24 118 +phage_suis gi|153811333|ref|ZP_01964001.1| 27.12 306 198 4 510 1385 816 1110 1e-23 116 +phage_suis gi|262113750|emb|CAR95417.1| 38.46 286 169 1 384 1241 540 818 2e-54 218 +phage_suis gi|262113750|emb|CAR95417.1| 29.68 
411 271 7 657 1871 460 858 3e-40 171 +phage_suis gi|77411259|ref|ZP_00787609.1| 37.19 285 172 1 387 1241 628 905 2e-53 215 +phage_suis gi|77411259|ref|ZP_00787609.1| 28.01 407 281 6 660 1871 548 945 1e-40 172 +phage_suis gi|77411259|ref|ZP_00787609.1| 22.82 355 207 7 978 1877 540 882 9e-14 83.6 +phage_suis gi|76786754|ref|YP_329383.1| 36.84 285 173 1 387 1241 628 905 8e-53 213 +phage_suis gi|76786754|ref|YP_329383.1| 27.27 407 284 6 660 1871 548 945 3e-38 164 +phage_suis gi|76786754|ref|YP_329383.1| 24.73 283 194 2 543 1391 637 900 3e-23 115 +phage_suis gi|76786754|ref|YP_329383.1| 22.91 323 204 6 978 1847 540 850 2e-13 82.4 +phage_suis gi|50914476|ref|YP_060448.1| 35.86 290 179 1 372 1241 623 905 4e-51 207 +phage_suis gi|50914476|ref|YP_060448.1| 27.01 411 280 7 660 1871 548 945 2e-35 155 +phage_suis gi|50914476|ref|YP_060448.1| 23.00 387 269 5 543 1673 637 1004 3e-25 121 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003.smp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd00003.smp Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,19957 @@\n+PssmWithParameters ::= {\n+ pssm {\n+ isProtein TRUE,\n+ numRows 28,\n+ numColumns 234,\n+ byRow FALSE,\n+ query seq {\n+ id {\n+ general {\n+ db "CDD",\n+ tag id 237977\n+ }\n+ },\n+ descr {\n+ title "cd00003, PNPsynthase, Pyridoxine 5\'-phosphate (PNP) synthase\n+ domain; pyridoxal 5\'-phosphate is the active form of vitamin B6 that acts as\n+ an essential, ubiquitous coenzyme in amino acid metabolism. In bacteria,\n+ formation of pyridoxine 5\'-phosphate is a step in the biosynthesis of vitamin\n+ B6. PNP synthase, a homooctameric enzyme, catalyzes the final step in PNP\n+ biosynthesis, the condensation of 1-amino-acetone 3-phosphate and\n+ 1-deoxy-D-xylulose 5-phosphate. PNP synthase adopts a TIM barrel topology,\n+ intersubunit contacts are mediated by three \'\'extra\'\' helices, generating a\n+ tetramer of symmetric dimers with shared active sites; the open state has\n+ been proposed to accept substrates and to release products, while most of the\n+ catalytic events are likely to occur in the closed state; a hydrophilic\n+ channel running through the center of the barrel was identified as the\n+ essential structural feature that enables PNP synthase to release water\n+ molecules produced during the reaction from the closed, solvent-shielded\n+ active site."\n+ },\n+ inst {\n+ repr raw,\n+ mol aa,\n+ length 234,\n+ seq-data ncbieaa "RLGVNIDHVATLRNARGTNYPDPVEAALLAEKAGADGITVHLREDRRHIQDR\n+DVRLLRELVRTELNLEMAPTEEMLEIALEVKPHQVTLVPEKREELTTEGGLDVAGQAEKLKPIIERLKDAGIRVSLFI\n+DPDPEQIEAAKEVGADRVELHTGPYANAYDKAEREAELERIAKAAKLARELGLGVNAGHGLNYENVKPIAKIPGIAEL\n+NIGHAIISRALFVGLEEAVREMKDLI"\n+ }\n+ },\n+ intermediateData {\n+ weightedResFreqsPerPos {\n+ { 0, 10, 0 },\n+ { 575880368388257, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 663632240275508, 10, -16 },\n+ { 18974024208621, 10, -15 },\n+ { 435260939968393, 10, -16 },\n+ { 0, 10, 0 },\n+ { 231579235547631, 10, -16 },\n+ { 217943087911858, 10, -16 },\n+ { 197347688751049, 10, -15 },\n+ { 
149575209809135, 10, -15 },\n+ { 0, 10, 0 },\n+ { 373639929584932, 10, -16 },\n+ { 0, 10, 0 },\n+ { 454596950735964, 10, -16 },\n+ { 280508357667036, 10, -15 },\n+ { 0, 10, 0 },\n+ { 276040598748792, 10, -16 },\n+ { 187780737233597, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 119593107246649, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 355209907721631, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 925910073190528, 10, -15 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 385689360373084, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 526473206870828, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 499042712921408, 10, -15 },\n+ { 160105971725447, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 217340080528288, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 38877105237495, 10, -14 },\n+ { 0, 10, 0 },\n+ { 217943087911858, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ '..b'-414,\n+ -399,\n+ -460,\n+ 5,\n+ 352,\n+ 16,\n+ -587,\n+ -100,\n+ -494,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -195,\n+ -32768,\n+ -409,\n+ -650,\n+ -589,\n+ -382,\n+ -651,\n+ -647,\n+ 402,\n+ -562,\n+ -63,\n+ -221,\n+ -629,\n+ -572,\n+ -558,\n+ -589,\n+ -496,\n+ -335,\n+ 665,\n+ -607,\n+ -100,\n+ -446,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -24,\n+ -32768,\n+ -591,\n+ -46,\n+ -272,\n+ -576,\n+ -277,\n+ -37,\n+ -291,\n+ 283,\n+ -138,\n+ -402,\n+ -350,\n+ -488,\n+ 369,\n+ 578,\n+ -203,\n+ 
-174,\n+ -491,\n+ -587,\n+ -100,\n+ -483,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ 7,\n+ -32768,\n+ -616,\n+ 241,\n+ 528,\n+ -606,\n+ -496,\n+ -367,\n+ -230,\n+ 196,\n+ -343,\n+ -105,\n+ -331,\n+ -454,\n+ 43,\n+ 272,\n+ -194,\n+ -392,\n+ -502,\n+ -619,\n+ -100,\n+ -523,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -446,\n+ -32768,\n+ -500,\n+ -647,\n+ -538,\n+ 178,\n+ -619,\n+ -325,\n+ -106,\n+ -485,\n+ -175,\n+ 921,\n+ -550,\n+ -602,\n+ -404,\n+ -481,\n+ -486,\n+ -420,\n+ -288,\n+ -277,\n+ -100,\n+ 627,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -165,\n+ -32768,\n+ -534,\n+ -503,\n+ -358,\n+ -453,\n+ -555,\n+ -452,\n+ -144,\n+ 482,\n+ 287,\n+ -280,\n+ -440,\n+ -508,\n+ -91,\n+ 361,\n+ -409,\n+ -396,\n+ 122,\n+ -575,\n+ -100,\n+ -472,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ 149,\n+ -32768,\n+ -567,\n+ 385,\n+ 307,\n+ -606,\n+ -460,\n+ -383,\n+ -292,\n+ 51,\n+ -396,\n+ -460,\n+ -145,\n+ -458,\n+ 207,\n+ 304,\n+ -90,\n+ -107,\n+ -277,\n+ -625,\n+ -100,\n+ -526,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ 176,\n+ -32768,\n+ 31,\n+ -605,\n+ -520,\n+ -357,\n+ -572,\n+ -562,\n+ 483,\n+ -473,\n+ 369,\n+ -197,\n+ -576,\n+ -553,\n+ -44,\n+ -69,\n+ -433,\n+ -185,\n+ 8,\n+ -549,\n+ -100,\n+ -456,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -154,\n+ -32768,\n+ 387,\n+ -646,\n+ -594,\n+ -334,\n+ -636,\n+ -593,\n+ 485,\n+ -545,\n+ 343,\n+ 529,\n+ -604,\n+ -586,\n+ -513,\n+ -556,\n+ -479,\n+ -56,\n+ 41,\n+ -533,\n+ -100,\n+ -450,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768\n+ },\n+ lambda { 267, 10, -3 },\n+ kappa { 695502437462053, 10, -16 },\n+ h { 14, 10, -2 },\n+ scalingFactor 100,\n+ lambdaUngapped { 315181590957692, 10, -15 },\n+ kappaUngapped { 22723615854819, 10, -14 },\n+ hUngapped { 852942415611443, 10, -15 }\n+ }\n+ },\n+ params {\n+ pseudocount 10,\n+ rpsdbparams {\n+ matrixName "BLOSUM62"\n+ }\n+ }\n+}\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.aux --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd00003_and_cd00008.aux Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,12 @@ +BLOSUM62 +11 +1 +0.000000e+00 +0.000000e+00 +0 +0 +100.000000 +234 +6.955024e-02 +160 +4.862535e-02 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.freq |
| b |
| Binary file test-data/cd00003_and_cd00008.freq has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.loo |
| b |
| Binary file test-data/cd00003_and_cd00008.loo has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.phr |
| b |
| Binary file test-data/cd00003_and_cd00008.phr has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.pin |
| b |
| Binary file test-data/cd00003_and_cd00008.pin has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.psd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd00003_and_cd00008.psd Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,2 @@ +gnl|cdd|1890191 +gnl|cdd|2379770 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.psi |
| b |
| Binary file test-data/cd00003_and_cd00008.psi has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.psq |
| b |
| Binary file test-data/cd00003_and_cd00008.psq has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00003_and_cd00008.rps |
| b |
| Binary file test-data/cd00003_and_cd00008.rps has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/cd00008.smp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd00008.smp Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,13679 @@\n+PssmWithParameters ::= {\n+ pssm {\n+ isProtein TRUE,\n+ numRows 28,\n+ numColumns 160,\n+ byRow FALSE,\n+ query seq {\n+ id {\n+ general {\n+ db "CDD",\n+ tag id 189019\n+ }\n+ },\n+ descr {\n+ title "cd00008, PIN_53EXO-like, PIN domains of the 5\'-3\' exonucleases\n+ of DNA polymerase I, bacteriophage T4 RNase H and T5-5\' nucleases, and\n+ homologs. PIN (PilT N terminus) domains of the 5\'-3\' exonucleases (53EXO) of\n+ mutli-domain DNA polymerase I and single domain protein homologs, as well as,\n+ the PIN domains of bacteriophage T5-5\'nuclease (T5FEN or 5\'-3\'exonuclease),\n+ bacteriophage T4 RNase H (T4FEN), bacteriophage T3 (T3 phage\n+ exodeoxyribonuclease) and other similar nucleases are included in this\n+ family. The 53EXO of DNA polymerase I recognizes and endonucleolytically\n+ cleaves a structure-specific DNA substrate that has a bifurcated downstream\n+ duplex and an upstream template-primer duplex that overlaps the downstream\n+ duplex by 1 bp. The T5-5\'nuclease is a 5\'-3\'exodeoxyribonuclease that also\n+ exhibits endonucleolytic activity on flap structures (branched duplex DNA\n+ containing a free single-stranded 5\'end). T4 RNase H, which removes the RNA\n+ primers that initiate lagging strand fragments, has 5\'- 3\'exonuclease\n+ activity on DNA/DNA and RNA/DNA duplexes and has endonuclease activity on\n+ flap or forked DNA structures. These nucleases are members of the\n+ structure-specific, 5\' nuclease family that catalyzes hydrolysis of DNA\n+ duplex-containing nucleic acid structures during DNA replication, repair, and\n+ recombination. They contain a PIN domain with a helical arch/clamp region (I\n+ domain) of variable length (approximately 16 to 30 residues in 53EXO-like PIN\n+ domains) and a H3TH (helix-3-turn-helix) domain, an atypical\n+ helix-hairpin-helix-2-like region. Both the H3TH domain (not included here)\n+ and the helical arch/clamp region are involved in DNA binding. 
The active\n+ site of the 53EXO of Taq DNA polymerase I includes a set of conserved acidic\n+ residues that are essential for binding three divalent metal ions (two Mn2+\n+ ions and one Zn2+ ion) required for nuclease activity. T5-5\'nuclease requires\n+ at least two bound divalent metal ions for nuclease activity and is reported\n+ to be able to use Mg2+, Mn2+ or Co2+ as co-factors."\n+ },\n+ inst {\n+ repr raw,\n+ mol aa,\n+ length 160,\n+ seq-data ncbieaa "LMLVDGTNLAFRTKHNNSKKKEKINLSPFASSYVSSIQSLAKSYSARTTIVL\n+GDKGKSVFRLEHLPEYKGNRDEKYAEEKALDEQFFEYLKDAFELCKATTFPTFTIRGYEADDMAAYLVKKIGHEGDHV\n+WIISTDGDWDQLLTDKVSRFSPTTRREYHL"\n+ }\n+ },\n+ intermediateData {\n+ weightedResFreqsPerPos {\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 338020833333333, 10, -15 },\n+ { 0, 10, 0 },\n+ { 328645833333333, 10, -15 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 333333333333333, 10, -15 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 338020833333333, 10, -15 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 333333333333333, 10, -15 },\n+ { 328645833333333, 10, -15 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0'..b' -307,\n+ 704,\n+ -250,\n+ -309,\n+ -192,\n+ -207,\n+ -249,\n+ -197,\n+ -100,\n+ -12,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -64,\n+ -32768,\n+ -122,\n+ -145,\n+ -113,\n+ -102,\n+ -157,\n+ -140,\n+ -20,\n+ -97,\n+ 17,\n+ 497,\n+ -93,\n+ -116,\n+ -65,\n+ -112,\n+ -12,\n+ 327,\n+ -19,\n+ -149,\n+ -100,\n+ 
-121,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -142,\n+ -32768,\n+ -302,\n+ -51,\n+ 332,\n+ -282,\n+ -264,\n+ 602,\n+ -290,\n+ -70,\n+ -294,\n+ -205,\n+ -45,\n+ -211,\n+ 22,\n+ -107,\n+ -28,\n+ 303,\n+ -228,\n+ -332,\n+ -100,\n+ -69,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -121,\n+ -32768,\n+ -379,\n+ -186,\n+ -84,\n+ -383,\n+ 340,\n+ -157,\n+ -394,\n+ 351,\n+ -349,\n+ -246,\n+ -95,\n+ -242,\n+ -9,\n+ 403,\n+ -102,\n+ -177,\n+ -341,\n+ -352,\n+ -100,\n+ -289,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -196,\n+ -32768,\n+ -375,\n+ -234,\n+ -82,\n+ -9,\n+ -299,\n+ -9,\n+ -301,\n+ 349,\n+ -256,\n+ -192,\n+ -143,\n+ -264,\n+ -2,\n+ 405,\n+ -146,\n+ -178,\n+ -268,\n+ -72,\n+ -100,\n+ 490,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -226,\n+ -32768,\n+ -301,\n+ -157,\n+ 297,\n+ -71,\n+ -349,\n+ -222,\n+ -123,\n+ -172,\n+ 207,\n+ -46,\n+ -277,\n+ -314,\n+ -71,\n+ -214,\n+ -226,\n+ -213,\n+ -167,\n+ 856,\n+ -100,\n+ -25,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -167,\n+ -32768,\n+ -210,\n+ -396,\n+ -331,\n+ 65,\n+ -415,\n+ -122,\n+ 350,\n+ -300,\n+ 9,\n+ -12,\n+ -350,\n+ -347,\n+ -284,\n+ -313,\n+ -261,\n+ -135,\n+ 320,\n+ -71,\n+ -100,\n+ 490,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -136,\n+ -32768,\n+ -287,\n+ -161,\n+ -56,\n+ -281,\n+ -243,\n+ 591,\n+ -274,\n+ 318,\n+ -281,\n+ -184,\n+ -43,\n+ -206,\n+ -7,\n+ 7,\n+ -29,\n+ 308,\n+ -222,\n+ -337,\n+ -100,\n+ -69,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -187,\n+ -32768,\n+ -240,\n+ -339,\n+ -301,\n+ -129,\n+ -391,\n+ -351,\n+ 323,\n+ -269,\n+ 251,\n+ 24,\n+ -362,\n+ 495,\n+ -275,\n+ -318,\n+ -249,\n+ -165,\n+ 48,\n+ -331,\n+ -100,\n+ -238,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768\n+ },\n+ lambda { 267, 10, -3 },\n+ kappa { 486253485452101, 10, -16 },\n+ h { 14, 10, -2 },\n+ scalingFactor 100,\n+ lambdaUngapped { 318588052238909, 10, 
-15 },\n+ kappaUngapped { 158869858915243, 10, -15 },\n+ hUngapped { 43477934178065, 10, -14 }\n+ }\n+ },\n+ params {\n+ pseudocount 10,\n+ rpsdbparams {\n+ matrixName "BLOSUM62"\n+ }\n+ }\n+}\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/chimera.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/chimera.fasta Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,168 @@\n+>chimera chunks of AB011145 plus M10051 plus BC112106\n+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC\n+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA\n+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC\n+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT\n+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA\n+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA\n+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA\n+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC\n+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA\n+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT\n+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG\n+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA\n+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA\n+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG\n+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA\n+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC\n+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT\n+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA\n+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC\n+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA\n+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG\n+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC\n+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC\n+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA\n+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT\n+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA\n+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT\n+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT\n+GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA\n+TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA\n+CTTTGAAGTGGAGGAGAGTCATAAGACAGA
ACATTTGGCAGGAATTGTCCTTATGAAACA\n+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG\n+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG\n+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT\n+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT\n+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT\n+TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG\n+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA\n+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT\n+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC\n+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT\n+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT\n+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT\n+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT\n+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT\n+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA\n+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA\n+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA\n+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT\n+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT\n+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT\n+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT\n+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA\n+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT\n+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT\n+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT\n+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT\n+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG\n+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC\n+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT\n+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA\n+GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT\n+TATTTTTATTTTGA
GCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT\n+GAGAGGGAATAATCTGAGC'..b'GGGGACAAGGCATCC\n+TGTGAAAATGAGTTACTTAAATTTTCTTACATTCGGACATCTTTTGACAAGATCTTGCTG\n+AGATGGGAGCCGTACTGGCCCCCCGACTTCCGAGACCTCTTGGGGTTCATGCTGTTCTAC\n+AAAGAGGCCCCTTATCAGAATGTGACGGAGTTCGATGGGCAGGATGCGTGTGGTTCCAAC\n+AGTTGGACGGTGGTAGACATTGACCCACCCCTGAGGTCCAACGACCCCAAATCACAGAAC\n+CACCCAGGGTGGCTGATGCGGGGTCTCAAGCCCTGGACCCAGTATGCCATCTTTGTGAAG\n+ACCCTGGTCACCTTTTCGGATGAACGCCGGACCTATGGGGCCAAGAGTGACATCATTTAT\n+GTCCAGACAGATGCCACCAACCCCTCTGTGCCCCTGGATCCAATCTCAGTGTCTAACTCA\n+TCATCCCAGATTATTCTGAAGTGGAAACCACCCTCCGACCCCAATGGCAACATCACCCAC\n+TACCTGGTTTTCTGGGAGAGGCAGGCGGAAGACAGTGAGCTGTTCGAGCTGGATTATTGC\n+CTCAAAGGGCTGAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT\n+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG\n+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT\n+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC\n+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG\n+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG\n+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC\n+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC\n+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT\n+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG\n+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG\n+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT\n+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC\n+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT\n+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT\n+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT\n+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC\n+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG\n+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG\n+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC\n+AAGGGCCAGCCCACGCTGG
TGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC\n+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA\n+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG\n+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA\n+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC\n+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC\n+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA\n+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT\n+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA\n+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTG\n+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT\n+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC\n+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC\n+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG\n+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT\n+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT\n+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG\n+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC\n+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT\n+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC\n+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT\n+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG\n+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC\n+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA\n+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC\n+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC\n+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG\n+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC\n+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG\n+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA\n+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG\n+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT\n+GAG
GCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG\n+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA\n+GCCATCCCACCAG\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/convert2blastmask_four_human_masked.maskinfo-asn1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/convert2blastmask_four_human_masked.maskinfo-asn1 Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,158 @@ +Blast-db-mask-info ::= { + algo-id 0, + algo-program seg, + algo-options "window=12; locut=2.2; hicut=2.5", + masks { + masks { + int { + from 6, + to 18, + id swissprot { + name "ERP44_HUMAN", + accession "Q9BS26", + release "reviewed" + } + }, + packed-int { + { + from 11, + to 46, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 325, + to 332, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 421, + to 496, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 501, + to 516, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 536, + to 558, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 636, + to 648, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 737, + to 762, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 789, + to 806, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 970, + to 983, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 999, + to 1010, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + } + }, + packed-int { + { + from 3, + to 26, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 372, + to 390, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 766, + to 791, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 1312, + to 1324, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + } + }, + int { + from 230, + to 246, + id swissprot { + name 
"OPSD_HUMAN", + accession "P08100", + release "reviewed" + } + } + }, + more FALSE + } +} |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary |
| b |
| Binary file test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/dustmasker_three_human.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dustmasker_three_human.fasta Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,183 @@\n+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds\n+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG\n+CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC\n+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC\n+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA\n+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC\n+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT\n+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA\n+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA\n+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA\n+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC\n+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA\n+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT\n+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG\n+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA\n+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA\n+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG\n+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA\n+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC\n+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT\n+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA\n+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC\n+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA\n+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG\n+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC\n+TATGTGTatttttattttgaataaacagaaagaaattttgggtttttaatttttttCTCC\n+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTaaaaaaaaaaaaaCCTGCTA\n+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT\n+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA\n+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT\n+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT\n+GGGAGGG
GAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA\n+TTGCTTATGAAAATTCCATAGTGGTAtttttttGGATTCTTAATGTGTAACTTAAACATA\n+CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA\n+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG\n+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG\n+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT\n+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT\n+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACtt\n+tttttCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG\n+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA\n+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT\n+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC\n+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT\n+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT\n+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT\n+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT\n+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT\n+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA\n+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA\n+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA\n+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT\n+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT\n+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT\n+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT\n+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA\n+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT\n+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT\n+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT\n+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT\n+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG\n+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC\n+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGA
GTTGTT\n+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTA'..b'GAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT\n+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG\n+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT\n+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC\n+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG\n+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG\n+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC\n+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC\n+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT\n+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG\n+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG\n+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT\n+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC\n+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT\n+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT\n+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT\n+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC\n+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG\n+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG\n+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC\n+AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC\n+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA\n+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG\n+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA\n+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC\n+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC\n+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA\n+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT\n+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA\n+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCA
AGGACGACCTG\n+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT\n+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC\n+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC\n+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG\n+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT\n+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT\n+ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC\n+TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA\n+GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT\n+TTTTTCGTTccccccacccgcccccAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT\n+CttttttttttttttttttttttttttttGCTGGTGTCTGAGCTTCAGTATAAAAGACAA\n+AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA\n+>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds\n+CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC\n+CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG\n+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG\n+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC\n+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT\n+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC\n+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT\n+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG\n+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC\n+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA\n+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC\n+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC\n+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG\n+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC\n+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG\n+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA\n+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG\n+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT\n+GAGGC
CTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG\n+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA\n+GCCATCCCACCAG\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/dustmasker_three_human.maskinfo-asn1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dustmasker_three_human.maskinfo-asn1 Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,64 @@ +Blast-db-mask-info ::= { + algo-id 2, + algo-program dust, + algo-options "window=64; level=20; linker=1", + masks { + masks { + packed-int { + { + from 1447, + to 1495, + id local id 1 + }, + { + from 1540, + to 1552, + id local id 1 + }, + { + from 1886, + to 1892, + id local id 1 + }, + { + from 2278, + to 2284, + id local id 1 + }, + { + from 4409, + to 4415, + id local id 1 + }, + { + from 4635, + to 4653, + id local id 1 + }, + { + from 4726, + to 4734, + id local id 1 + } + }, + packed-int { + { + from 139, + to 219, + id local id 2 + }, + { + from 4569, + to 4584, + id local id 2 + }, + { + from 4621, + to 4648, + id local id 2 + } + } + }, + more FALSE + } +} |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/dustmasker_three_human.maskinfo-asn1-binary |
| b |
| Binary file test-data/dustmasker_three_human.maskinfo-asn1-binary has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.dbinfo.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.dbinfo.txt Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,7 @@ +Database: Just 4 human proteins + 4 sequences; 3,297 total residues + +Date: Feb 10, 2014 6:40 PM Longest sequence: 1,382 residues + +Volumes: + /mnt/galaxy/galaxy_blast/test-data/four_human_proteins_taxid.fasta |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,48 @@ +>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 +MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFP +NENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSK +RNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK +CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPV +IAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLL +RDRDEL +>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 +MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEGGFSTVFLVRTHGGIRCALKR +MYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFC +DTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG +KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQVSYFAFKFAK +KDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDTIGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLA +PGEFGNHRPKGALRPGNGPEILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ +QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQYQQAFFQQQMLAQHQPSQQQA +SPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSVADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEEL +LDREFDLLRSNRLEERASSDKNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD +QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPENLGHRPLLMDSEDEEEEEKH +SSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSAQLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNL +PQHRFPAAGLEQEEFDVFTKAPFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD +EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARRHKKVGRRDSQSSNEFLTISD +SKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLSWHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKM +DDFGAVPFTELVVQSITPHQSQQSQPVELDPFGAAPFPSKQ +>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 +MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPK 
+LIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDW +SRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL +GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYT +MNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRS +YALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE +RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVD +IDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIIL +KWKPPSDPNGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL +KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPFEK +VVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGL +IVLYEVSYRRYGDEELHLCVSRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG +PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSREKITLLRELGQGSFGMVYEG +NARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKGFTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRP +EAENNPGRPPPTLQEMIQMAAEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV +RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDNCPERVTDLMRMCWQFNPKMR +PTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEMEFEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIP +YTHMNGGKKNGRILTLPRSNPS +>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA +VADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFT +WVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES +ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTT +ICCGKNPLGDDEASATVSKTETSQVAPA |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta.log.txt Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,5 @@ +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.phd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta.phd Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,4 @@ +11117184492 +29249033410 +36665887501 +5392473183 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.phi |
| b |
| Binary file test-data/four_human_proteins.fasta.phi has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.phr |
| b |
| Binary file test-data/four_human_proteins.fasta.phr has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.pin |
| b |
| Binary file test-data/four_human_proteins.fasta.pin has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.pog |
| b |
| Binary file test-data/four_human_proteins.fasta.pog has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.psd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta.psd Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,4 @@ +gnl|bl_ord_id|00 +gnl|bl_ord_id|11 +gnl|bl_ord_id|22 +gnl|bl_ord_id|33 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.psi |
| b |
| Binary file test-data/four_human_proteins.fasta.psi has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins.fasta.psq |
| b |
| Binary file test-data/four_human_proteins.fasta.psq has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_masked.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_masked.fasta Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,61 @@ +>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 +MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF +SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK +REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER +VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK +CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD +CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF +HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL +>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 +MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG +GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS +DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD +LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG +KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP +DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT +IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE +Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq +qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy +qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV +ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD +KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD +QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE +NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA +QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK +APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD +EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR +HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS +WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ +SQQSQPVELDPFGAAPFPSKQ +>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 +MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL 
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL +VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE +ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL +GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG +CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC +TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL +EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE +RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ +NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS +DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE +RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL +KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf +pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV +SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV +SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG +PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR +EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG +FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA +AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV +RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN +CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme +fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN +PS +>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY +VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG +GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP +EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes +attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI +YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.log.txt Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,5 @@ +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.phd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.phd Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,4 @@ +11117184492 +29249033410 +36665887501 +5392473183 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.phi |
| b |
| Binary file test-data/four_human_proteins_taxid.fasta.phi has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.phr |
| b |
| Binary file test-data/four_human_proteins_taxid.fasta.phr has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.pin |
| b |
| Binary file test-data/four_human_proteins_taxid.fasta.pin has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.pog |
| b |
| Binary file test-data/four_human_proteins_taxid.fasta.pog has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.psd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.psd Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,4 @@ +gnl|bl_ord_id|00 +gnl|bl_ord_id|11 +gnl|bl_ord_id|22 +gnl|bl_ord_id|33 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.psi |
| b |
| Binary file test-data/four_human_proteins_taxid.fasta.psi has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/four_human_proteins_taxid.fasta.psq |
| b |
| Binary file test-data/four_human_proteins_taxid.fasta.psq has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/rhodopsin_nucs.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rhodopsin_nucs.fasta Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,161 @@\n+>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA\n+ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCCTTCTCCAACAAAACGGGTGTGGTACGCAGCCCCT\n+TCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTCCTGCT\n+CATCGTGCTTGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACGGTCCAGCACAAGAAGCTGCGCACG\n+CCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCTGACCTCTTCATGGTCTTCGGTGGCTTCACCACCA\n+CCCTCTACACCTCTCTGCATGGATACTTTGTCTTTGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGC\n+CACACTGGGCGGTGAAATTGCCCTGTGGTCTTTGGTGGTCCTGGCCATTGAGCGGTACGTGGTGGTGTGT\n+AAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCATGCCATAATGGGCGTCGCTTTCACCTGGGTCATGG\n+CACTGGCCTGCGCTGCACCCCCCCTCGTTGGTTGGTCCAGGTACATCCCTGAAGGCATGCAGTGTTCATG\n+CGGGATCGACTACTACACACTCAAGCCAGAAGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTC\n+CACTTCACCATCCCCATGATCGTCATCTTCTTTTGCTACGGGCAGCTTGTCTTCACAGTCAAGGAGGCGG\n+CAGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCTGAGAAGGAGGTCACTCGCATGGTCATCATCAT\n+GGTCATTGCTTTCCTGATCTGTTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGG\n+TCCAACTTTGGCCCCATCTTCATGACACTCCCGGCGTTCTTCGCAAAGTCCTCCTCCATCTACAACCCTG\n+TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCC\n+ACTGGGTGATGACGAGGCTTCCACAACCGGTTCCAAGACGGAGACCAGCCAGGTGGCACCGGCCTAA\n+\n+>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete 
cds\n+TCTTTCTAGTTTGGGGGGGGGGACTTTAAAGAGCCGCCAATATGAACGGAACAGAAGGCCCAAACTTTTA\n+CATACCCATGTCCAACAAGACTGGGGTGGTGCGAAGCCCCTTTGAATACCCTCAGTATTACCTGGCAGAG\n+CCATGGCAATATTCCATTCTGTGCGCGTACATGTTCCTGCTCATTCTACTTGGGTTCCCAATCAACTTCA\n+TGACCTTGTACGTCACCATCCAGCACAAGAAGCTCCGGACACCCTTAAACTATATCCTGCTGAATTTGGC\n+CTTTGCCAACCACTTCATGGTCCTGTGTGGATTCACGGTGACAATGTACTCCTCAATGAACGGATACTTC\n+ATCCTCGGAGCCACCGGTTGCTATGTTGAAGGCTTCTTCGCTACCCTTGGTGGTGAAATCGCCCTTTGGT\n+CCCTGGTGGTCTTGGCCATTGAACGATACGTGGTCGTCTGTAAGCCCATGAGCAACTTCCGATTTAGTGA\n+GAACCATGCCGTCATGGGCGTAGCGTTCACCTGGATAATGGCTTTGTCCTGTGCTGTTCCTCCACTCCTT\n+GGATGGTCCAGGTACATCCCCGAGGGCATGCAGTGCTCCTGCGGAGTCGACTACTACACCCTGAAGCCCG\n+AGGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTCGTCCACTTCACCATCCCCCTGATTATCATTTT\n+CTTCTGCTATGGCCGCCTGGTGTGCACTGTGAAAGAGGCTGCAGCTCAACAGCAAGAGTCCGCCACCACC\n+CAGAAGGCCGAGAAAGAGGTGACCAGGATGGTGATCATCATGGTGGTCTTCTTCCTTATCTGTTGGGTCC\n+CCTACGCCTCTGTCGCTTTCTTCATCTTCAGCAATCAGGGCTCTGAGTTCGGCCCCATCTTCATGACCGT\n+CCCAGCTTTCTTTGCCAAGAGTTCTTCCATCTACAACCCCGTCATCTACATCATGCTCAACAAGCAGTTC\n+CGTAACTGCATGATCACCACCCTGTGCTGCGGCAAGAATCCCTTTGGAGAAGACGATGCCTCCTCTGCCG\n+CCACCTCCAAGACAGAGGCTTCTTCTGTTTCTTCCAGCCAGGTGTCTCCTGCATAAGACCTTCCACCAGG\n+CCTGTCTCAGGGTCCGCTGCCTCACACAGCTCCCACCGCCCCAACTCCGTCTCCTGCTCGCTAAGGCGGC\n+GAAGTTCCCCTTCCATTACATAAAACGTATCTGTTCAAGAAAGGCGACGACGAAGGAGAAGAAGAGGAGC\n+CCCCCCGAACCCCTTCGCTGCTGCTGAAAACGACTTGATTGCTTCTGCAACGCAACGGGGCCTTACGGCA\n+GCGAAGGGGTTGTCATCCGGACGCGCCAAGAATTCCTTCGAGACTGTAAATATCTTAAAGGAACCGTCCT\n+GCTAGTTACCGACGCCGCTCCTGTAGCCGCCGTTCCCCCGCACTCCGGCCGGTTCATACCTCTTATTTTT\n+TTGCAATGCAACAGAAAATAATATTTTTGTTCCCACGGCTTTTCCCGGTCAGGTCTGGTAGTGGCGGAGA\n+TTGGCCGACCCCTCGCACCTGTAATAAAGCGCAG\n+\n+>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial 
cds\n+GTGCCCTTCTCCAACAAGACAGGCGTGGTGCGCAGTCCCTTCGAGCATCCACAGTACTACCTGGCCGAGC\n+CATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTCGGCTTCCCCATCAACTTCCT\n+CACGCTCTATGTCACGGTTCAGCACAAGAAGCTGCGTACGCCTCTCAACTACATCCTGCTCAACCTGGCC\n+GTGGCCGACCTCTTCATGGTCTTCGGAGGCTTCACCACCACCCTCTACACCTCCCTGCATGGATACTTTG\n+TCTTCGGGCCTACGGGATGCAATCTGGAGGGCTTTTTTGCCACCCTGGGAGGTATGAGCTGAGATGCGGG\n+TAAGGAGGAGGCATAGAGGCATCTGGGAACAGTCCCAAGCTTGGGGTGAAGGCTAAGAGGCCTTCTTCCT\n+TGTTCTGTCATTGGCGTCGTCCGAAGCCCTCACTTAATCAACAAACAGTTTGGTGGTGAGGCGCTGAGCT\n+CCATTTGGAGAGGGCAGGTATCGAGCACTGTTTTATCCCCCCTGGAGTGGTGCCATTGCCTTGCTTTACA\n+GCAAAGAAACTGAGGATGAGAGGAGTCGAGGGTCTTGCCAGGTCACATCATGGCAGAGACAGAGCTGAGT\n+TTCAACCCTGCATCTATGTGCAGTTTCCCTTGGAGCAGCTATGTTAGGTCAGACCCACGGTGGGCACTGG\n+GGAGAGAGCTGCACAAGACAGGTCCCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTCCTGATTGCCA\n+GGAGTGATGTGCAGCGCAAATGTCTGAATTCCATTATTATGTGCTCCTTCTTCCTCTGAGCCAAACATCC\n+ATCTTCATGGCTCCTAGAATTGGGTCCCACCCACATGAGCAGGTCATTTTGTTTCCCTAGAGGGGAGAGG\n+TCACT'..b'CTTCAGAGGGTCAGATTTGGGATGAGAGTGGAGGCTGCGAGGGCCTGAGTG\n+GGAAGGGATTGGAGGCAAATCTCACCAACCATGTCAGTTTGCTACACACACTTTGGGTGGACCCTGACCC\n+TGACTCATGCTTCTTGCCTTCCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCCAC\n+TGGGTGACGATGAGGCCTCCACCACTGCCTC\n+\n+>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial 
cds\n+GTGCCCTTCTCCAACAAGACGGGTGTGGTGCGCAGCCCCTTCGAGTACCCGCAGTACTACCTGGCTGAGC\n+CCTGGCAGTTCTCCATGCTGGCTGCCTACATGTTTCTGCTGATCGTGCTCGGATTCCCCATCAACTTCCT\n+CACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCT\n+GTGGCCAACCTCTTCATGGTCTTTGGAGGCTTCACCACCACCCTGTATACCTCTATGCATGGATACTTCG\n+TCTTCGGGGCCACGGGATGCAATCTGGAGGGCTTCTTTGCCACGCTGGGCGGTGAAATCGCCCTGTGGTC\n+CCTGGTGGTCCTGGCCATCGAGCGGTATGTGGTGGTCTGCAAGCCCATGAGCAACTTCCGCTTTGGGGAG\n+AACCACGCCATCATGGGCCTCGCCTTCACGTGGGTCATGGCACTGGCCTGCGCTGCACCCCCACTAGCCG\n+GCTGGTCCAGGTACATCCCAGAGGGCATGCAGTGCTCGTGTGGGATTGACTACTACACGCTCAAACCGGA\n+GGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTGTCATTTTC\n+TTCTGCTACGGACAGCTGGTGTTCACAGTGAAGGAGGCGGCTGCCCAGCAGCAGGAGTCAGCCACCACCC\n+AGAAGGCCGAGAAGGAAGTCACGCGCATGGTCATCATCATGGTCGTTGCGTTCCTAATCTGTTGGCTGCC\n+CTACGCCAGCGTGGCATTCTACATCTTTACCCACCAGGGCTCTAACTTTGGCCCTGTCTTCATGACCATC\n+CCGGCATTCTTCGCCAAGTCATCCTCCATCTACAACCCGGTCATCTATATCATGATGAACAAGCAGTTCC\n+GGAACTGCATGCTCACCACCCTCTGCTGTGGCAAGAACCCACTGGGTGATGACGAAGCATCCACCACTGC\n+CTC\n+\n+>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete 
cds\n+ATGAACGGGACCGAGGGCCCAAACTTCTACGTGCCTTTCTCCAACAAGACGGGCGTCGTACGCAGCCCCT\n+TCGAGGCGCCGCAGTACTACCTGGCTGAGCCATGGCAGTTCAGCATGCTGGCCGCCTACATGTTCCTGCT\n+GATCATGCTTGGCTTCCCCATCAACTTCCTCACGCTGTACGTCACAGTCCAGCACAAGAAGCTGAGGACC\n+CCCCTCAACTACATCCTGCTCAACCTGGCCGTGGCAGATCTCTTCATGGTGTTCGGGGGCTTCACCACCA\n+CCCTGTATACCTCTCTGCACGGGTACTTCGTGTTCGGTCCGACGGGCTGCAACCTCGAGGGCTTCTTTGC\n+CACCTTAGGCGGTGAAATTGCACTGTGGTCCTTGGTGGTGCTAGCCATCGAGCGGTACGTAGTGGTGTGC\n+AAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCGTCGCATTCACCTGGGTCATGG\n+CTCTGGCCTGTGCGGCCCCCCCCCTCGTCGGCTGGTCTAGATACATCCCGGAGGGGATGCAGTGCTCGTG\n+CGGGATCGATTACTACACGCCCCACGAGGAGACCAACAATGAGTCGTTCGTCATCTACATGTTCGTTGTA\n+CACTTCATCATCCCCCTGATTGTCATATTCTTCTGCTACGGGCAGCTGGTCTTCACCGTCAAGGAGGCTG\n+CAGCCCAGCAGCAGGAGTCGGCCACCACTCAGAAGGCCGAGAAGGAGGTCACGCGTATGGTCATCATCAT\n+GGTCATCGCTTTCCTCATATGCTGGCTGCCCTACGCAGGTGTGGCGTTCTACATCTTCACCCATCAGGGA\n+TCCGACTTTGGCCCCATCTTCATGACCATCCCGGCTTTCTTTGCCAAGACGTCTGCCGTCTATAACCCCG\n+TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGGTCACCACTCTCTGCTGTGGCAAGAACCC\n+CCTAGGTGACGACGAGGCCTCCACGACCGTGTCCAAGACAGAGACCAGCCAAGTGGCCCCTGCCTAA\n+\n+>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete 
cds\n+CCGCTACTGACGAACCGCAACCATGAACGGCACTGAGGGACCTAACTTCTACATCCCCATGTCAAACGCC\n+ACTGGTGTAGTGAGGAGTCCATTTGAATACCCGCAGTACTACCTTGCAGAACCATGGGCTTTCTCAGCTC\n+TGTCTGCCTACATGTTCTTCCTGATTATCGCCGGATTCCCCATCAACTTCCTCACCCTGTATGTCACCAT\n+CGAACATAAGAAACTGAGGACCCCACTGAACTACATTCTGCTGAACCTGGCCGTGGCCGACCTCTTCATG\n+GTGTTTGGCGGATTCACCACCACGATGTACACCTCCATGCACGGCTACTTTGTCTTCGGCCCCACCGGCT\n+GCAACATCGAAGGGTTCTTCGCCACCCTCGGCGGCGAGATTGCCCTCTGGTGCCTCGTTGTCCTGGCCAT\n+TGAAAGGTGGATGGTCGTCTGCAAGCCAGTGACCAATTTCCGCTTCGGTGAGAGCCATGCCATCATGGGT\n+GTCATGGTGACCTGGACCATGGCATTGGCCTGTGCCCTCCCCCCTCTCTTCGGCTGGTCTCGGTACATTC\n+CGGAAGGTCTGCAGTGCTCGTGCGGGATCGACTACTATACCCGGGCGCCTGGGATCAACAATGAGTCCTT\n+TGTGATCTACATGTTTACCTGCCACTTCTCCATCCCACTCGCCGTCATCTCTTTCTGCTACGGCCGACTG\n+GTGTGCACCGTCAAAGAGGCCGCTGCCCAGCAACAGGAGTCCGAGACCACCCAGAGGGCTGAGCGGGAGG\n+TCACCCGCATGGTCGTCATCATGGTCATCTCCTTCCTGGTCTGCTGGGTGCCCTATGCCAGTGTGGCCTG\n+GTACATCTTTACCCACCAGGGAAGCACTTTTGGGCCCATCTTCATGACCATTCCATCCTTCTTTGCCAAG\n+AGTTCAGCCCTCTACAACCCCATGATCTACATCTGCATGAACAAGCAGTTCCGCCATTGCATGATCACCA\n+CCCTCTGCTGTGGGAAGAACCCCTTCGAGGAGGAGGATGGAGCGTCCGCCACTAGCTCTAAAACTGAGGC\n+TTCATCCGTGTCCTCCAGCTCTGTCTCCCCGGCATAAACCTTGTTTGACCGAACACCACGCATCAACACA\n+AAGACCAAGAATGCTGACTAAATGCTAACATTTCAGGGAAATCCAAAGACTTTTTACTATTTTTTTACAC\n+AACCATATAGGTTGCAAACAGAGGTTTAGCCCTGTTTACAGGTTGTCATCAATGTGATGTCAGTATGTAC\n+AATATAGTCAACTTGATAGCAAGTTGTTGGCTTATTTCAGATTGTATGGGCAATGTAATCAACCATATGT\n+GAAATAAATTGCAA\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/rhodopsin_proteins.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rhodopsin_proteins.fasta Fri Jan 30 08:27:28 2015 -0500 |
| [ |
| @@ -0,0 +1,43 @@ +>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] +MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT +PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC +KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVV +HFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQG +SNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA + +>gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin +MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRT +PLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVC +KPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVV +HFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQG +SEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQ +VSPA + +>gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] +VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA +VADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE +NHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF +FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTL +PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS + +>gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus] +VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA +VANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE +NHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF +FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTI +PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS + +>gi|223523|prf||0811197A rhodopsin [Bos taurus] +MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRT +PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC +KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVH 
+FIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGS +DFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA + +>gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster] +MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRT +PLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVC +KPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTC +HFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQG +STFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSS +VSPA |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/segmasker_four_human.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/segmasker_four_human.fasta Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,61 @@ +>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 +MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF +SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK +REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER +VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK +CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD +CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF +HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL +>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 +MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG +GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS +DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD +LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG +KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP +DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT +IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE +Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq +qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy +qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV +ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD +KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD +QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE +NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA +QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK +APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD +EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR +HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS +WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ +SQQSQPVELDPFGAAPFPSKQ +>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 +MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL 
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL +VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE +ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL +GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG +CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC +TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL +EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE +RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ +NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS +DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE +RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL +KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf +pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV +SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV +SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG +PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR +EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG +FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA +AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV +RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN +CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme +fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN +PS +>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY +VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG +GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP +EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes +attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI +YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/segmasker_four_human.maskinfo-asn1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/segmasker_four_human.maskinfo-asn1 Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,114 @@ +Blast-db-mask-info ::= { + algo-id 1, + algo-program seg, + algo-options "window=12; locut=2.2; hicut=2.5", + masks { + masks { + int { + from 6, + to 18, + id local id 1 + }, + packed-int { + { + from 11, + to 46, + id local id 2 + }, + { + from 325, + to 332, + id local id 2 + }, + { + from 421, + to 443, + id local id 2 + }, + { + from 437, + to 450, + id local id 2 + }, + { + from 447, + to 496, + id local id 2 + }, + { + from 501, + to 516, + id local id 2 + }, + { + from 536, + to 554, + id local id 2 + }, + { + from 545, + to 558, + id local id 2 + }, + { + from 636, + to 648, + id local id 2 + }, + { + from 737, + to 762, + id local id 2 + }, + { + from 789, + to 806, + id local id 2 + }, + { + from 970, + to 983, + id local id 2 + }, + { + from 999, + to 1010, + id local id 2 + } + }, + packed-int { + { + from 3, + to 26, + id local id 3 + }, + { + from 372, + to 390, + id local id 3 + }, + { + from 766, + to 782, + id local id 3 + }, + { + from 780, + to 791, + id local id 3 + }, + { + from 1312, + to 1324, + id local id 3 + } + }, + int { + from 230, + to 246, + id local id 4 + } + }, + more FALSE + } +} |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/segmasker_four_human.maskinfo-asn1-binary |
| b |
| Binary file test-data/segmasker_four_human.maskinfo-asn1-binary has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/tblastn_four_human_vs_rhodopsin.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastn_four_human_vs_rhodopsin.html Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,787 @@\n+<HTML>\n+<TITLE>BLAST Search Results</TITLE>\n+<BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">\n+<PRE>\n+\n+<b>TBLASTN 2.2.30+</b>\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA\n+\n+Length=1047\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda K H a alpha\n+ 0.347 0.182 0.684 0.522 1.92 \n+\n+Gapped\n+Lambda K H a alpha sigma\n+ 0.299 0.0710 0.270 1.10 13.8 14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete\n+cds\n+\n+Length=1574\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda K H a alpha\n+ 0.347 0.182 0.684 0.522 1.92 \n+\n+Gapped\n+Lambda K H a alpha sigma\n+ 0.299 0.0710 0.270 1.10 13.8 14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434\n+rhodopsin (RHO) gene, exons 1 through 5 and partial cds\n+\n+Length=4301\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda K H a alpha\n+ 0.347 0.182 0.684 0.522 1.92 \n+\n+Gapped\n+Lambda K H a alpha sigma\n+ 0.299 0.0710 0.270 1.10 13.8 14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin\n+(RHO) mRNA, partial cds\n+\n+Length=983\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda K H a alpha\n+ 0.347 0.182 0.684 0.522 1.92 
\n+\n+Gapped\n+Lambda K H a alpha sigma\n+ 0.299 0.0710 0.270 1.10 13.8 14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for\n+rhodopsin, complete cds\n+\n+Length=1047\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda K H a alpha\n+ 0.347 0.182 0.684 0.522 1.92 \n+\n+Gapped\n+Lambda K H a alpha sigma\n+ 0.299 0.0710 0.270 1.10 13.8 14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n+OS=Homo sapiens GN=ERP44 PE=1 SV=1\n+\n+Length=406\n+\n+<b>Subject=</b> gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh\n+water form rod opsin, complete cds\n+\n+Length=1344\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda K H a alpha\n+ 0.347 0.182 0.684 0.522 1.92 \n+\n+Gapped\n+Lambda K H a alpha sigma\n+ 0.299 0.0710 0.270 1.10 13.8 14.5 \n+\n+Effective search space used: 127710\n+\n+\n+<b>Query=</b> sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens\n+GN=BMP2K PE=1 SV=2\n+\n+Length=1161\n+\n+<b>Subject=</b> gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA\n+\n+Length=1047\n+\n+\n+***** No hits found *****\n+\n+\n+\n+Lambda K H a alpha\n+ 0.334 0.170 0.615 0.522 1.92 \n+\n+Gapped\n+Lambda K H a alpha sigma\n+ 0.299 0.0710 0.270 1.10 13.8 14.5 \n+\n+Effective search space used: 370988\n+\n+\n+<b>Query'..b' 711 bits (1640), Expect = 0.0, Method: Compositional matrix adjust.\n+ Identities = 325/348 (93%), Positives = 337/348 (97%), Gaps = 0/348 (0%)\n+ Frame = +1\n+\n+Query 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY 60\n+ MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLY\n+Sbjct 1 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLY 180\n+\n+Query 61 
VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG 120\n+ VTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLG\n+Sbjct 181 VTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLG 360\n+\n+Query 121 GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP 180\n+ GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIP\n+Sbjct 361 GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIP 540\n+\n+Query 181 EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES 240\n+ EG+QCSCGIDYYT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQES\n+Sbjct 541 EGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQES 720\n+\n+Query 241 ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI 300\n+ ATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+\n+Sbjct 721 ATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAV 900\n+\n+Query 301 YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 348\n+ YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA\n+Sbjct 901 YNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 1044\n+\n+\n+\n+Lambda K H a alpha\n+ 0.351 0.182 0.707 0.522 1.92 \n+\n+Gapped\n+Lambda K H a alpha sigma\n+ 0.299 0.0710 0.270 1.10 13.8 14.5 \n+\n+Effective search space used: 109230\n+\n+\n+<b>Query=</b> sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1\n+\n+Length=348\n+\n+<b>Subject=</b> gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh\n+water form rod opsin, complete cds\n+\n+Length=1344\n+\n+\n+ Score = 626 bits (1444), Expect = 0.0, Method: Compositional matrix adjust.\n+ Identities = 281/342 (82%), Positives = 311/342 (91%), Gaps = 1/342 (0%)\n+ Frame = +2\n+\n+Query 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY 60\n+ MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLY\n+Sbjct 23 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLY 202\n+\n+Query 61 VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG 120\n+ VT++HKKLRTPLNYILLNLAVADLFMV 
GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLG\n+Sbjct 203 VTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLG 382\n+\n+Query 121 GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP 180\n+ GEIALW LVVLAIER++VVCKP++NFRFGE HAIMGV TW MALACA PPL GWSRYIP\n+Sbjct 383 GEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIP 562\n+\n+Query 181 EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES 240\n+ EGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES\n+Sbjct 563 EGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQES 742\n+\n+Query 241 ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI 300\n+ TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA YIFTHQGS FGPIFMTIP+FFAKS+A+\n+Sbjct 743 ETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSAL 922\n+\n+Query 301 YNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE 341\n+ YNP+IYI MNKQFR CM+TT+CCGKNP +D ASAT SKTE\n+Sbjct 923 YNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 1048\n+\n+\n+\n+Lambda K H a alpha\n+ 0.351 0.182 0.707 0.522 1.92 \n+\n+Gapped\n+Lambda K H a alpha sigma\n+ 0.299 0.0710 0.270 1.10 13.8 14.5 \n+\n+Effective search space used: 109230\n+\n+\n+\n+\n+Matrix: BLOSUM80\n+Gap Penalties: Existence: 10, Extension: 1\n+Neighboring words threshold: 14\n+Window for multiple hits: 25\n+</PRE>\n+</BODY>\n+</HTML>\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/tblastn_four_human_vs_rhodopsin.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastn_four_human_vs_rhodopsin.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,10 @@ +sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732 +sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7 +sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 +sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 +sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/tblastn_four_human_vs_rhodopsin.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastn_four_human_vs_rhodopsin.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,741 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>tblastn</BlastOutput_program>\n+ <BlastOutput_version>TBLASTN 2.2.30+</BlastOutput_version>\n+ <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+ <BlastOutput_db></BlastOutput_db>\n+ <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+ <BlastOutput_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</BlastOutput_query-def>\n+ <BlastOutput_query-len>406</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_matrix>BLOSUM80</Parameters_matrix>\n+ <Parameters_expect>1e-10</Parameters_expect>\n+ <Parameters_gap-open>10</Parameters_gap-open>\n+ <Parameters_gap-extend>1</Parameters_gap-extend>\n+ <Parameters_filter>F</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>19</Statistics_hsp-len>\n+ <Statistics_eff-space>127710</Statistics_eff-space>\n+ <Statistics_kappa>0.071</Statistics_kappa>\n+ <Statistics_lambda>0.299</Statistics_lambda>\n+ <Statistics_entropy>0.27</Statistics_entropy>\n+ </Statistics>\n+ 
</Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>2</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>19</Statistics_hsp-len>\n+ <Statistics_eff-space>127710</Statistics_eff-space>\n+ <Statistics_kappa>0.071</Statistics_kappa>\n+ <Statistics_lambda>0.299</Statistics_lambda>\n+ <Statistics_entropy>0.27</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>3</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>406</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>19</Statistics_hsp-len>\n+ <Statistics_eff-space>127710</Statistics_eff-space>\n+ <Statistics_kappa>0.071</Statistics_kappa>\n+ <Statistics_lambda>0.299</Statistics_lambda>\n+ <Statistics_entropy>0.27</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+ <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>4</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN 
Endoplasmi'..b'YYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>18</Statistics_hsp-len>\n+ <Statistics_eff-space>109230</Statistics_eff-space>\n+ <Statistics_kappa>0.071</Statistics_kappa>\n+ <Statistics_lambda>0.299</Statistics_lambda>\n+ <Statistics_entropy>0.27</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+</Iteration>\n+<Iteration>\n+ <Iteration_iter-num>24</Iteration_iter-num>\n+ <Iteration_query-ID>Query_4</Iteration_query-ID>\n+ <Iteration_query-def>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>\n+ <Iteration_query-len>348</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>Subject_6</Hit_id>\n+ <Hit_def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Hit_def>\n+ <Hit_accession>Subject_6</Hit_accession>\n+ <Hit_len>1344</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>626.708</Hsp_bit-score>\n+ <Hsp_score>1444</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>341</Hsp_query-to>\n+ 
<Hsp_hit-from>23</Hsp_hit-from>\n+ <Hsp_hit-to>1048</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>2</Hsp_hit-frame>\n+ <Hsp_identity>281</Hsp_identity>\n+ <Hsp_positive>311</Hsp_positive>\n+ <Hsp_gaps>1</Hsp_gaps>\n+ <Hsp_align-len>342</Hsp_align-len>\n+ <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR CM+TT+CCGKNP +D ASAT SKTE</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>0</Statistics_db-num>\n+ <Statistics_db-len>0</Statistics_db-len>\n+ <Statistics_hsp-len>18</Statistics_hsp-len>\n+ <Statistics_eff-space>109230</Statistics_eff-space>\n+ <Statistics_kappa>0.071</Statistics_kappa>\n+ <Statistics_lambda>0.299</Statistics_lambda>\n+ <Statistics_entropy>0.27</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/tblastn_four_human_vs_rhodopsin_ext.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,10 @@ +sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732 gi|57163782|ref|NM_001009242.1| 1689 336 343 0 98.56 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 1047 N/A +sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 gi|2734705|gb|U59921.1|BBU59921 1489 290 320 1 93.57 0 3 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 1574 N/A +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 gi|283855845|gb|GQ290303.1| 342 69 73 0 98.65 0 3 ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 
ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ 348 4301 N/A +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 gi|283855845|gb|GQ290303.1| 284 54 57 0 96.61 0 2 RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS 348 4301 N/A +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229 gi|283855845|gb|GQ290303.1| 523 107 109 0 98.20 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 348 4301 N/A +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 gi|283855845|gb|GQ290303.1| 276 55 56 0 94.92 0 3 LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR 348 4301 N/A +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7 gi|283855845|gb|GQ290303.1| 125 23 24 0 92.31 0 1 QFRNCMLTTICCGKNPLGDDEASATV QFRNCMLTTLCCGKNPLGDDEASTTA 348 4301 N/A +sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 gi|283855822|gb|GQ290312.1| 1517 310 322 0 98.77 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 
VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT 348 983 N/A +sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 gi|18148870|dbj|AB062417.1| 1640 325 337 0 96.84 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 1047 N/A +sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626 gi|12583664|dbj|AB043817.1| 1444 281 311 1 90.94 0 2 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE 
MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 1344 N/A |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/tblastx_rhodopsin_vs_three_human.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastx_rhodopsin_vs_three_human.tabular Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,117 @@\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t97.39\t230\t6\t0\t1\t690\t88\t777\t0.0\t 559\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.12\t102\t6\t0\t742\t1047\t829\t1134\t0.0\t 236\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t91.22\t148\t13\t0\t1046\t603\t1133\t690\t0.0\t 308\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.32\t88\t5\t0\t566\t303\t653\t390\t0.0\t 207\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t96.34\t82\t3\t0\t248\t3\t335\t90\t0.0\t 182\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t83.33\t204\t34\t0\t18\t629\t105\t716\t4e-158\t 404\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t75.28\t89\t22\t0\t780\t1046\t867\t1133\t4e-158\t 161\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t81.28\t203\t38\t0\t609\t1\t696\t88\t5e-153\t 360\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t80.60\t67\t13\t0\t916\t716\t1003\t803\t5e-153\t 135\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t70.27\t37\t11\t0\t1047\t937\t1134\t1024\t5e-153\t64.2\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t100.00\t7\t0\t0\t646\t626\t733\t713\t5e-153\t24.0\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t89.23\t65\t7\t0\t460\t266\t547\t353\t4e-105\t 167\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t89.58\t48\t5\t0\t184\t41\t271\t128\t4e-105\t 104\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t77.78\t45\t10\t0\t882\t748\t969\t835\t4e-105\t93.9\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t67.86\t28\t9\t0\t1045\t962\t1132\t1049\t4e-105\t51.9\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t59.09\t22\t9\t0\t586\t521\t673\t608\t4e-105\t33.1\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t81.40\t86\t16\t0\t296\t553\t383\t640\t2e-87\t 
185\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t84.38\t32\t5\t0\t11\t106\t98\t193\t2e-87\t74.8\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t71.43\t35\t10\t0\t941\t1045\t1028\t1132\t2e-87\t61.6\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.44\t18\t1\t0\t794\t847\t881\t934\t2e-87\t50.1\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t83.61\t238\t39\t0\t18\t731\t64\t777\t0.0\t 507\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t82.35\t85\t15\t0\t783\t1037\t829\t1083\t0.0\t 188\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t70.96\t303\t88\t0\t925\t17\t971\t63\t2e-130\t 435\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t72.22\t18\t5\t0\t1027\t974\t1073\t1020\t2e-130\t35.0\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t55.32\t188\t84\t0\t605\t42\t651\t88\t7e-89\t 245\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t61.11\t72\t28\t0\t1037\t822\t1083\t868\t7e-89\t91.3\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t49.02\t204\t104\t0\t29\t640\t75\t686\t4e-78\t 197\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t66.04\t53\t18\t0\t860\t1018\t906\t1064\t4e-78\t85.8\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t44.44\t27\t15\t0\t689\t769\t735\t815\t4e-78\t32.2\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t47.47\t198\t104\t0\t633\t40\t679\t86\t4e-65\t 177\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t68.09\t47\t15\t0\t1017\t877\t1063\t923\t4e-65\t80.3\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t57.89\t114\t48\t0\t265\t606\t311\t652\t3e-46\t 137\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t46.30\t54\t29\t0\t19\t180\t65\t226\t3e-46\t52.4\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t96.40\t111\t4\t0\t1\t333\t118\t450\t0.0\t 264\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t92.31\t65\t5\t0\t3174\t3368\t829\t1023\t0.0\t 
151\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t96.43\t56\t2\t0\t2855\t3022\t616\t783\t0.0\t 141\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t93.22\t59\t4\t0\t1404\t1580\t442\t618\t0.0\t 138\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t92.00\t25\t2\t0\t4222\t4296\t1021\t1095\t0.0\t64.3\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t88.89\t9\t1\t0\t3128\t3154\t783\t809\t0.0\t22.6\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t78.38\t111\t24\t0\t333\t1\t450\t118\t7e-171\t 212\n+gi|283855845|gb|GQ290303.1|\tENA|BC112106|BC112106.1\t88.75\t80\t9\t0\t3367'..b'319\t72\t0\t3\t959\t120\t1076\t4e-174\t 593\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t79.07\t129\t27\t0\t558\t172\t675\t289\t2e-133\t 248\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t76.83\t82\t19\t0\t963\t718\t1080\t835\t2e-133\t 159\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t84.09\t44\t7\t0\t133\t2\t250\t119\t2e-133\t97.3\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t73.08\t78\t21\t0\t433\t200\t550\t317\t6e-102\t 145\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t70.15\t67\t20\t0\t799\t599\t916\t716\t6e-102\t 
106\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t80.49\t41\t8\t0\t123\t1\t240\t118\t6e-102\t84.5\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t77.78\t27\t6\t0\t553\t473\t670\t590\t6e-102\t51.9\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t71.43\t14\t4\t0\t889\t848\t1006\t965\t6e-102\t32.7\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t81.82\t11\t2\t0\t958\t926\t1075\t1043\t6e-102\t28.6\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t84.85\t33\t5\t0\t239\t337\t356\t454\t4e-48\t72.5\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t86.67\t30\t4\t0\t2\t91\t119\t208\t4e-48\t71.2\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t81.40\t43\t8\t0\t404\t532\t521\t649\t4e-48\t47.3\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t88.89\t18\t2\t0\t764\t817\t881\t934\t4e-48\t44.6\n+gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t87.50\t8\t1\t0\t935\t958\t1052\t1075\t4e-48\t21.7\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t93.91\t230\t14\t0\t1\t690\t88\t777\t0.0\t 538\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t91.18\t102\t9\t0\t742\t1047\t829\t1134\t0.0\t 233\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t88.83\t188\t21\t0\t566\t3\t653\t90\t0.0\t 394\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t84.06\t138\t22\t0\t1046\t633\t1133\t720\t0.0\t 260\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t67.11\t228\t75\t0\t684\t1\t771\t88\t7e-132\t 333\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t67.27\t110\t36\t0\t1045\t716\t1132\t803\t7e-132\t 141\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t70.20\t151\t45\t0\t3\t455\t90\t542\t1e-128\t 236\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t64.04\t89\t32\t0\t780\t1046\t867\t1133\t1e-128\t 136\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.22\t74\t25\t0\t510\t731\t597\t818\t1e-128\t 
111\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.04\t106\t36\t0\t242\t559\t329\t646\t2e-58\t 161\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t85.71\t21\t3\t0\t92\t154\t179\t241\t2e-58\t53.8\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t73.68\t19\t5\t0\t791\t847\t878\t934\t2e-58\t39.1\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t61.29\t62\t24\t0\t424\t239\t511\t326\t4e-55\t81.3\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t74.36\t39\t10\t0\t133\t17\t220\t104\t4e-55\t69.8\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t65.71\t35\t12\t0\t882\t778\t969\t865\t4e-55\t56.3\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t58.14\t43\t18\t0\t649\t521\t736\t608\t4e-55\t50.6\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.67\t12\t4\t0\t972\t937\t1059\t1024\t4e-55\t23.9\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t82.13\t235\t42\t0\t11\t715\t76\t780\t0.0\t 498\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t78.31\t83\t18\t0\t770\t1018\t835\t1083\t0.0\t 177\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t72.29\t332\t92\t0\t1017\t22\t1082\t87\t1e-150\t 516\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t48.30\t147\t76\t0\t712\t272\t777\t337\t2e-98\t 169\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t54.17\t72\t33\t0\t1030\t815\t1095\t880\t2e-98\t 
103\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t47.83\t69\t36\t0\t220\t14\t285\t79\t2e-98\t83.5\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t72.00\t25\t7\t0\t782\t708\t847\t773\t2e-98\t45.1\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t56.00\t75\t33\t0\t532\t756\t597\t821\t5e-65\t87.7\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t39.42\t104\t63\t0\t19\t330\t84\t395\t5e-65\t86.8\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t56.90\t58\t25\t0\t829\t1002\t894\t1067\t5e-65\t81.3\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t50.00\t30\t15\t0\t388\t477\t453\t542\t5e-65\t33.6\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.dbinfo.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/three_human_mRNA.dbinfo.txt Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,7 @@ +Database: Just 3 human mRNA sequences + 3 sequences; 10,732 total bases + +Date: Dec 26, 2014 5:54 AM Longest sequence: 4,796 bases + +Volumes: + /mnt/galaxy/galaxy_blast/test-data/three_human_mRNA.fasta |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/three_human_mRNA.fasta Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,183 @@\n+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.\n+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG\n+CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC\n+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC\n+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA\n+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC\n+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT\n+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA\n+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA\n+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA\n+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC\n+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA\n+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT\n+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG\n+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA\n+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA\n+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG\n+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA\n+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC\n+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT\n+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA\n+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC\n+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA\n+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG\n+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC\n+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC\n+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA\n+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT\n+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA\n+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT\n+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT\n+GGGAGG
GGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA\n+TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA\n+CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA\n+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG\n+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG\n+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT\n+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT\n+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT\n+TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG\n+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA\n+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT\n+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC\n+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT\n+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT\n+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT\n+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT\n+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT\n+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA\n+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA\n+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA\n+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT\n+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT\n+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT\n+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT\n+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA\n+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT\n+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT\n+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT\n+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT\n+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG\n+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC\n+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAG
AGTTGTT\n+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGT'..b'AAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT\n+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG\n+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT\n+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC\n+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG\n+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG\n+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC\n+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC\n+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT\n+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG\n+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG\n+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT\n+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC\n+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT\n+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT\n+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT\n+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC\n+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG\n+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG\n+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC\n+AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC\n+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA\n+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG\n+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA\n+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC\n+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC\n+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA\n+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT\n+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA\n+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAA
GGACGACCTG\n+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT\n+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC\n+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC\n+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG\n+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT\n+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT\n+ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC\n+TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA\n+GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT\n+TTTTTCGTTCCCCCCACCCGCCCCCAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT\n+CTTTTTTTTTTTTTTTTTTTTTTTTTTTTGCTGGTGTCTGAGCTTCAGTATAAAAGACAA\n+AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA\n+>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.\n+CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC\n+CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG\n+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG\n+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC\n+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT\n+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC\n+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT\n+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG\n+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC\n+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA\n+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC\n+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC\n+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG\n+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC\n+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG\n+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA\n+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG\n+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT\n+GAGGC
CTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG\n+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA\n+GCCATCCCACCAG\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/three_human_mRNA.fasta.log.txt Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,5 @@ +New DB title: Just 3 human mRNA sequences +Sequence type: Nucleotide +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nhd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/three_human_mRNA.fasta.nhd Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,3 @@ +12956943350 +13082197871 +19180330422 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nhi |
| b |
| Binary file test-data/three_human_mRNA.fasta.nhi has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nhr |
| b |
| Binary file test-data/three_human_mRNA.fasta.nhr has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nin |
| b |
| Binary file test-data/three_human_mRNA.fasta.nin has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nog |
| b |
| Binary file test-data/three_human_mRNA.fasta.nog has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nsd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/three_human_mRNA.fasta.nsd Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,3 @@ +gnl|bl_ord_id|00 +gnl|bl_ord_id|11 +gnl|bl_ord_id|22 |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nsi |
| b |
| Binary file test-data/three_human_mRNA.fasta.nsi has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/three_human_mRNA.fasta.nsq |
| b |
| Binary file test-data/three_human_mRNA.fasta.nsq has changed |
| b |
| diff -r 000000000000 -r 432ea9614cc9 test-data/tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tool_data_table_conf.xml.test Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,15 @@ +<tables> + <!-- test files! --> + <table name="blastdb" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/blastdb.loc" /> + </table> + <table name="blastdb_p" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/blastdb_p.loc" /> + </table> + <table name="blastdb_d" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/blastdb_d.loc" /> + </table> +</tables> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tool-data/blastdb.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/blastdb.loc.sample Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,39 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of nucleotide BLAST databases, using three columns tab separated +#(longer whitespace are TAB characters): +# +#<unique_id> <database_caption> <base_name_path> +# +#The captions typically contain spaces and might end with the build date. +#It is important that the actual database name does not have a space in +#it, and that there are only two tabs on each line. +# +#So, for example, if your database is nt and the path to your base name +#is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry +#would look like this: +# +#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk +# +#and your /depot/data2/galaxy/blastdb/nt directory would contain all of +#your "base names" (e.g.): +# +#-rw-r--r-- 1 wychung galaxy 23437408 2008-04-09 11:26 nt.chunk.00.nhr +#-rw-r--r-- 1 wychung galaxy 3689920 2008-04-09 11:26 nt.chunk.00.nin +#-rw-r--r-- 1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq +#...etc... +# +#Your blastdb.loc file should include an entry per line for each "base name" +#you have stored. For example: +# +#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk +#wgs_30_Nov_2009 wgs 30 Nov 2009 /depot/data2/galaxy/blastdb/wgs/wgs.chunk +#test_20_Sep_2008 test 20 Sep 2008 /depot/data2/galaxy/blastdb/test/test +#...etc... +# +#You can download the NCBI provided nucleotide databases like NT from here: +#ftp://ftp.ncbi.nlm.nih.gov/blast/db/ +# +#See also blastdb_p.loc which is for any protein BLAST database, and +#blastdb_d.loc which is for any protein domains databases (like CDD). + + |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tool-data/blastdb_d.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/blastdb_d.loc.sample Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,35 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of protein domain databases, using three columns tab separated +#(longer whitespace are TAB characters): +# +#<unique_id> <database_caption> <base_name_path> +# +#The captions typically contain spaces and might end with the build date. +#It is important that the actual database name does not have a space in it, +#and that there are only two tabs on each line. +# +#You can download the NCBI provided databases as tar-balls from here: +#ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ +# +#So, for example, if your database is CDD and the path to your base name +#is /data/blastdb/Cdd, then the blastdb_d.loc entry would look like this: +# +#Cdd{tab}NCBI Conserved Domains Database (CDD){tab}/data/blastdb/Cdd +# +#and your /data/blastdb directory would contain all of the files associated +#with the database, /data/blastdb/Cdd.*. +# +#Your blastdb_d.loc file should include an entry per line for each "base name" +#you have stored. For example: +# +#Cdd NCBI CDD /data/blastdb/domains/Cdd +#Kog KOG (eukaryotes) /data/blastdb/domains/Kog +#Cog COG (prokaryotes) /data/blastdb/domains/Cog +#Pfam Pfam-A /data/blastdb/domains/Pfam +#Smart SMART /data/blastdb/domains/Smart +#Tigr TIGR /data/blastdb/domains/Tigr +#Prk Protein Clusters database /data/blastdb/domains/Prk +#...etc... +# +#See also blastdb.loc which is for any nucleotide BLAST database, and +#blastdb_p.loc which is for any protein BLAST databases. |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tool-data/blastdb_p.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/blastdb_p.loc.sample Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,30 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of protein BLAST databases, using three columns tab separated +#(longer whitespace are TAB characters): +# +#<unique_id> <database_caption> <base_name_path> +# +#The captions typically contain spaces and might end with the build date. +#It is important that the actual database name does not have a space in +#it, and that there are only two tabs on each line. +# +#So, for example, if your database is NR and the path to your base name +#is /data/blastdb/nr, then the blastdb_p.loc entry would look like this: +# +#nr{tab}NCBI NR (non redundant){tab}/data/blastdb/nr +# +#and your /data/blastdb directory would contain all of the files associated +#with the database, /data/blastdb/nr.*. +# +#Your blastdb_p.loc file should include an entry per line for each "base name" +#you have stored. For example: +# +#nr_05Jun2010 NCBI NR (non redundant) 05 Jun 2010 /data/blastdb/05Jun2010/nr +#nr_15Aug2010 NCBI NR (non redundant) 15 Aug 2010 /data/blastdb/15Aug2010/nr +#...etc... +# +#You can download the NCBI provided protein databases like NR from here: +#ftp://ftp.ncbi.nlm.nih.gov/blast/db/ +# +#See also blastdb.loc which is for any nucleotide BLAST database, and +#blastdb_d.loc which is for any protein domains databases (like CDD). |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tool-data/tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/tool_data_table_conf.xml.sample Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,14 @@ +<tables> + <table name="blastdb" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/blastdb.loc" /> + </table> + <table name="blastdb_p" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/blastdb_p.loc" /> + </table> + <table name="blastdb_d" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/blastdb_d.loc" /> + </table> +</tables> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/README.rst Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,266 @@\n+Galaxy wrappers for NCBI BLAST+ suite\n+=====================================\n+\n+These wrappers are copyright 2010-2014 by Peter Cock (The James Hutton Institute,\n+UK) and additional contributors including Edward Kirton, John Chilton,\n+Nicola Soranzo, Jim Johnson, and Bjoern Gruening.\n+\n+See the licence text below.\n+\n+Currently tested with NCBI BLAST 2.2.30+ (i.e. version 2.2.30 of BLAST+),\n+and does not work with the NCBI \'legacy\' BLAST suite (e.g. ``blastall``).\n+\n+Note that these wrappers (and the associated datatypes) were originally\n+distributed as part of the main Galaxy repository, but as of August 2012\n+moved to the Galaxy Tool Shed as ``ncbi_blast_plus`` (and ``blast_datatypes``).\n+My thanks to Dannon Baker from the Galaxy development team for his assistance\n+with this.\n+\n+These wrappers are available from the Galaxy Tool Shed at:\n+http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+\n+\n+Citation\n+========\n+\n+Please cite the following paper (currently available as a preprint):\n+\n+NCBI BLAST+ integrated into Galaxy.\n+P.J.A. Cock, J.M. Chilton, B. Gruening, J.E. Johnson, N. Soranzo\n+bioRxiv DOI: http://dx.doi.org/10.1101/014043 (preprint)\n+\n+You should also cite the NCBI BLAST+ tools:\n+\n+BLAST+: architecture and applications.\n+C. Camacho et al. BMC Bioinformatics 2009, 10:421.\n+DOI: http://dx.doi.org/10.1186/1471-2105-10-421\n+\n+\n+Automated Installation\n+======================\n+\n+Galaxy should be able to automatically install the dependencies, i.e. 
the\n+BLAST+ binaries and the ``blast_datatypes`` repository which defines the\n+BLAST XML file format (``blastxml``), protein and nucleotide BLAST databases\n+(``blastdbp`` and ``blastdbn``), and so on.\n+\n+See the configuration notes below.\n+\n+Manual Installation\n+===================\n+\n+For those not using Galaxy\'s automated installation from the Tool Shed, put\n+the XML and Python files in the ``tools/ncbi_blast_plus/`` folder and add the\n+XML files to your ``tool_conf.xml`` as normal. For example, use::\n+\n+ <section name="NCBI BLAST+" id="ncbi_blast_plus_tools">\n+ <tool file="ncbi_blast_plus/ncbi_blastn_wrapper.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_blastp_wrapper.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_blastx_wrapper.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_tblastn_wrapper.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_tblastx_wrapper.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_makeblastdb.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_dustmasker_wrapper.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_blastdbcmd_info.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_rpsblast_wrapper.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml" />\n+ <tool file="ncbi_blast_plus/ncbi_makeprofiledb.xml" />\n+ <tool file="ncbi_blast_plus/blastxml_to_tabular.xml" />\n+ </section>\n+\n+You will also need to install ``blast_datatypes`` from the Tool Shed. This\n+defines the BLAST XML file format (``blastxml``), BLAST databases, etc:\n+\n+* http://toolshed.g2.bx.psu.edu/view/devteam/blast_datatypes\n+\n+As described above for an automated installation, you must also tell Galaxy\n+about any system level BLAST databases using the ``tool-data/blastdb*.loc``\n+files. Also merge the ``tool-data/tool_data_table_conf.xml.sample`` contents\n+into your ``tool_data_table_conf.xml`` file.\n+\n+You must install the NCBI BLAST+ standalone tools somewhere on the system\n+path. 
Currently the unit tests are written using BLAST+ 2.2.30.\n+\n+Run the functional tests (adjusting the section identifier to match your\n+``tool_conf.xml.sample`` file)::\n+\n+ ./run_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools\n+\n+Configuration\n+=============\n+\n+You must tell Galaxy about any system level BLAST databases using configuration\n+files ``blastdb.loc`` (nucleotide databases like NT) and ``blastdb_p.loc``\n+(protein databases like NR), and ``blastdb_d.loc`` (protein domain databases\n+like CDD or SMART) which ar'..b'STP task option.\n+ - Wrappers for segmasker, dustmasker and convert2blastmask\n+ (contribution from Bjoern Gruening).\n+ - Supports using maskinfo with ``makeblastdb`` wrapper.\n+ - Supports setting a taxonomy ID in ``makeblastdb`` wrapper.\n+ - Subtle changes like new conditional settings will require some old\n+ workflows be updated to cope.\n+v0.1.01 - Requires ``blastdbd`` datatype (``blast_datatypes`` v0.0.19).\n+ - Wrapper for makeprofiledb added to create protein domain databases\n+ (based on contribution from Bjoern Gruening).\n+ - The RPS-BLAST and RPS-TBLASTN wrappers support using a protein\n+ domain database from the user\'s history.\n+ - Tool definitions now embed citation information (by John Chilton).\n+ - BLAST tools support GI and SeqID filters (added by Bjoern Gruening).\n+v0.1.02 - Now depends on ``package_blast_plus_2_2_30`` in ToolShed.\n+ - Tests updated for BLAST+ 2.2.30 instead of BLAST+ 2.2.29.\n+ - New tasks ``blastp-fast``, ``blastx-fast`` and ``tblastn-fast``.\n+ - New minimum query HSP coverage option, ``-qcov_hsp_perc``.\n+ - Removed ``-word_size`` from RPS-BLAST and RPS-TBLASTN wrappers, this\n+ is set during database construction and should not have been offered\n+ as a command line option in releases prior to BLAST+ 2.2.30.\n+ - BLAST database ``blastdb*.loc`` files now accessed via the XML\n+ table definitions in Galaxy\'s ``tool_data_table_conf.xml`` file,\n+ setup via 
``tool-data/tool_data_table_conf.xml.sample``\n+ - Replace ``.extra_files_path`` with ``.files_path`` (internal change,\n+\t thanks to Bjoern Gruening and John Chilton).\n+ - Added "NCBI BLAST+ integrated into Galaxy" preprint citation.\n+======= ======================================================================\n+\n+\n+Bug Reports\n+===========\n+\n+You can file an issue here https://github.com/peterjc/galaxy_blast/issues or ask\n+us on the Galaxy development list http://lists.bx.psu.edu/listinfo/galaxy-dev\n+\n+\n+Developers\n+==========\n+\n+This script and related tools were originally developed on the \'tools\' branch\n+of the following Mercurial repository:\n+https://bitbucket.org/peterjc/galaxy-central/\n+\n+As of July 2013, development is continuing on a dedicated GitHub repository:\n+https://github.com/peterjc/galaxy_blast\n+\n+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use\n+the following command from the GitHub repository root folder::\n+\n+ $ tools/ncbi_blast_plus/make_ncbi_blast_plus.sh\n+\n+This simplifies ensuring a consistent set of files is bundled each time,\n+including all the relevant test files.\n+\n+When updating the version of BLAST+, many of the sample data files used for\n+the unit tests must be regenerated. 
This script automates that task::\n+\n+ $ tools/ncbi_blast_plus/update_test_files.sh\n+\n+\n+Licence (MIT)\n+=============\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n+THE SOFTWARE.\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/blastxml_to_tabular.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Fri Jan 30 08:27:28 2015 -0500 |
| [ |
| b'@@ -0,0 +1,344 @@\n+#!/usr/bin/env python\n+"""Convert a BLAST XML file to tabular output.\n+\n+Takes three command line options, input BLAST XML filename, output tabular\n+BLAST filename, output format (std for standard 12 columns, or ext for the\n+extended 25 columns offered in the BLAST+ wrappers).\n+\n+The 12 columns output are \'qseqid sseqid pident length mismatch gapopen qstart\n+qend sstart send evalue bitscore\' or \'std\' at the BLAST+ command line, which\n+mean:\n+ \n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+ 1 qseqid Query Seq-id (ID of your sequence)\n+ 2 sseqid Subject Seq-id (ID of the database hit)\n+ 3 pident Percentage of identical matches\n+ 4 length Alignment length\n+ 5 mismatch Number of mismatches\n+ 6 gapopen Number of gap openings\n+ 7 qstart Start of alignment in query\n+ 8 qend End of alignment in query\n+ 9 sstart Start of alignment in subject (database hit)\n+ 10 send End of alignment in subject (database hit)\n+ 11 evalue Expectation value (E-value)\n+ 12 bitscore Bit score\n+====== ========= ============================================\n+\n+The additional columns offered in the Galaxy BLAST+ wrappers are:\n+\n+====== ============= ===========================================\n+Column NCBI name Description\n+------ ------------- -------------------------------------------\n+ 13 sallseqid All subject Seq-id(s), separated by \';\'\n+ 14 score Raw score\n+ 15 nident Number of identical matches\n+ 16 positive Number of positive-scoring matches\n+ 17 gaps Total number of gaps\n+ 18 ppos Percentage of positive-scoring matches\n+ 19 qframe Query frame\n+ 20 sframe Subject frame\n+ 21 qseq Aligned part of query sequence\n+ 22 sseq Aligned part of subject sequence\n+ 23 qlen Query sequence length\n+ 24 slen Subject sequence length\n+ 25 salltitles All subject titles, separated by \'<>\'\n+====== ============= 
===========================================\n+\n+Most of these fields are given explicitly in the XML file, while others like\n+the percentage identity and the number of gap openings must be calculated.\n+\n+Be aware that the sequence in the extended tabular output or XML direct from\n+BLAST+ may or may not use XXXX masking on regions of low complexity. This\n+can throw off the calculation of percentage identity and gap openings.\n+[In fact, both BLAST 2.2.24+ and 2.2.25+ have a subtle bug in this regard,\n+with these numbers changing depending on whether or not the low complexity\n+filter is used.]\n+\n+This script attempts to produce identical output to what BLAST+ would have done.\n+However, check this with "diff -b ..." since BLAST+ sometimes includes an extra\n+space character (probably a bug).\n+"""\n+import sys\n+import re\n+import os\n+from optparse import OptionParser\n+\n+if "-v" in sys.argv or "--version" in sys.argv:\n+ print "v0.1.04"\n+ sys.exit(0)\n+\n+if sys.version_info[:2] >= ( 2, 5 ):\n+ try:\n+ from xml.etree import cElementTree as ElementTree\n+ except ImportError:\n+ from xml.etree import ElementTree as ElementTree\n+else:\n+ from galaxy import eggs\n+ import pkg_resources; pkg_resources.require( "elementtree" )\n+ from elementtree import ElementTree\n+\n+def stop_err( msg ):\n+ sys.stderr.write("%s\\n" % msg)\n+ sys.exit(1)\n+\n+if len(sys.argv) == 4 and sys.argv[3] in ["std", "x22", "ext"]:\n+ #False positive if user really has a BLAST XML file called \'std\' or \'ext\'...\n+ stop_err("""ERROR: The script API has changed, sorry.\n+\n+Instead of the old style:\n+\n+$ python blastxml_to_tabular.py input.xml output.tabular std\n+\n+Please use:\n+\n+$ python blastxml_to_tabular.py -o output.tabular -c std input.xml\n+\n+For more information, use:\n+\n+$ python blastxml_to_tabular.py -h\n+""")\n+\n+usage = """'..b'X")))\n+\n+\n+ evalue = hsp.findtext("Hsp_evalue")\n+ if evalue == "0":\n+ evalue = "0.0"\n+ else:\n+ evalue = "%0.0e" % 
float(evalue)\n+ \n+ bitscore = float(hsp.findtext("Hsp_bit-score"))\n+ if bitscore < 100:\n+ #Seems to show one decimal place for lower scores\n+ bitscore = "%0.1f" % bitscore\n+ else:\n+ #Note BLAST does not round to nearest int, it truncates\n+ bitscore = "%i" % bitscore\n+\n+ values = [qseqid,\n+ sseqid,\n+ pident,\n+ length, #hsp.findtext("Hsp_align-len")\n+ str(mismatch),\n+ gapopen,\n+ hsp.findtext("Hsp_query-from"), #qstart,\n+ hsp.findtext("Hsp_query-to"), #qend,\n+ hsp.findtext("Hsp_hit-from"), #sstart,\n+ hsp.findtext("Hsp_hit-to"), #send,\n+ evalue, #hsp.findtext("Hsp_evalue") in scientific notation\n+ bitscore, #hsp.findtext("Hsp_bit-score") rounded\n+ ]\n+\n+ if extended:\n+ try:\n+ sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >"))\n+ salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >"))\n+ except IndexError as e:\n+ stop_err("Problem splitting multuple hits?\\n%r\\n--> %s" % (hit_def, e))\n+ #print hit_def, "-->", sallseqid\n+ positive = hsp.findtext("Hsp_positive")\n+ ppos = "%0.2f" % (100*float(positive)/float(length))\n+ qframe = hsp.findtext("Hsp_query-frame")\n+ sframe = hsp.findtext("Hsp_hit-frame")\n+ if blast_program == "blastp":\n+ #Probably a bug in BLASTP that they use 0 or 1 depending on format\n+ if qframe == "0": qframe = "1"\n+ if sframe == "0": sframe = "1"\n+ slen = int(hit.findtext("Hit_len"))\n+ values.extend([sallseqid,\n+ hsp.findtext("Hsp_score"), #score,\n+ nident,\n+ positive,\n+ hsp.findtext("Hsp_gaps"), #gaps,\n+ ppos,\n+ qframe,\n+ sframe,\n+ #NOTE - for blastp, XML shows original seq, tabular uses XXX masking\n+ q_seq,\n+ h_seq,\n+ str(qlen),\n+ str(slen),\n+ salltitles,\n+ ])\n+ if cols:\n+ #Only a subset of the columns are needed\n+ values = [values[colnames.index(c)] for c in cols]\n+ #print "\\t".join(values) \n+ output_handle.write("\\t".join(values) + "\\n")\n+ # prevents ElementTree from growing large datastructure\n+ root.clear()\n+ elem.clear()\n+\n+\n+if 
options.output:\n+ outfile = open(options.output, "w")\n+else:\n+ outfile = sys.stdout\n+\n+for in_file in args:\n+ blast_program = None\n+ convert(in_file, outfile)\n+\n+if options.output:\n+ outfile.close()\n+else:\n+ #Using stdout\n+ pass\n+\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/blastxml_to_tabular.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,215 @@\n+<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.1.04">\n+ <description>Convert BLAST XML output to tabular</description>\n+ <version_command interpreter="python">blastxml_to_tabular.py --version</version_command>\n+ <command interpreter="python">\n+blastxml_to_tabular.py -o "$tabular_file"\n+#if $output.out_format == "cols":\n+#set cols = (str($output.std_cols)+","+str($output.ext_cols)).replace("None", " ").replace(",,", ",").replace(",", " ")\n+-c "$cols"\n+#else\n+-c "$output.out_format"\n+#end if\n+#for i in $blastxml_file#"${i}" #end for#\n+ </command>\n+ <stdio>\n+ <!-- Anything other than zero is an error -->\n+ <exit_code range="1:" />\n+ <exit_code range=":-1" />\n+ </stdio>\n+ <inputs>\n+ <param name="blastxml_file" type="data" format="blastxml" multiple="true" label="BLAST results as XML"/>\n+ <conditional name="output">\n+ <param name="out_format" type="select" label="Output format">\n+ <option value="std" selected="True">Tabular (standard 12 columns)</option>\n+ <option value="ext">Tabular (extended 25 columns)</option>\n+ <option value="cols">Tabular (select columns to output)</option>\n+ </param>\n+ <when value="std"/>\n+ <when value="ext"/>\n+ <when value="cols">\n+ <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns">\n+ <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option>\n+ <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option>\n+ <option selected="true" value="pident">pident = Percentage of identical matches</option>\n+ <option selected="true" value="length">length = Alignment length</option>\n+ <option selected="true" value="mismatch">mismatch = Number of mismatches</option>\n+ <option selected="true" value="gapopen">gapopen = Number of gap openings</option>\n+ <option selected="true" value="qstart">qstart = Start of alignment in query</option>\n+ <option selected="true" 
value="qend">qend = End of alignment in query</option>\n+ <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option>\n+ <option selected="true" value="send">send = End of alignment in subject (database hit)</option>\n+ <option selected="true" value="evalue">evalue = Expectation value (E-value)</option>\n+ <option selected="true" value="bitscore">bitscore = Bit score</option>\n+ </param>\n+ <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns">\n+ <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a \';\'</option>\n+ <option value="score">score = Raw score</option>\n+ <option value="nident">nident = Number of identical matches</option>\n+ <option value="positive">positive = Number of positive-scoring matches</option>\n+ <option value="gaps">gaps = Total number of gaps</option>\n+ <option value="ppos">ppos = Percentage of positive-scoring matches</option>\n+ <option value="qframe">qframe = Query frame</option>\n+ <option value="sframe">sframe = Subject frame</option>\n+ <option value="qseq">qseq = Aligned part of query sequence</option>\n+ <option value="sseq">sseq = Aligned part of subject sequence</option>\n+ <option value="qlen">qlen = Query sequence length</option>\n+ <option value="slen">slen = Subject sequence length</option>\n+ <option value="salltitles">salltitles = All subject title(s), separated by a \'<>\'</option>\n+ </param>\n+ </when>\n+ </conditional>\n+ </inputs>\n+ <outputs>\n+ <data nam'..b'ame="out_format" value="std" />\n+ <output name="tabular_file" file="blastn_rhodopsin_vs_three_human_converted.tabular" ftype="tabular" />\n+ </test>\n+ <test>\n+ <param name="blastxml_file" value="blastn_rhodopsin_vs_three_human.xml" ftype="blastxml" />\n+ <param name="out_format" value="cols" />\n+ <param name="std_cols" value="qseqid,sseqid,pident" />\n+ <param name="ext_cols" value="qlen,slen" />\n+ <output name="tabular_file" 
file="blastn_rhodopsin_vs_three_human.columns.tabular" ftype="tabular" />\n+ </test>\n+ </tests>\n+ <help>\n+ \n+**What it does**\n+\n+NCBI BLAST+ (and the older NCBI \'legacy\' BLAST) can output in a range of\n+formats including tabular and a more detailed XML format. A complex workflow\n+may need both the XML and the tabular output - but running BLAST twice is\n+slow and wasteful.\n+\n+This tool takes the BLAST XML output and can convert it into the\n+standard 12 column tabular equivalent:\n+\n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+ 1 qseqid Query Seq-id (ID of your sequence)\n+ 2 sseqid Subject Seq-id (ID of the database hit)\n+ 3 pident Percentage of identical matches\n+ 4 length Alignment length\n+ 5 mismatch Number of mismatches\n+ 6 gapopen Number of gap openings\n+ 7 qstart Start of alignment in query\n+ 8 qend End of alignment in query\n+ 9 sstart Start of alignment in subject (database hit)\n+ 10 send End of alignment in subject (database hit)\n+ 11 evalue Expectation value (E-value)\n+ 12 bitscore Bit score\n+====== ========= ============================================\n+\n+The BLAST+ tools can optionally output additional columns of information,\n+but this takes longer to calculate. Most (but not all) of these columns are\n+included by selecting the extended tabular output. The extra columns are\n+included *after* the standard 12 columns. This is so that you can write\n+workflow filtering steps that accept either the 12 or 25 column tabular\n+BLAST output. 
This tool now uses this extended 25 column output by default.\n+\n+====== ============= ===========================================\n+Column NCBI name Description\n+------ ------------- -------------------------------------------\n+ 13 sallseqid All subject Seq-id(s), separated by a \';\'\n+ 14 score Raw score\n+ 15 nident Number of identical matches\n+ 16 positive Number of positive-scoring matches\n+ 17 gaps Total number of gaps\n+ 18 ppos Percentage of positive-scoring matches\n+ 19 qframe Query frame\n+ 20 sframe Subject frame\n+ 21 qseq Aligned part of query sequence\n+ 22 sseq Aligned part of subject sequence\n+ 23 qlen Query sequence length\n+ 24 slen Subject sequence length\n+ 25 salltitles All subject title(s), separated by a \'<>\'\n+====== ============= ===========================================\n+\n+Beware that the XML file (and thus the conversion) and the tabular output\n+direct from BLAST+ may differ in the presence of XXXX masking on regions\n+of low complexity (columns 21 and 22), and thus also calculated figures like\n+the percentage identity (column 3).\n+\n+**References**\n+\n+If you use this Galaxy tool in work leading to a scientific publication please\n+cite:\n+\n+Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n+Galaxy tools and workflows for sequence analysis with applications\n+in molecular plant pathology. PeerJ 1:e167\n+http://dx.doi.org/10.7717/peerj.167\n+\n+This wrapper is available to install into other Galaxy Instances via the Galaxy\n+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+ </help>\n+ <citations>\n+ <citation type="doi">10.7717/peerj.167</citation>\n+ </citations>\n+</tool>\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/check_no_duplicates.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/check_no_duplicates.py Fri Jan 30 08:27:28 2015 -0500 |
| [ |
| @@ -0,0 +1,46 @@ +#!/usr/bin/env python +"""Check for duplicate sequence identifiers in FASTA files. + +This is run as a pre-check before makeblastdb, in order to avoid +a regression bug in BLAST+ 2.2.28 which fails to catch this. See: +http://blastedbio.blogspot.co.uk/2012/10/my-ids-not-good-enough-for-ncbi-blast.html + +This script takes one or more FASTA filenames as input, and +will return a non-zero error if any duplicate identifiers +are found. +""" +import sys +import os + +if "-v" in sys.argv or "--version" in sys.argv: + print("v0.0.22") + sys.exit(0) + +def stop_err(msg, error=1): + sys.stderr.write("%s\n" % msg) + sys.exit(error) + + +identifiers = set() +files = 0 +for filename in sys.argv[1:]: + if not os.path.isfile(filename): + stop_err("Missing FASTA file %r" % filename, 2) + files += 1 + handle = open(filename) + for line in handle: + if line.startswith(">"): + #The split will also take care of the new line character, + #e.g. ">test\n" and ">test description here\n" both give "test" + seq_id = line[1:].split(None, 1)[0] + if seq_id in identifiers: + handle.close() + stop_err("Repeated identifiers, e.g. %r" % seq_id, 1) + identifiers.add(seq_id) + handle.close() +if not files: + stop_err("No FASTA files given to check for duplicates", 3) +elif files == 1: + print("%i sequences" % len(identifiers)) +else: + print("%i sequences in %i FASTA files" % (len(identifiers), files)) |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,48 @@ +<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.1.02"> + <description>Show BLAST database information from blastdbcmd</description> + <macros> + <token name="@BINARY@">blastdbcmd</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out "$info" + </command> + <expand macro="stdio" /> + <inputs> + <expand macro="input_conditional_choose_db_type" /> + </inputs> + <outputs> + <data name="info" format="txt" label="${db_opts.database.fields.name} info" /> + </outputs> + <tests> + <test> + <param name="db_opts|db_type" value="prot" /> + <param name="db_opts|database" value="four_human_proteins" /> + <output name="info" file="four_human_proteins.dbinfo.txt" ftype="txt" lines_diff="4" /> + </test> + <test> + <param name="db_opts|db_type" value="nucl" /> + <param name="db_opts|database" value="three_human_mRNA" /> + <output name="info" file="three_human_mRNA.dbinfo.txt" ftype="txt" lines_diff="4" /> + </test> + </tests> + <help> + +**What it does** + +Calls the NCBI BLAST+ blastdbcmd command line tool with the -info +switch to give summary information about a BLAST database, such as +the size (number of sequences and total length) and date. + +------- + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +@REFERENCES@ + </help> + <expand macro="blast_citations" /> +</tool> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| [ |
| @@ -0,0 +1,118 @@ +<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.1.02"> + <description>Extract sequence(s) from BLAST database</description> + <macros> + <token name="@BINARY@">blastdbcmd</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" + +##TODO: What about -ctrl_a and -target_only as advanced options? + +#if $id_opts.id_type=="file": +-entry_batch "$id_opts.entries" +#else: +##Perform some simple search/replaces to remove whitespace +##and make it comma separated, and escape any pipe characters +-entry "$id_opts.entries.replace('\r',',').replace('\n',',').replace(' ','').replace(',,',',').replace(',,',',').strip(',').replace('|','\|')" +#end if + +##When building a BLAST database, to ensure unique IDs makeblastdb will +##do things like turning a FASTA entry with ID of ERP44 into lcl|ERP44 +##(if using -parse_seqids) or simply assign it an ID using the record +##number like gnl|BL_ORD_ID|123 (to cope with duplicate IDs in the FASTA +##file). In -parse_seqids mode, a duplicate FASTA ID gives an error. +## +##The BLAST plain text and XML output will contain these BLAST IDs, but +##the tabular output does not (at least, not in BLAST 2.2.25+). +##Therefore in general, Galaxy users won't care about the (internal) +##BLAST identifiers. +## +##The blastdbcmd FASTA output will also contain these IDs, but in the +##context of the BLAST tabular output they are not helpful. Therefore +##to recover the original ID as used in the FASTA file for makeblastdb +##we need a litte post processing. +## +##We remove the NCBI's lcl|... 
or gnl|BL_ORD_ID|123 prefixes +##using sed, however the exact syntax differs for Mac OS X's sed + +#if str($outfmt)=="blastid": +-out "$seq" +#else if sys.platform == "darwin": +| sed -E 's/^>(lcl\||gnl\|BL_ORD_ID\|[0-9]* )/>/1' > "$seq" +#else: +| sed 's/>\(lcl|\|gnl|BL_ORD_ID|[0-9]* \)/>/1' > "$seq" +#end if + </command> + <expand macro="stdio" /> + <inputs> + <expand macro="input_conditional_choose_db_type" /> + <conditional name="id_opts"> + <param name="id_type" type="select" label="Type of identifier list"> + <option value="file">From file</option> + <option value="prompt">User entered</option> + </param> + <when value="file"> + <param name="entries" type="data" format="txt,tabular" label="Sequence identifier(s)" help="Plain text file with one ID per line (i.e. single column tabular file)"/> + </when> + <when value="prompt"> + <param name="entries" type="text" label="Sequence identifier(s)" help="Comma or new line separated list." optional="False" area="True" size="10x30"/> + </when> + </conditional> + <param name="outfmt" type="select" label="Output format"> + <option value="original">FASTA with original identifiers</option> + <option value="blastid">FASTA with BLAST assigned identifiers</option> + </param> + </inputs> + <outputs> + <data name="seq" format="fasta" label="Sequences from ${db_opts.database.fields.name}" /> + </outputs> + <tests> + <test> + <param name="db_opts|db_type" value="prot" /> + <param name="db_opts|database" value="four_human_proteins" /> + <param name="id_opts|id_type" value="prompt" /> + <param name="id_opts|entries" value="all" /> + <param name="outfmt" value="original" /> + <output name="seq" file="four_human_proteins.fasta" ftype="fasta" /> + </test> + </tests> + <help> + +**What it does** + +Extracts FASTA formatted sequences from a BLAST database +using the NCBI BLAST+ blastdbcmd command line tool. + +.. 
class:: warningmark + +**BLAST assigned identifiers** + +When a BLAST database is constructed from a FASTA file, the +original identifiers can be replaced with BLAST assigned +identifiers, partly to ensure uniqueness. e.g. Sometimes +a prefix of 'lcl|' is added (lcl is short for local), +or an arbitrary name starting 'gnl|BL_ORD_ID|' is created. + +If you are using the tabular output from BLAST, it will contain +the original identifiers - not the BLAST assigned identifiers +suitable for use with the blastdbcmd tool. + +If you are using the XML or plain text output, this will also +contain the BLAST assigned identifiers. However, this means +getting a list of BLAST assigned identifiers isn't straightforward. + +------- + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +@REFERENCES@ + </help> + <expand macro="blast_citations" /> +</tool> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,156 @@ +<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.1.02"> + <description>Search nucleotide database with nucleotide query sequence(s)</description> + <!-- If job splitting is enabled, break up the query file into parts --> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism> + <macros> + <token name="@BINARY@">blastn</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +blastn +-query "$query" +@BLAST_DB_SUBJECT@ +-task $blast_type +-evalue $evalue_cutoff +@BLAST_OUTPUT@ +@THREADS@ +#if $adv_opts.adv_opts_selector=="advanced": +$adv_opts.strand +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_WORD_SIZE@ +#if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ): +-perc_identity $adv_opts.identity_cutoff +#end if +$adv_opts.ungapped +@ADV_ID_LIST_FILTER@ +@ADV_QCOV_HSP_PERC@ +## End of advanced options: +#end if + </command> + + <expand macro="stdio" /> + + <inputs> + <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> + + <expand macro="input_conditional_nucleotide_db" /> + + <param name="blast_type" type="select" display="radio" label="Type of BLAST"> + <option value="megablast">megablast - Traditional megablast used to find very similar (e.g., intraspecies or closely related species) sequences</option> + <option value="blastn">blastn - Traditional BLASTN requiring an exact match of 11, for somewhat similar sequences</option> + <option value="blastn-short">blastn-short - BLASTN program optimized for sequences shorter than 50 bases</option> + <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option> + <!-- Using BLAST 
2.2.24+ this gives an error: + BLAST engine error: Program type 'vecscreen' not supported + <option value="vecscreen">vecscreen</option> + In any case, vecscreen has gone in BLAST+ 2.2.28 + --> + <!-- BLAST+ 2.2.28 also offers rmblastn --> + </param> + <expand macro="input_evalue" /> + <expand macro="input_out_format" /> + <expand macro="advanced_options"> + <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' --> + <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" /> + <expand macro="input_strand" /> + <expand macro="input_max_hits" /> + <param name="identity_cutoff" type="float" min="0" max="100" value="0" label="Percent identity cutoff (-perc_identity)" help="Use zero for no cutoff" /> + + <!-- I'd like word_size to be optional, with minimum 4 for blastn --> + <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4."> + <validator type="in_range" min="0" /> + </param> + <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" 
truevalue="-ungapped" falsevalue="" checked="false" /> + <expand macro="input_parse_deflines" /> + <expand macro="advanced_optional_id_files" /> + <expand macro="input_qcov_hsp_perc" /> + </expand> + </inputs> + <outputs> + <data name="output1" format="tabular" label="${blast_type.value} $query.name vs @ON_DB_SUBJECT@"> + <expand macro="output_change_format" /> + </data> + </outputs> + <tests> + <test> + <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-40" /> + <param name="out_format" value="5" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastn_rhodopsin_vs_three_human.xml" ftype="blastxml" /> + </test> + <test> + <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-40" /> + <param name="out_format" value="6" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastn_rhodopsin_vs_three_human.tabular" ftype="tabular" /> + </test> + <test> + <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-40" /> + <param name="out_format" value="cols" /> + <param name="std_cols" value="qseqid,sseqid,pident" /> + <param name="ext_cols" value="qlen,slen" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastn_rhodopsin_vs_three_human.columns.tabular" ftype="tabular" /> + </test> + <test> + <param name="query" value="chimera.fasta" ftype="fasta" /> 
+ <param name="db_opts_selector" value="db" /> + <param name="database" value="three_human_mRNA" /> + <param name="out_format" value="6" /> + <param name="adv_opts_selector" value="advanced" /> + <param name="max_hits" value="1" /> + <output name="output1" file="blastn_chimera_vs_three_human_max1.tabular" ftype="tabular" /> + </test> + <test> + <param name="query" value="chimera.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="db" /> + <param name="database" value="three_human_mRNA" /> + <param name="out_format" value="0" /> + <param name="adv_opts_selector" value="advanced" /> + <param name="max_hits" value="1" /> + <output name="output1" file="blastn_chimera_vs_three_human_max1.txt" ftype="txt" /> + </test> + </tests> + <help> + +@SEARCH_TIME_WARNING@ + +**What it does** + +Search a *nucleotide database* using a *nucleotide query*, +using the NCBI BLAST+ blastn command line tool. +Algorithms include blastn, megablast, and discontiguous megablast. + +@FASTA_WARNING@ + +----- + +@OUTPUT_FORMAT@ + +------- + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +@REFERENCES@ + </help> + <expand macro="blast_citations" /> +</tool> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,158 @@ +<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.1.02"> + <description>Search protein database with protein query sequence(s)</description> + <!-- If job splitting is enabled, break up the query file into parts --> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" /> + <macros> + <token name="@BINARY@">blastp</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +blastp +-query "$query" +@BLAST_DB_SUBJECT@ +-task $blast_type +-evalue $evalue_cutoff +@BLAST_OUTPUT@ +@THREADS@ +#if $adv_opts.adv_opts_selector=="advanced": +-matrix $adv_opts.matrix +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_WORD_SIZE@ +##Ungapped disabled for now - see comments below +##$adv_opts.ungapped +@ADV_ID_LIST_FILTER@ +@ADV_QCOV_HSP_PERC@ +## End of advanced options: +#end if + </command> + + <expand macro="stdio" /> + + <inputs> + <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> + + <expand macro="input_conditional_protein_db" /> + + <param name="blast_type" type="select" display="radio" label="Type of BLAST"> + <option value="blastp">blastp - Traditional BLASTP to compare a protein query to a protein database</option> + <option value="blastp-fast">blastp-fast - Use longer words for seeding, faster but less accurate</option> + <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option> + </param> + <expand macro="input_evalue" /> + <expand macro="input_out_format" /> + <expand macro="advanced_options"> + <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' --> + <expand macro="input_filter_query_default_false" /> + <expand macro="input_scoring_matrix" /> + <expand 
macro="input_max_hits" /> + <expand macro="input_word_size" /> + <!-- + Can't use '-ungapped' on its own, error back is: + Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search + Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.' + <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" /> + --> + <expand macro="input_parse_deflines" /> + <expand macro="advanced_optional_id_files" /> + <expand macro="input_qcov_hsp_perc" /> + </expand> + </inputs> + <outputs> + <data name="output1" format="tabular" label="${blast_type.value} $query.name vs @ON_DB_SUBJECT@"> + <expand macro="output_change_format" /> + </data> + </outputs> + <tests> + <test> + <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-8" /> + <param name="blast_type" value="blastp" /> + <param name="out_format" value="5" /> + <param name="adv_opts_selector" value="advanced" /> + <param name="filter_query" value="False" /> + <param name="matrix" value="BLOSUM62" /> + <param name="max_hits" value="0" /> + <param name="word_size" value="0" /> + <param name="parse_deflines" value="True" /> + <param name="qcov_hsp_perc" value="25" /> + <output name="output1" file="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" /> + </test> + <test> + <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-8" /> + <param name="blast_type" value="blastp" /> + <param name="out_format" 
value="6" /> + <param name="adv_opts_selector" value="advanced" /> + <param name="filter_query" value="False" /> + <param name="matrix" value="BLOSUM62" /> + <param name="max_hits" value="0" /> + <param name="word_size" value="0" /> + <param name="parse_deflines" value="True" /> + <param name="qcov_hsp_perc" value="25" /> + <output name="output1" file="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" /> + </test> + <test> + <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-8" /> + <param name="blast_type" value="blastp" /> + <param name="out_format" value="ext" /> + <param name="adv_opts_selector" value="advanced" /> + <param name="filter_query" value="False" /> + <param name="matrix" value="BLOSUM62" /> + <param name="max_hits" value="0" /> + <param name="word_size" value="0" /> + <param name="parse_deflines" value="True" /> + <param name="qcov_hsp_perc" value="25" /> + <output name="output1" file="blastp_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" /> + </test> + <test> + <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-8" /> + <param name="blast_type" value="blastp" /> + <param name="out_format" value="6" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastp_rhodopsin_vs_four_human.tabular" ftype="tabular" /> + </test> + </tests> + <help> + +@SEARCH_TIME_WARNING@ + +**What it does** + +Search a *protein database* using a *protein query*, +using the NCBI BLAST+ blastp command line tool. 
+ +@FASTA_WARNING@ + +----- + +@OUTPUT_FORMAT@ + +------- + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +@REFERENCES@ + </help> + <expand macro="blast_citations" /> +</tool> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,138 @@ +<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.1.02"> + <description>Search protein database with translated nucleotide query sequence(s)</description> + <!-- If job splitting is enabled, break up the query file into parts --> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism> + <macros> + <token name="@BINARY@">blastx</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +blastx +-query "$query" +@BLAST_DB_SUBJECT@ +-query_gencode $query_gencode +-task $blast_type +-evalue $evalue_cutoff +@BLAST_OUTPUT@ +@THREADS@ +#if $adv_opts.adv_opts_selector=="advanced": +$adv_opts.strand +-matrix $adv_opts.matrix +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_WORD_SIZE@ +$adv_opts.ungapped +@ADV_ID_LIST_FILTER@ +@ADV_QCOV_HSP_PERC@ +## End of advanced options: +#end if + </command> + + <expand macro="stdio" /> + + <inputs> + <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> + + <expand macro="input_conditional_protein_db" /> + <expand macro="input_query_gencode" /> + <param name="blast_type" type="select" display="radio" label="Type of BLAST"> + <option value="blastx">blastx - Traditional BLASTX to compare translated nucleotide query to protein database</option> + <option value="blastx-fast">blastx-fast - Use longer words for seeding, faster but less accurate</option> + </param> + <expand macro="input_evalue" /> + + <expand macro="input_out_format" /> + <expand macro="advanced_options"> + <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' --> + <expand macro="input_filter_query_default_true" /> + <expand macro="input_strand" /> + <expand macro="input_scoring_matrix" /> + <expand 
macro="input_max_hits" /> + <expand macro="input_word_size" /> + <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" /> + <expand macro="input_parse_deflines" /> + <expand macro="advanced_optional_id_files" /> + <expand macro="input_qcov_hsp_perc" /> + </expand> + </inputs> + <outputs> + <data name="output1" format="tabular" label="blastx $query.name vs @ON_DB_SUBJECT@"> + <expand macro="output_change_format" /> + </data> + </outputs> + <tests> + <test> + <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-10" /> + <param name="out_format" value="5" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastx_rhodopsin_vs_four_human.xml" ftype="blastxml" /> + </test> + <test> + <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-10" /> + <param name="out_format" value="6" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastx_rhodopsin_vs_four_human.tabular" ftype="tabular" /> + </test> + <test> + <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-10" /> + <param name="out_format" value="ext" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastx_rhodopsin_vs_four_human_ext.tabular" ftype="tabular" /> + </test> + <test> + <param name="query" 
value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-10" /> + <param name="out_format" value="cols" /> + <param name="std_cols" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore" /> + <param name="ext_cols" value="sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" /> + <param name="ids_cols" value="qgi,qacc,qaccver,sallseqid,sgi,sallgi,sacc,saccver,sallacc,stitle" /> + <param name="misc_cols" value="sstrand,frames,btop,qcovs,qcovhsp" /> + <param name="tax_cols" value="staxids,sscinames,scomnames,sblastnames,sskingdoms" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastx_rhodopsin_vs_four_human_all.tabular" ftype="tabular" /> + </test> + </tests> + <help> + +@SEARCH_TIME_WARNING@ + +**What it does** + +Search a *protein database* using a *translated nucleotide query*, +using the NCBI BLAST+ blastx command line tool. + +@FASTA_WARNING@ + +----- + +@OUTPUT_FORMAT@ + +------- + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +@REFERENCES@ + </help> + <expand macro="blast_citations" /> +</tool> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,88 @@ +<tool id="ncbi_convert2blastmask_wrapper" name="NCBI BLAST+ convert2blastmask" version="0.1.02"> + <description>Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb</description> + <macros> + <token name="@BINARY@">convert2blastmask</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +convert2blastmask +-in $infile +-masking_algorithm "$masking_algorithm" +-masking_options "$masking_options" +$parse_seqids +-out "$outfile" +-outfmt $outformat + </command> + <expand macro="stdio" /> + <inputs> + <param name="infile" type="data" format="fasta" label="masked FASTA file"/> + <param name="masking_algorithm" type="select" label="Used masking algorithm"> + <option value="dust">DUST</option> + <option value="seg" selected="true">SEG</option> + <option value="windowmasker">windowmasker</option> + <option value="repeat">repeat</option> + <option value="other">other</option> + </param> + <param name="masking_options" type="text" value="" size="20" label="Masking algorithm options to create the masked input" + help ="free text to describe the options used to create the masking files. 
(-masking_options)"> + <sanitizer invalid_char=""> + <valid initial="string.printable" /> + </sanitizer> + </param> + <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="true" label="Parse Seq-ids in FASTA input" help="(-parse_seqids)" /> + <param name="outformat" type="select" label="Output format"> + <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option> + <option value="maskinfo_asn1_text" selected="True">maskinfo ASN.1 text</option> + <option value="maskinfo_xml">maskinfo_xml</option> + </param> + </inputs> + <outputs> + <data name="outfile" format="maskinfo-asn1" label="SEG Masked File"> + <change_format> + <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" /> + <!-- + <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" /> + --> + <when input="outformat" value="maskinfo_xml" format="xml" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" /> + <param name="masking_algorithm" value="seg" /> + <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" /> + <param name="parse_seqids" value="True" /> + <param name="outformat" value="maskinfo_asn1_bin" /> + <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1-binary" /> + </test> + <test> + <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" /> + <param name="masking_algorithm" value="seg" /> + <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" /> + <param name="parse_seqids" value="True" /> + <param name="outformat" value="maskinfo_asn1_text" /> + <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1" /> + </test> + </tests> + <help> +**What it does** + +Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb. 
+ +More information about convert2blastmask can be found in the `BLAST Command Line Applications User Manual`_. + +.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +@REFERENCES@ + </help> + <expand macro="blast_citations" /> +</tool> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,100 @@ +<tool id="ncbi_dustmasker_wrapper" name="NCBI BLAST+ dustmasker" version="0.1.02"> + <!-- dustmasker wrapper from Edward Kirton and Nicola Soranzo --> + <description>masks low complexity regions</description> + <macros> + <token name="@BINARY@">dustmasker</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +dustmasker +#if $db_opts.db_opts_selector == "db": + -in "${db_opts.database.fields.path}" -infmt blastdb +#elif $db_opts.db_opts_selector == "histdb": + -in "${os.path.join($db_opts.histdb.files_path, 'blastdb')}" -infmt blastdb +#else: + -in "$subject" -infmt fasta +#end if +-out "$outfile" +-window $window -level $level -linker $linker -outfmt $outformat + </command> + <expand macro="stdio" /> + <inputs> + <expand macro="input_conditional_nucleotide_db" /> + <param name="window" type="integer" value="64" label="DUST window length" /> + <param name="level" type="integer" value="20" label="DUST level" help="Score threshold for subwindows" /> + <param name="linker" type="integer" value="1" label="DUST linker" help="How close masked intervals should be to get merged together" /> + <param name="outformat" type="select" label="Output format"> + <!-- seqloc_* formats are not very useful + and what BLAST+ calls 'interval' is not what Galaxy calls interval format + --> + <option value="fasta">FASTA</option> + <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option> + <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option> + <option value="maskinfo_xml">maskinfo_xml</option> + </param> + </inputs> + <outputs> + <data name="outfile" format="maskinfo-asn1" label="DUST Masked File"> + <change_format> + <when input="outformat" value="fasta" format="fasta" /> + <when input="outformat" value="maskinfo_asn1_bin" 
format="maskinfo-asn1-binary" /> + <!-- + <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" /> + --> + <when input="outformat" value="maskinfo_xml" format="xml" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="window" value="64" /> + <param name="level" value="20" /> + <param name="linker" value="1" /> + <param name="outformat" value="fasta" /> + <output name="outfile" file="dustmasker_three_human.fasta" /> + </test> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="window" value="64" /> + <param name="level" value="20" /> + <param name="linker" value="1" /> + <param name="outformat" value="maskinfo_asn1_bin" /> + <output name="outfile" file="dustmasker_three_human.maskinfo-asn1-binary" /> + </test> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="window" value="64" /> + <param name="level" value="20" /> + <param name="linker" value="1" /> + <param name="outformat" value="maskinfo_asn1_text" /> + <output name="outfile" file="dustmasker_three_human.maskinfo-asn1" /> + </test> + </tests> + <help> +**What it does** + +This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST_ algorithm. + +If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool. + +More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_. + +.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ +.. 
_DUST: http://www.ncbi.nlm.nih.gov/pubmed/16796549 + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +@REFERENCES@ + </help> + <expand macro="blast_citations" /> +</tool> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,504 @@\n+<macros>\n+ <xml name="requirements">\n+ <requirements>\n+ <requirement type="binary">@BINARY@</requirement>\n+ <requirement type="package" version="2.2.30">blast+</requirement>\n+ </requirements>\n+ <version_command>@BINARY@ -version</version_command>\n+ </xml>\n+ <xml name="output_change_format">\n+ <change_format>\n+ <when input="output.out_format" value="0" format="txt"/>\n+ <when input="output.out_format" value="0 -html" format="html"/>\n+ <when input="output.out_format" value="2" format="txt"/>\n+ <when input="output.out_format" value="2 -html" format="html"/>\n+ <when input="output.out_format" value="4" format="txt"/>\n+ <when input="output.out_format" value="4 -html" format="html"/>\n+ <when input="output.out_format" value="5" format="blastxml"/>\n+ </change_format>\n+ </xml>\n+ <xml name="input_out_format">\n+ <conditional name="output">\n+ <param name="out_format" type="select" label="Output format">\n+ <option value="6">Tabular (standard 12 columns)</option>\n+ <option value="ext" selected="True">Tabular (extended 25 columns)</option>\n+ <option value="cols">Tabular (select which columns)</option>\n+ <option value="5">BLAST XML</option>\n+ <option value="0">Pairwise text</option>\n+ <option value="0 -html">Pairwise HTML</option>\n+ <option value="2">Query-anchored text</option>\n+ <option value="2 -html">Query-anchored HTML</option>\n+ <option value="4">Flat query-anchored text</option>\n+ <option value="4 -html">Flat query-anchored HTML</option>\n+ <!--\n+ <option value="-outfmt 11">BLAST archive format (ASN.1)</option>\n+ -->\n+ </param>\n+ <when value="6"/>\n+ <when value="ext"/>\n+ <when value="cols">\n+ <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns">\n+ <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option>\n+ <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option>\n+ <option selected="true" 
value="pident">pident = Percentage of identical matches</option>\n+ <option selected="true" value="length">length = Alignment length</option>\n+ <option selected="true" value="mismatch">mismatch = Number of mismatches</option>\n+ <option selected="true" value="gapopen">gapopen = Number of gap openings</option>\n+ <option selected="true" value="qstart">qstart = Start of alignment in query</option>\n+ <option selected="true" value="qend">qend = End of alignment in query</option>\n+ <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option>\n+ <option selected="true" value="send">send = End of alignment in subject (database hit)</option>\n+ <option selected="true" value="evalue">evalue = Expectation value (E-value)</option>\n+ <option selected="true" value="bitscore">bitscore = Bit score</option>\n+ </param>\n+ <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns">\n+ <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a \';\'</option>\n+ <option value="score">score = Raw score</option>\n+ <option value="nident">nident = Number of identical matches</option>\n+ <option value="positive">positive = Number of positive-scoring matches</option>\n+ <option value="gaps">gaps = To'..b'\n+ </citations>\n+ </xml>\n+ <token name="@OUTPUT_FORMAT@">**Output format**\n+\n+Because Galaxy focuses on processing tabular data, the default output of this\n+tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n+\n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+ 1 qseqid Query Seq-id (ID of your sequence)\n+ 2 sseqid Subject Seq-id (ID of the database hit)\n+ 3 pident Percentage of identical matches\n+ 4 length Alignment length\n+ 5 mismatch Number of mismatches\n+ 6 gapopen Number of gap openings\n+ 7 qstart Start of alignment in query\n+ 8 qend End of alignment in query\n+ 9 sstart Start of alignment in subject (database hit)\n+ 10 send End of alignment in subject (database hit)\n+ 11 evalue Expectation value (E-value)\n+ 12 bitscore Bit score\n+====== ========= ============================================\n+\n+The BLAST+ tools can optionally output additional columns of information,\n+but this takes longer to calculate. Many commonly used extra columns are\n+included by selecting the extended tabular output. The extra columns are\n+included *after* the standard 12 columns. This is so that you can write\n+workflow filtering steps that accept either the 12 or 25 column tabular\n+BLAST output. 
Galaxy now uses this extended 25 column output by default.\n+\n+====== ============= ===========================================\n+Column NCBI name Description\n+------ ------------- -------------------------------------------\n+ 13 sallseqid All subject Seq-id(s), separated by a \';\'\n+ 14 score Raw score\n+ 15 nident Number of identical matches\n+ 16 positive Number of positive-scoring matches\n+ 17 gaps Total number of gaps\n+ 18 ppos Percentage of positive-scoring matches\n+ 19 qframe Query frame\n+ 20 sframe Subject frame\n+ 21 qseq Aligned part of query sequence\n+ 22 sseq Aligned part of subject sequence\n+ 23 qlen Query sequence length\n+ 24 slen Subject sequence length\n+ 25 salltitles All subject title(s), separated by a \'<>\'\n+====== ============= ===========================================\n+\n+The third option is to customise the tabular output by selecting which\n+columns you want, from the standard set of 12, the default set of 25,\n+or any of the additional columns BLAST+ offers (including species name).\n+\n+The fourth option is BLAST XML output, which is designed to be parsed by\n+another program, and is understood by some Galaxy tools.\n+\n+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+ </token>\n+ <token name="@FASTA_WARNING@">.. class:: warningmark\n+\n+You can also search against a FASTA file of subject (target)\n+sequences. 
This is *not* advised because it is slower (only one\n+CPU is used), but more importantly gives e-values for pairwise\n+searches (very small e-values which will look overly significant).\n+In most cases you should instead turn the other FASTA file into a\n+database first using *makeblastdb* and search against that.\n+ </token>\n+ <token name="@SEARCH_TIME_WARNING@">.. class:: warningmark\n+\n+**Note**. Database searches may take a substantial amount of time.\n+For large input datasets it is advisable to allow overnight processing. \n+\n+-----\n+ </token>\n+</macros>\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_makeblastdb.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| b'@@ -0,0 +1,204 @@\n+<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.1.02">\n+ <description>Make BLAST database</description>\n+ <macros>\n+ <token name="@BINARY@">makeblastdb</token>\n+ <import>ncbi_macros.xml</import>\n+ </macros>\n+ <expand macro="requirements" />\n+ <command interpreter="python">check_no_duplicates.py\n+##First check for duplicates (since BLAST+ 2.2.28 fails to do so)\n+##and abort (via the ampersand ampersand trick) if any are found.\n+#for i in $input_file#"${i}" #end for#\n+&&\n+makeblastdb -out "${os.path.join($outfile.files_path,\'blastdb\')}"\n+$parse_seqids\n+$hash_index\n+## Single call to -in with multiple filenames space separated with outer quotes\n+## (presumably any filenames with spaces would be a problem). Note this gives\n+## some extra spaces, e.g. -in "file1 file2 file3 " but BLAST seems happy:\n+-in "#for i in $input_file#${i} #end for#"\n+#if $title:\n+-title "$title"\n+#else:\n+##Would default to being based on the cryptic Galaxy filenames, which is unhelpful\n+-title "BLAST Database"\n+#end if\n+-dbtype $dbtype\n+## --------------------------------------------------------------------\n+## Masking\n+## --------------------------------------------------------------------\n+## HACK: If no mask files, evaluates as a list with just None in it:\n+## See Trello issue https://trello.com/c/lp5YmA1O\n+#if \' \'.join( map(str, $mask_data_file) ) != \'None\':\n+#for i in $mask_data_file:\n+-mask_data "${i}"\n+#end for\n+#end if\n+## --------------------------------------------------------------------\n+## Taxonomy\n+## --------------------------------------------------------------------\n+#if $tax.taxselect == \'id\':\n+-taxid $tax.taxid\n+## TODO - Can we use a tabular file for the taxonomy mapping?\n+## #else if $tax.taxselect == \'map\':\n+## -taxid_map $tax.taxmap\n+#end if\n+## --------------------------------------------------------------------\n+## Capture the stdout log information to the primary file 
(plain text):\n+> "$outfile"\n+ </command>\n+ <expand macro="stdio" />\n+ <inputs>\n+ <param name="dbtype" type="select" display="radio" label="Molecule type of input">\n+ <option value="prot">protein</option>\n+ <option value="nucl">nucleotide</option>\n+ </param>\n+ <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)?\n+ NOTE Double check the new database would be self contained first\n+ -->\n+ <!-- Note this is a mandatory parameter - default should be most recent FASTA file -->\n+ <param name="input_file" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" />\n+ <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />\n+ <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe \'|\' symbols" />\n+ <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />\n+ <!-- SEQUENCE MASKING OPTIONS -->\n+ <!-- Note this is an optional parameter - default should be NO files -->\n+ <param name="mask_data_file" type="data" multiple="true" optional="true" value="" format="maskinfo-asn1,maskinfo-asn1-binary" label="Optional ASN.1 file(s) containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />\n+ <!-- TODO - Option to create GI indexed masking data? via -gi_mask and -gi_mask_name? 
-->\n+ <!-- TAXONOMY OPTIONS -->\n+ <conditional name="tax">\n+ <param '..b'axid.fasta.phd" name="blastdb.phd" />\n+ <extra_files type="file" value="four_human_proteins_taxid.fasta.phi" name="blastdb.phi" />\n+ <extra_files type="file" value="four_human_proteins_taxid.fasta.psd" name="blastdb.psd" />\n+ <extra_files type="file" value="four_human_proteins_taxid.fasta.psi" name="blastdb.psi" />\n+ </output>\n+ </test>\n+ <test>\n+ <param name="dbtype" value="prot" />\n+ <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n+ <param name="title" value="Just 4 human proteins" />\n+ <param name="parse_seqids" value="" />\n+ <param name="hash_index" value="true" />\n+ <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" />\n+ <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">\n+ <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />\n+ <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />\n+ <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />\n+ <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />\n+ <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />\n+ <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" />\n+ <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" />\n+ <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" />\n+ </output>\n+ </test>\n+ <test>\n+ <param name="dbtype" value="nucl" />\n+ <param name="input_file" value="three_human_mRNA.fasta" ftype="fasta" />\n+ <param name="title" value="Just 3 human mRNA sequences" />\n+ <param name="parse_seqids" value="" />\n+ <param name="hash_index" value="true" />\n+ <param name="taxselect" value="id" />\n+ <param name="taxid" value="9606" />\n+ 
<output name="out_file" compare="contains" file="three_human_mRNA.fasta.log.txt" ftype="blastdbn">\n+ <extra_files type="file" value="three_human_mRNA.fasta.nhr" name="blastdb.nhr" />\n+ <extra_files type="file" value="three_human_mRNA.fasta.nin" name="blastdb.nin" lines_diff="2" />\n+ <extra_files type="file" value="three_human_mRNA.fasta.nsq" name="blastdb.nsq" />\n+ <extra_files type="file" value="three_human_mRNA.fasta.nog" name="blastdb.nog" />\n+ <extra_files type="file" value="three_human_mRNA.fasta.nhd" name="blastdb.nhd" />\n+ <extra_files type="file" value="three_human_mRNA.fasta.nhi" name="blastdb.nhi" />\n+ <extra_files type="file" value="three_human_mRNA.fasta.nsd" name="blastdb.nsd" />\n+ <extra_files type="file" value="three_human_mRNA.fasta.nsi" name="blastdb.nsi" />\n+ </output>\n+ </test>\n+ </tests>\n+ <help>\n+**What it does**\n+\n+Make BLAST database from one or more FASTA files and/or BLAST databases.\n+\n+This is a wrapper for the NCBI BLAST+ tool \'makeblastdb\', which is the\n+replacement for the \'formatdb\' tool in the NCBI \'legacy\' BLAST suite.\n+\n+<!--\n+Applying masks to an existing BLAST database will not change the original database; a new database will be created.\n+For this reason, it\'s best to apply all masks at once to minimize the number of unnecessary intermediate databases.\n+-->\n+\n+**Documentation**\n+\n+http://www.ncbi.nlm.nih.gov/books/NBK1763/\n+\n+**References**\n+\n+If you use this Galaxy tool in work leading to a scientific publication please\n+cite the following papers:\n+\n+@REFERENCES@\n+ </help>\n+ <expand macro="blast_citations" />\n+</tool>\n' |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_makeprofiledb.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
<tool id="ncbi_makeprofiledb" name="NCBI BLAST+ makeprofiledb" version="0.1.02">
    <description>Make profile database</description>
    <!--
    Galaxy wrapper for the NCBI BLAST+ 'makeprofiledb' binary.
    The database files are written into the output dataset's files_path under
    the base name 'blastdb'; the makeprofiledb log file (-logfile) becomes the
    primary content of the "outfile" dataset.
    -->
    <macros>
        <!-- Presumably consumed by the shared macros in ncbi_macros.xml (not visible here). -->
        <token name="@BINARY@">makeprofiledb</token>
        <import>ncbi_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command>
##Unlike makeblastdb, makeprofiledb needs directory to exist already:
mkdir -p $outfile.files_path &amp;&amp;
makeprofiledb -out "${os.path.join($outfile.files_path,'blastdb')}"

##We turn $input_file into $infiles with a configfile entry defined below
-in $infiles

#if $title:
-title "$title"
#else:
##Would default to being based on the cryptic Galaxy filenames, which is unhelpful
-title "Profile Database"
#end if

-threshold $threshold

##The scoring options are only passed when the user says the input has no PSSM scores
#if str($contain_pssm_scores.contain_pssm_scores_type) == 'no':
    -gapopen $contain_pssm_scores.gapopen
    -gapextend $contain_pssm_scores.gapextend
    -scale $contain_pssm_scores.scale
    -matrix $contain_pssm_scores.matrix
#end if

-obsr_threshold $obsr_threshold
-exclude_invalid $exclude_invalid

-logfile "$outfile"
    </command>
    <expand macro="stdio" />
    <inputs>
        <param name="input_file" type="data" multiple="true" optional="false" format="pssm-asn1"
            label="Input PSSM file(s)"
            help="One or more NCBI PSSM ASN.1 format scoremat files (often named *.smp)" />
        <!--
        NOTE(review): "infile_list" is not referenced by the command template or
        by the configfile below (both use "input_file"). It is kept so that the
        parameter set of the tool is unchanged; confirm whether it can be removed.
        -->
        <param name="infile_list" type="data" multiple="true" format="pssm-asn1" />

        <param name="title" type="text" value="" label="Title for the profile database" help="This is the database name shown in BLAST search output" />
        <param name="threshold" type="float" size="5" value="9.82" label="Minimum word score to add a word to the lookup table" />

        <!-- output options -->
        <!-- Initially we're only offering the default, RPS databases for use with rpsblast and rpstblastn
        <param name="dbtype" type="select" display="radio" label="Type of database">
            <option value="cobalt">Cobalt</option>
            <option value="delta">Delta</option>
            <option value="rps" selected="true">RPS</option>
        </param>
        -->

        <conditional name="contain_pssm_scores">
            <param name="contain_pssm_scores_type" type="select" label="Does your input file contain PSSM scores?">
                <option value="yes" selected="True">Yes</option>
                <option value="no">No</option>
            </param>
            <when value="yes" />
            <when value="no">
                <param name="gapopen" type="integer" size="5" value="" label="Cost to open a gap" />
                <param name="gapextend" type="integer" size="5" value="" label="Cost to extend a gap" />
                <param name="scale" type="float" size="5" value="" label="PSSM scale factor" />
                <expand macro="input_scoring_matrix" />
            </when>
        </conditional>

        <!-- Delta Blast Options -->
        <param name="exclude_invalid" type="boolean" truevalue="true" falsevalue="false" checked="true"
            label="Exclude invalid domains?"
            help="Exclude domains that do not pass validation test" />
        <param name="obsr_threshold" type="float" size="5" value="6.0"
            label="Observation threshold"
            help="Exclude domains with maximum number of independent observations below this threshold" />
    </inputs>
    <configfiles>
        <!-- One input dataset path per line; passed to makeprofiledb via "-in $infiles". -->
        <configfile name="infiles">
#for $infile in $input_file
${infile}
#end for
        </configfile>
    </configfiles>
    <outputs>
        <data name="outfile" format="blastdbd" label="RPS database from ${on_string}" />
    </outputs>
    <tests>
        <test>
            <param name="input_file" value="cd00003.smp,cd00008.smp" ftype="pssm-asn1" />
            <param name="title" value="Just 2 PSSM matrices" />
            <param name="contain_pssm_scores_type" value="yes" />
            <!-- The name must match the <data name="outfile"> declared in <outputs>. -->
            <output name="outfile" file="empty_file.dat" ftype="blastdbd" >
                <extra_files type="file" value="cd00003_and_cd00008.phr" name="blastdb.phr" />
                <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" lines_diff="2" />
                <extra_files type="file" value="cd00003_and_cd00008.psq" name="blastdb.psq" />
                <extra_files type="file" value="cd00003_and_cd00008.freq" name="blastdb.freq" />
                <extra_files type="file" value="cd00003_and_cd00008.loo" name="blastdb.loo" />
                <extra_files type="file" value="cd00003_and_cd00008.psd" name="blastdb.psd" />
                <extra_files type="file" value="cd00003_and_cd00008.psi" name="blastdb.psi" />
                <extra_files type="file" value="cd00003_and_cd00008.rps" name="blastdb.rps" />
                <extra_files type="file" value="cd00003_and_cd00008.aux" name="blastdb.aux" />
            </output>
        </test>
    </tests>
    <help>
**What it does**

Make a protein domain profile database (for use with RPS-BLAST or RPS-TBLASTN)
from one or more Position Specific Scoring Matrices (PSSM) files in the NCBI
"scoremat" ASN.1 format (usually named ``*.smp``).

This is a wrapper for the NCBI BLAST+ tool 'makeprofiledb'.

More information about makeprofiledb can be found in the `BLAST Command Line Applications User Manual`_.

.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/


**References**

If you use this Galaxy tool in work leading to a scientific publication please
cite the following papers:

@REFERENCES@
    </help>
    <expand macro="blast_citations" />
</tool>
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.1.02">
    <description>Search protein domain database (PSSMs) with protein query sequence(s)</description>
    <!--
    Galaxy wrapper for the NCBI BLAST+ 'rpsblast' binary: searches a protein
    domain (PSSM) database, chosen either via db_opts.database or from a
    history dataset, with the protein sequences in "query".
    -->
    <!-- If job splitting is enabled, break up the query file into parts -->
    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
    <macros>
        <!--
        Must name the binary this wrapper actually runs (rpsblast), in line with
        the other wrappers, each of which sets this token to its own binary.
        Presumably consumed by the shared macros in ncbi_macros.xml (not visible here).
        -->
        <token name="@BINARY@">rpsblast</token>
        <import>ncbi_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command>
## The command is a Cheetah template which allows some Python based syntax.
## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
rpsblast
-query "$query"
#if $db_opts.db_opts_selector == "db":
  -db "${db_opts.database.fields.path}"
#elif $db_opts.db_opts_selector == "histdb":
  -db "${os.path.join($db_opts.histdb.files_path,'blastdb')}"
#end if
-evalue $evalue_cutoff
@BLAST_OUTPUT@
@THREADS@
#if $adv_opts.adv_opts_selector=="advanced":
@ADV_FILTER_QUERY@
@ADV_MAX_HITS@
@ADV_QCOV_HSP_PERC@
## End of advanced options:
#end if
    </command>

    <expand macro="stdio" />

    <inputs>
        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/>

        <expand macro="input_conditional_pssm" />

        <expand macro="input_evalue" />

        <expand macro="input_out_format" />

        <expand macro="advanced_options">
            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
            <expand macro="input_filter_query_default_false" />
            <expand macro="input_max_hits" />
            <expand macro="input_parse_deflines" />
            <expand macro="input_qcov_hsp_perc" />
        </expand>
    </inputs>
    <outputs>
        <data name="output1" format="tabular" label="rpsblast on ${on_string}">

            <expand macro="output_change_format" />

        </data>
    </outputs>
    <tests>
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="db" />
            <param name="database" value="cd00003_and_cd00008" />
            <param name="evalue_cutoff" value="1e-8" />
            <param name="out_format" value="6" />
            <output name="output1" file="empty_file.dat" ftype="tabular" />
        </test>
    </tests>
    <help>

@SEARCH_TIME_WARNING@

**What it does**

Search a *protein domain database* using a *protein query*,
using the NCBI BLAST+ rpsblast command line tool.

The protein domain databases use position-specific scoring matrices
(PSSMs) and are available for a number of domain collections including:

*CDD* - NCBI curated meta-collection of domains, see
http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains

*Kog* - PSSMs from automatically aligned sequences and sequence
fragments classified in the KOGs resource, the eukaryotic
counterpart to COGs, see http://www.ncbi.nlm.nih.gov/COG/new/

*Cog* - PSSMs from automatically aligned sequences and sequence
fragments classified in the COGs resource, which focuses primarily
on prokaryotes, see http://www.ncbi.nlm.nih.gov/COG/new/

*Pfam* - PSSMs from Pfam-A seed alignment database, see
http://pfam.sanger.ac.uk/

*Smart* - PSSMs from SMART domain alignment database, see
http://smart.embl-heidelberg.de/

*Tigr* - PSSMs from TIGRFAM database of protein families, see
http://www.jcvi.org/cms/research/projects/tigrfams/overview/

*Prk* - PSSMs from automatically aligned stable clusters in the
Protein Clusters database, see
http://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&amp;db=proteinclusters

The exact list of domain databases offered will depend on how your
local Galaxy has been configured.

-----

@OUTPUT_FORMAT@

-------

**References**

If you use this Galaxy tool in work leading to a scientific publication please
cite the following papers:

@REFERENCES@
    </help>
    <expand macro="blast_citations" />
</tool>
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.1.02">
    <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description>
    <!--
    Galaxy wrapper for the NCBI BLAST+ 'rpstblastn' binary: searches a protein
    domain (PSSM) database, chosen either via db_opts.database or from a
    history dataset, with the nucleotide sequences in "query".
    -->
    <!-- If job splitting is enabled, break up the query file into parts -->
    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
    <macros>
        <!-- Presumably consumed by the shared macros in ncbi_macros.xml (not visible here). -->
        <token name="@BINARY@">rpstblastn</token>
        <import>ncbi_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command>
## The command is a Cheetah template which allows some Python based syntax.
## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
rpstblastn
-query "$query"
#if $db_opts.db_opts_selector == "db":
  -db "${db_opts.database.fields.path}"
#elif $db_opts.db_opts_selector == "histdb":
  -db "${os.path.join($db_opts.histdb.files_path,'blastdb')}"
#end if
-evalue $evalue_cutoff
@BLAST_OUTPUT@
## rpstblastn does not support multiple threads up to release 2.2.27+. Added in BLAST 2.2.28+.
##-num_threads 8
#if $adv_opts.adv_opts_selector=="advanced":
@ADV_FILTER_QUERY@
@ADV_MAX_HITS@
@ADV_QCOV_HSP_PERC@
## End of advanced options:
#end if
    </command>
    <expand macro="stdio" />
    <inputs>
        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/>

        <expand macro="input_conditional_pssm" />

        <expand macro="input_evalue" />

        <expand macro="input_out_format" />

        <expand macro="advanced_options">
            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
            <expand macro="input_filter_query_default_false" />
            <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
            <expand macro="input_max_hits" />
            <expand macro="input_parse_deflines" />
            <expand macro="input_qcov_hsp_perc" />
        </expand>
    </inputs>
    <outputs>
        <data name="output1" format="tabular" label="rpstblastn on ${on_string}">
            <expand macro="output_change_format" />
        </data>
    </outputs>
    <tests>
        <test>
            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="db" />
            <param name="database" value="cd00003_and_cd00008" />
            <param name="evalue_cutoff" value="1e-8" />
            <param name="out_format" value="6" />
            <output name="output1" file="empty_file.dat" ftype="tabular" />
        </test>
    </tests>
    <help>

@SEARCH_TIME_WARNING@

**What it does**

Search a *protein domain database* using a *nucleotide query*,
using the NCBI BLAST+ rpstblastn command line tool.

The protein domain databases use position-specific scoring matrices
(PSSMs) and are available for a number of domain collections including:

*CDD* - NCBI curated meta-collection of domains, see
http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains

*Kog* - PSSMs from automatically aligned sequences and sequence
fragments classified in the KOGs resource, the eukaryotic
counterpart to COGs, see http://www.ncbi.nlm.nih.gov/COG/new/

*Cog* - PSSMs from automatically aligned sequences and sequence
fragments classified in the COGs resource, which focuses primarily
on prokaryotes, see http://www.ncbi.nlm.nih.gov/COG/new/

*Pfam* - PSSMs from Pfam-A seed alignment database, see
http://pfam.sanger.ac.uk/

*Smart* - PSSMs from SMART domain alignment database, see
http://smart.embl-heidelberg.de/

*Tigr* - PSSMs from TIGRFAM database of protein families, see
http://www.jcvi.org/cms/research/projects/tigrfams/overview/

*Prk* - PSSMs from automatically aligned stable clusters in the
Protein Clusters database, see
http://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&amp;db=proteinclusters

The exact list of domain databases offered will depend on how your
local Galaxy has been configured.

-----

@OUTPUT_FORMAT@

-------

**References**

If you use this Galaxy tool in work leading to a scientific publication please
cite the following papers:

@REFERENCES@
    </help>
    <expand macro="blast_citations" />
</tool>
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
<tool id="ncbi_segmasker_wrapper" name="NCBI BLAST+ segmasker" version="0.1.02">
    <description>low-complexity regions in protein sequences</description>
    <!--
    Wrapper around the NCBI BLAST+ 'segmasker' binary. Proteins are read from a
    BLAST database or from a FASTA dataset, and the result is written to
    "outfile" in the format selected with "outformat".
    -->
    <macros>
        <token name="@BINARY@">segmasker</token>
        <import>ncbi_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command>
## Cheetah template: lines starting with two hashes are comments, and
## Galaxy turns the remaining newlines into spaces.
segmasker
## Input comes from db_opts: a listed database, a database in the history,
## or (any other selector value) the FASTA dataset "subject".
#if $db_opts.db_opts_selector == "db":
    -in "${db_opts.database.fields.path}" -infmt blastdb
#elif $db_opts.db_opts_selector == "histdb":
    -in "${os.path.join($db_opts.histdb.files_path, 'blastdb')}" -infmt blastdb
#else:
    -in "$subject" -infmt fasta
#end if
-out "$outfile"
-window $window
-locut $locut
-hicut $hicut
-outfmt $outformat
    </command>
    <expand macro="stdio" />
    <inputs>
        <expand macro="input_conditional_protein_db" />
        <!-- SEG parameters; each help string names the matching command line switch. -->
        <param name="window"
               type="integer"
               value="12"
               label="SEG window length"
               help="(-window)" />
        <param name="locut"
               type="float"
               value="2.2"
               label="SEG low cutoff"
               help="(-locut)" />
        <param name="hicut"
               type="float"
               value="2.5"
               label="SEG high cutoff"
               help="(-hicut)" />
        <param name="outformat" type="select" label="Output format">
            <!-- seqloc_* formats are not very useful
                 and what BLAST+ calls 'interval' is not what Galaxy calls interval format
            -->
            <option value="fasta">FASTA</option>
            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
            <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option>
            <option value="maskinfo_xml">maskinfo_xml</option>
        </param>
    </inputs>
    <outputs>
        <!-- Datatype defaults to maskinfo-asn1 (the pre-selected text format); other choices switch it below. -->
        <data name="outfile" format="maskinfo-asn1" label="SEG Masked File">
            <change_format>
                <when input="outformat" value="fasta" format="fasta" />
                <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
                <!--
                <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" />
                -->
                <when input="outformat" value="maskinfo_xml" format="xml" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- FASTA output -->
        <test>
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="window" value="12" />
            <param name="locut" value="2.2" />
            <param name="hicut" value="2.5" />
            <param name="outformat" value="fasta" />
            <output name="outfile" file="segmasker_four_human.fasta" />
        </test>
        <!-- Binary ASN.1 maskinfo output -->
        <test>
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="window" value="12" />
            <param name="locut" value="2.2" />
            <param name="hicut" value="2.5" />
            <param name="outformat" value="maskinfo_asn1_bin" />
            <output name="outfile" file="segmasker_four_human.maskinfo-asn1-binary" />
        </test>
        <!-- Text ASN.1 maskinfo output -->
        <test>
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="window" value="12" />
            <param name="locut" value="2.2" />
            <param name="hicut" value="2.5" />
            <param name="outformat" value="maskinfo_asn1_text" />
            <output name="outfile" file="segmasker_four_human.maskinfo-asn1" />
        </test>
    </tests>
    <help>
**What it does**

This tool identifies and masks out low complexity regions of a protein database (or proteins in FASTA format) by using the SEG_ algorithm.

If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.

More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_.

.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
.. _SEG: http://www.ncbi.nlm.nih.gov/pubmed/8743706

**References**

If you use this Galaxy tool in work leading to a scientific publication please
cite the following papers:

@REFERENCES@
    </help>
    <expand macro="blast_citations" />
</tool>
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.1.02">
    <description>Search translated nucleotide database with protein query sequence(s)</description>
    <!--
    Wrapper around the NCBI BLAST+ 'tblastn' binary: the protein sequences in
    "query" are searched against the nucleotide database or subject selected
    through the shared database/subject macros.
    -->
    <!-- If job splitting is enabled, break up the query file into parts -->
    <parallelism method="multi"
                 split_inputs="query"
                 split_mode="to_size"
                 split_size="1000"
                 merge_outputs="output1" />
    <macros>
        <token name="@BINARY@">tblastn</token>
        <import>ncbi_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command>
## Cheetah template: lines starting with two hashes are comments, and
## Galaxy turns the remaining newlines into spaces.
tblastn
-query "$query"
@BLAST_DB_SUBJECT@
-task $blast_type
-evalue $evalue_cutoff
@BLAST_OUTPUT@
@THREADS@
## Everything below is only added when the advanced options are switched on.
#if $adv_opts.adv_opts_selector=="advanced":
-db_gencode $adv_opts.db_gencode
-matrix $adv_opts.matrix
@ADV_FILTER_QUERY@
@ADV_MAX_HITS@
@ADV_WORD_SIZE@
##Ungapped disabled for now - see comments below
##$adv_opts.ungapped
@ADV_ID_LIST_FILTER@
@ADV_QCOV_HSP_PERC@
## End of advanced options:
#end if
    </command>

    <expand macro="stdio" />

    <inputs>
        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/>

        <expand macro="input_conditional_nucleotide_db" />
        <!-- Passed to tblastn as the value of -task. -->
        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
            <option value="tblastn">tblastn - Traditional TBLASTN to compare protein query to translated nucleotide database</option>
            <option value="tblastn-fast">tblastn-fast - Use longer words for seeding, faster but less accurate</option>
        </param>
        <expand macro="input_evalue" />
        <expand macro="input_out_format" />
        <expand macro="advanced_options">
            <expand macro="input_db_gencode" />

            <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
            <expand macro="input_filter_query_default_true" />
            <expand macro="input_scoring_matrix" />
            <expand macro="input_max_hits" />
            <expand macro="input_word_size" />
            <!--
            Can't use '-ungapped' on its own, error back is:
            Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
            Tried using '-ungapped -comp_based_stats F' and tblastn crashed with 'Attempt to access NULL pointer.'
            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
            -->
            <expand macro="input_parse_deflines" />
            <expand macro="advanced_optional_id_files" />
            <expand macro="input_qcov_hsp_perc" />
        </expand>
    </inputs>
    <outputs>
        <data name="output1" format="tabular" label="tblastn $query.name vs @ON_DB_SUBJECT@">
            <expand macro="output_change_format" />
        </data>
    </outputs>
    <tests>
        <!-- All tests use the same query, subject and advanced settings; they differ in out_format / parse_deflines. -->
        <!-- out_format 5, expected as blastxml -->
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-10" />
            <param name="out_format" value="5" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="false" />
            <param name="matrix" value="BLOSUM80" />
            <param name="max_hits" value="0" />
            <param name="word_size" value="0" />
            <param name="parse_deflines" value="false" />
            <output name="output1" file="tblastn_four_human_vs_rhodopsin.xml" ftype="blastxml" />
        </test>
        <!-- out_format ext, expected as tabular -->
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-10" />
            <param name="out_format" value="ext" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="false" />
            <param name="matrix" value="BLOSUM80" />
            <param name="max_hits" value="0" />
            <param name="word_size" value="0" />
            <param name="parse_deflines" value="false" />
            <output name="output1" file="tblastn_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
        </test>
        <!-- out_format 6, expected as tabular -->
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-10" />
            <param name="out_format" value="6" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="false" />
            <param name="matrix" value="BLOSUM80" />
            <param name="max_hits" value="0" />
            <param name="word_size" value="0" />
            <param name="parse_deflines" value="false" />
            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
        </test>
        <test>
            <!-- Same as above, but parse deflines - on BLAST 2.2.25+ - 2.2.27+ makes no difference -->
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-10" />
            <param name="out_format" value="6" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="false" />
            <param name="matrix" value="BLOSUM80" />
            <param name="max_hits" value="0" />
            <param name="word_size" value="0" />
            <param name="parse_deflines" value="true" />
            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
        </test>
        <!-- out_format "0 -html", expected as html -->
        <test>
            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
            <param name="database" value="" />
            <param name="evalue_cutoff" value="1e-10" />
            <param name="out_format" value="0 -html" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="filter_query" value="false" />
            <param name="matrix" value="BLOSUM80" />
            <param name="max_hits" value="0" />
            <param name="word_size" value="0" />
            <param name="parse_deflines" value="false" />
            <output name="output1" file="tblastn_four_human_vs_rhodopsin.html" ftype="html" />
        </test>
    </tests>
    <help>

@SEARCH_TIME_WARNING@

**What it does**

Search a *translated nucleotide database* using a *protein query*,
using the NCBI BLAST+ tblastn command line tool.

@FASTA_WARNING@

-----

@OUTPUT_FORMAT@

-------

**References**

If you use this Galaxy tool in work leading to a scientific publication please
cite the following papers:

@REFERENCES@
    </help>
    <expand macro="blast_citations" />
</tool>
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,100 @@ +<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.1.02"> + <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description> + <!-- If job splitting is enabled, break up the query file into parts --> + <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism> + <macros> + <token name="@BINARY@">tblastx</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +tblastx +-query "$query" +@BLAST_DB_SUBJECT@ +-query_gencode $query_gencode +-evalue $evalue_cutoff +@BLAST_OUTPUT@ +@THREADS@ +#if $adv_opts.adv_opts_selector=="advanced": +-db_gencode $adv_opts.db_gencode +$adv_opts.strand +-matrix $adv_opts.matrix +@ADV_FILTER_QUERY@ +@ADV_MAX_HITS@ +@ADV_WORD_SIZE@ +@ADV_ID_LIST_FILTER@ +@ADV_QCOV_HSP_PERC@ +## End of advanced options: +#end if + </command> + + <expand macro="stdio" /> + + <inputs> + <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> + + <expand macro="input_conditional_nucleotide_db" /> + <expand macro="input_query_gencode" /> + <expand macro="input_evalue" /> + + <expand macro="input_out_format" /> + <expand macro="advanced_options"> + <expand macro="input_db_gencode" /> + + <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' --> + <expand macro="input_filter_query_default_true" /> + <expand macro="input_strand" /> + <expand macro="input_scoring_matrix" /> + <expand macro="input_max_hits" /> + <!-- I'd like word_size to be optional, with minimum 2 for tblastx --> + <expand macro="input_word_size" /> + <expand macro="input_parse_deflines" /> + <expand macro="advanced_optional_id_files" /> + <expand macro="input_qcov_hsp_perc" /> + 
</expand> + </inputs> + <outputs> + <data name="output1" format="tabular" label="tblastx $query.name vs @ON_DB_SUBJECT@"> + <expand macro="output_change_format" /> + </data> + </outputs> + <tests> + <test> + <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-40" /> + <param name="out_format" value="6" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="tblastx_rhodopsin_vs_three_human.tabular" ftype="tabular" /> + </test> + </tests> + <help> + +@SEARCH_TIME_WARNING@ + +**What it does** + +Search a *translated nucleotide database* using a *translated nucleotide query*, +using the NCBI BLAST+ tblastx command line tool. + +@FASTA_WARNING@ + +----- + +@OUTPUT_FORMAT@ + +------- + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +@REFERENCES@ + </help> + <expand macro="blast_citations" /> +</tool> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/repository_dependencies.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format)."> + <repository changeset_revision="236046d1d441" name="blast_datatypes" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> +</repositories> |
| b |
| diff -r 000000000000 -r 432ea9614cc9 tools/ncbi_blast_plus/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/tool_dependencies.xml Fri Jan 30 08:27:28 2015 -0500 |
| b |
| @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="blast+" version="2.2.30"> + <repository changeset_revision="f69b90d89b62" name="package_blast_plus_2_2_30" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |