Mercurial > repos > peterjc > ncbi_blast_plus
changeset 39:22b7cdcf4960 draft
Uploaded v0.1.0 preview 2, includes missing new test files
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human.columns.tabular Thu Feb 20 05:39:48 2014 -0500 @@ -0,0 +1,7 @@ +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 4301 1213 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 983 1213 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1047 1213
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human.xml Thu Feb 20 05:39:48 2014 -0500 @@ -0,0 +1,549 @@ +<?xml version="1.0"?> +<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> +<BlastOutput> + <BlastOutput_program>blastn</BlastOutput_program> + <BlastOutput_version>BLASTN 2.2.29+</BlastOutput_version> + <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), "A greedy algorithm for aligning DNA sequences", J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference> + <BlastOutput_db></BlastOutput_db> + <BlastOutput_query-ID>Query_1</BlastOutput_query-ID> + <BlastOutput_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</BlastOutput_query-def> + <BlastOutput_query-len>1047</BlastOutput_query-len> + <BlastOutput_param> + <Parameters> + <Parameters_expect>1e-40</Parameters_expect> + <Parameters_sc-match>1</Parameters_sc-match> + <Parameters_sc-mismatch>-2</Parameters_sc-mismatch> + <Parameters_gap-open>0</Parameters_gap-open> + <Parameters_gap-extend>0</Parameters_gap-extend> + <Parameters_filter>L;m;</Parameters_filter> + </Parameters> + </BlastOutput_param> +<BlastOutput_iterations> +<Iteration> + <Iteration_iter-num>1</Iteration_iter-num> + <Iteration_query-ID>Query_1</Iteration_query-ID> + <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits 
found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>2</Iteration_iter-num> + <Iteration_query-ID>Query_1</Iteration_query-ID> + <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>3</Iteration_iter-num> + <Iteration_query-ID>Query_1</Iteration_query-ID> + <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +<Hit> + <Hit_num>1</Hit_num> + <Hit_id>Subject_3</Hit_id> + <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def> + <Hit_accession>Subject_3</Hit_accession> + <Hit_len>1213</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>1474.75</Hsp_bit-score> + <Hsp_score>798</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>1047</Hsp_query-to> + <Hsp_hit-from>88</Hsp_hit-from> + <Hsp_hit-to>1134</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>964</Hsp_identity> + <Hsp_positive>964</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>1047</Hsp_align-len> + 
<Hsp_qseq>ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCCTTCTCCAACAAAACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTCCTGCTCATCGTGCTTGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACGGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCTGACCTCTTCATGGTCTTCGGTGGCTTCACCACCACCCTCTACACCTCTCTGCATGGATACTTTGTCTTTGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACACTGGGCGGTGAAATTGCCCTGTGGTCTTTGGTGGTCCTGGCCATTGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCATGCCATAATGGGCGTCGCTTTCACCTGGGTCATGGCACTGGCCTGCGCTGCACCCCCCCTCGTTGGTTGGTCCAGGTACATCCCTGAAGGCATGCAGTGTTCATGCGGGATCGACTACTACACACTCAAGCCAGAAGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATCGTCATCTTCTTTTGCTACGGGCAGCTTGTCTTCACAGTCAAGGAGGCGGCAGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCTGAGAAGGAGGTCACTCGCATGGTCATCATCATGGTCATTGCTTTCCTGATCTGTTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGGTCCAACTTTGGCCCCATCTTCATGACACTCCCGGCGTTCTTCGCAAAGTCCTCCTCCATCTACAACCCTGTCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCCACTGGGTGATGACGAGGCTTCCACAACCGGTTCCAAGACGGAGACCAGCCAGGTGGCACCGGCCTAA</Hsp_qseq> + 
<Hsp_hseq>ATGAATGGCACAGAAGGCCCTAACTTCTACGTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGATGAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAA</Hsp_hseq> + <Hsp_midline>||||| || || || ||||| ||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ||||| |||||||| ||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||| | ||||||||||||| |||||||||||||||||||||||||||||| ||||| |||||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||| ||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||| ||||||||||||||||| |||||||| || ||||||||||||||||| ||||||||||| |||||||| |||| || ||||||||||||||||| || ||| ||||||| || || || |||||||||||||| |||||||| || |||||||||||||| |||||||||||||||||||||||||||||||||||||||||||| ||||||| || ||||| |||||||| |||||||| ||||||||||| || |||||||||||||||||||||||||| |||||||| |||||||||||||| ||||||||||||||||||||||| |||||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||| |||||||| || |||||||||||||| |||| |||||||| || ||| | || |||||||||||||||||||||| 
|||||||||||||||||||||||||||||||||||||| ||| ||||||| |||||||||||||||||||| || ||||| || | |||| |||||||||||||| ||||||||||| |||||||||</Hsp_midline> + </Hsp> + </Hit_hsps> +</Hit> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> +</Iteration> +<Iteration> + <Iteration_iter-num>4</Iteration_iter-num> + <Iteration_query-ID>Query_2</Iteration_query-ID> + <Iteration_query-def>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds</Iteration_query-def> + <Iteration_query-len>1574</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>7453579</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>5</Iteration_iter-num> + <Iteration_query-ID>Query_2</Iteration_query-ID> + <Iteration_query-def>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds</Iteration_query-def> + <Iteration_query-len>1574</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>7453579</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> 
+ <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>6</Iteration_iter-num> + <Iteration_query-ID>Query_2</Iteration_query-ID> + <Iteration_query-def>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds</Iteration_query-def> + <Iteration_query-len>1574</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>7453579</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>7</Iteration_iter-num> + <Iteration_query-ID>Query_3</Iteration_query-ID> + <Iteration_query-def>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds</Iteration_query-def> + <Iteration_query-len>4301</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>16</Statistics_hsp-len> + <Statistics_eff-space>20482300</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>8</Iteration_iter-num> + <Iteration_query-ID>Query_3</Iteration_query-ID> + <Iteration_query-def>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 
and partial cds</Iteration_query-def> + <Iteration_query-len>4301</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>16</Statistics_hsp-len> + <Statistics_eff-space>20482300</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>9</Iteration_iter-num> + <Iteration_query-ID>Query_3</Iteration_query-ID> + <Iteration_query-def>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds</Iteration_query-def> + <Iteration_query-len>4301</Iteration_query-len> +<Iteration_hits> +<Hit> + <Hit_num>1</Hit_num> + <Hit_id>Subject_3</Hit_id> + <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def> + <Hit_accession>Subject_3</Hit_accession> + <Hit_len>1213</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>460.936</Hsp_bit-score> + <Hsp_score>249</Hsp_score> + <Hsp_evalue>3.59583e-132</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>333</Hsp_query-to> + <Hsp_hit-from>118</Hsp_hit-from> + <Hsp_hit-to>450</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>305</Hsp_identity> + <Hsp_positive>305</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>333</Hsp_align-len> + 
<Hsp_qseq>GTGCCCTTCTCCAACAAGACAGGCGTGGTGCGCAGTCCCTTCGAGCATCCACAGTACTACCTGGCCGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTCGGCTTCCCCATCAACTTCCTCACGCTCTATGTCACGGTTCAGCACAAGAAGCTGCGTACGCCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCCGACCTCTTCATGGTCTTCGGAGGCTTCACCACCACCCTCTACACCTCCCTGCATGGATACTTTGTCTTCGGGCCTACGGGATGCAATCTGGAGGGCTTTTTTGCCACCCTGGGAGGT</Hsp_qseq> + <Hsp_hseq>GTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGT</Hsp_hseq> + <Hsp_midline>|||||||||||||| ||| || ||||| ||||| ||||||||| | ||||||||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||||||||| ||||||||||||||||||||||||||||| ||||| || ||||||||||||||||| ||||||||||||||||||||||||||||| |||||||| ||||||||||||||| | || |||||||||| ||||||||||||||| |||||||||||||| ||||||||||| || ||||||||| |||||||||| |||||||||||||| |||</Hsp_midline> + </Hsp> + <Hsp> + <Hsp_num>2</Hsp_num> + <Hsp_bit-score>331.671</Hsp_bit-score> + <Hsp_score>179</Hsp_score> + <Hsp_evalue>2.94161e-93</Hsp_evalue> + <Hsp_query-from>3127</Hsp_query-from> + <Hsp_query-to>3368</Hsp_query-to> + <Hsp_hit-from>782</Hsp_hit-from> + <Hsp_hit-to>1023</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>222</Hsp_identity> + <Hsp_positive>222</Hsp_positive> + <Hsp_gaps>2</Hsp_gaps> + <Hsp_align-len>243</Hsp_align-len> + <Hsp_qseq>AGGCAGCTGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCCGAGAAGGAGGTCACCCGTATGGTCATCATCATGGTCATTGCTTTCCTAATCTGTTGGCTGCCGTATGCCGGCGTGGCATTCTACATCTTCACCCACCAGGGCTCTAACTTTGGCCCCATCTTCATGACCCTCCCGGCATTCTTTGCCAAG-TCGTCCTCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAG</Hsp_qseq> + 
<Hsp_hseq>AGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCG-CCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAG</Hsp_hseq> + <Hsp_midline>|||| ||||||||||||||||||||||||||||| |||||||| ||||||||||||||||| |||||||||||||||||||| |||||||| ||||| ||| |||| || ||| |||||||||||||||||||||||||||||||||| ||||| || ||||||||||||||| |||| || |||||||||||| || || |||||||||||||||||||||||||||||||||||||||||</Hsp_midline> + </Hsp> + <Hsp> + <Hsp_num>3</Hsp_num> + <Hsp_bit-score>265.191</Hsp_bit-score> + <Hsp_score>143</Hsp_score> + <Hsp_evalue>3.02604e-73</Hsp_evalue> + <Hsp_query-from>1410</Hsp_query-from> + <Hsp_query-to>1582</Hsp_query-to> + <Hsp_hit-from>448</Hsp_hit-from> + <Hsp_hit-to>620</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>163</Hsp_identity> + <Hsp_positive>163</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>173</Hsp_align-len> + <Hsp_qseq>GGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTATGCAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCCTTGCCCTCACCTGGGTCATGGCACTGGCCTGCGCCGCGCCCCCGCTAGTCGGCTGGTCCAGGTA</Hsp_qseq> + <Hsp_hseq>GGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTA</Hsp_hseq> + <Hsp_midline>|||||||||||||||||||||||||||||||||||||||||||||||||||||||| || ||||||||||||||||||||||||||||||||||| |||||||||||| ||||| |||||||||||||||| |||||||||||||| ||||| || | |||||||||||||||</Hsp_midline> + </Hsp> + <Hsp> + <Hsp_num>4</Hsp_num> + <Hsp_bit-score>248.571</Hsp_bit-score> + <Hsp_score>134</Hsp_score> + <Hsp_evalue>3.04752e-68</Hsp_evalue> + <Hsp_query-from>2854</Hsp_query-from> + <Hsp_query-to>3023</Hsp_query-to> + <Hsp_hit-from>615</Hsp_hit-from> + <Hsp_hit-to>784</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + 
<Hsp_identity>158</Hsp_identity> + <Hsp_positive>158</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>170</Hsp_align-len> + <Hsp_qseq>CAGGTACATCCCAGAGGGCATGCAGTGCTCATGTGGAATCGACTACTACACCCTCAAGCCGGAGGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCTATGATTGTCATATTCTTTTGCTATGGACAGCTGGTCTTCACCGTCAAGGAGG</Hsp_qseq> + <Hsp_hseq>CAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGG</Hsp_hseq> + <Hsp_midline>|||||||||||| |||||| |||||||||| |||||||||||||||||||| |||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||| |||||| |||| || || |||||||| ||||| |||||||||||||||||||</Hsp_midline> + </Hsp> + </Hit_hsps> +</Hit> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>16</Statistics_hsp-len> + <Statistics_eff-space>20482300</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> +</Iteration> +<Iteration> + <Iteration_iter-num>10</Iteration_iter-num> + <Iteration_query-ID>Query_4</Iteration_query-ID> + <Iteration_query-def>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds</Iteration_query-def> + <Iteration_query-len>983</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4628008</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + 
<Iteration_iter-num>11</Iteration_iter-num> + <Iteration_query-ID>Query_4</Iteration_query-ID> + <Iteration_query-def>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds</Iteration_query-def> + <Iteration_query-len>983</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4628008</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>12</Iteration_iter-num> + <Iteration_query-ID>Query_4</Iteration_query-ID> + <Iteration_query-def>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds</Iteration_query-def> + <Iteration_query-len>983</Iteration_query-len> +<Iteration_hits> +<Hit> + <Hit_num>1</Hit_num> + <Hit_id>Subject_3</Hit_id> + <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def> + <Hit_accession>Subject_3</Hit_accession> + <Hit_len>1213</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>1323.32</Hsp_bit-score> + <Hsp_score>716</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>959</Hsp_query-to> + <Hsp_hit-from>118</Hsp_hit-from> + <Hsp_hit-to>1076</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>878</Hsp_identity> + <Hsp_positive>878</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>959</Hsp_align-len> + 
<Hsp_qseq>GTGCCCTTCTCCAACAAGACGGGTGTGGTGCGCAGCCCCTTCGAGTACCCGCAGTACTACCTGGCTGAGCCCTGGCAGTTCTCCATGCTGGCTGCCTACATGTTTCTGCTGATCGTGCTCGGATTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCTGTGGCCAACCTCTTCATGGTCTTTGGAGGCTTCACCACCACCCTGTATACCTCTATGCATGGATACTTCGTCTTCGGGGCCACGGGATGCAATCTGGAGGGCTTCTTTGCCACGCTGGGCGGTGAAATCGCCCTGTGGTCCCTGGTGGTCCTGGCCATCGAGCGGTATGTGGTGGTCTGCAAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCACGCCATCATGGGCCTCGCCTTCACGTGGGTCATGGCACTGGCCTGCGCTGCACCCCCACTAGCCGGCTGGTCCAGGTACATCCCAGAGGGCATGCAGTGCTCGTGTGGGATTGACTACTACACGCTCAAACCGGAGGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTGTCATTTTCTTCTGCTACGGACAGCTGGTGTTCACAGTGAAGGAGGCGGCTGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCCGAGAAGGAAGTCACGCGCATGGTCATCATCATGGTCGTTGCGTTCCTAATCTGTTGGCTGCCCTACGCCAGCGTGGCATTCTACATCTTTACCCACCAGGGCTCTAACTTTGGCCCTGTCTTCATGACCATCCCGGCATTCTTCGCCAAGTCATCCTCCATCTACAACCCGGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCCTCTGCTGTGGCAAGAACCCACTGGGTGA</Hsp_qseq> + <Hsp_hseq>GTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGA</Hsp_hseq> + <Hsp_midline>|||||||||||||| 
|||||||||||| |||||||||||||||||||| |||||||||||||||||||| |||||||||||||||||||| |||||||||||||||||||||||||| || ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| || ||||| |||||||||||||| | || |||||||||| |||||| || |||||| ||||||||||||||||||||||| |||| ||||||||| ||||||||||||||||||| |||||||||||||| |||||||||||| ||||||||||||||||||||||||| |||||||| || ||||||||||||||||||||||| ||||||||||| |||||||||||| | |||||||| ||||||||||| ||||||||||| ||||||||||| ||||||||||||||||||||||| |||||| |||||||||||||||| || ||||||||||||||||| |||||||||||||||||||| || |||||||||||||||||||||||||||||||||||||||||| |||| || |||||||| || ||||| || ||||| || |||||||| ||||||||||||||||||||||||||||| |||||||| |||||||| ||||| ||||||||||||||||||||| | || ||||| ||||| ||| ||||||||||||||||||||||||||||||| |||||||||||||| ||||| || || |||||||||||||||| || ||||| |||||| || ||||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||||||| ||||||| ||||||||||||||||||||</Hsp_midline> + </Hsp> + </Hit_hsps> +</Hit> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4628008</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> +</Iteration> +<Iteration> + <Iteration_iter-num>13</Iteration_iter-num> + <Iteration_query-ID>Query_5</Iteration_query-ID> + <Iteration_query-def>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + 
<Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>14</Iteration_iter-num> + <Iteration_query-ID>Query_5</Iteration_query-ID> + <Iteration_query-def>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>15</Iteration_iter-num> + <Iteration_query-ID>Query_5</Iteration_query-ID> + <Iteration_query-def>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +<Hit> + <Hit_num>1</Hit_num> + <Hit_id>Subject_3</Hit_id> + <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def> + <Hit_accession>Subject_3</Hit_accession> + <Hit_len>1213</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>1208.83</Hsp_bit-score> + <Hsp_score>654</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>1047</Hsp_query-to> + <Hsp_hit-from>88</Hsp_hit-from> + <Hsp_hit-to>1134</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>917</Hsp_identity> 
+ <Hsp_positive>917</Hsp_positive> + <Hsp_gaps>2</Hsp_gaps> + <Hsp_align-len>1048</Hsp_align-len> + <Hsp_qseq>ATGAACGGGACCGAGGGCCCAAACTTCTACGTGCCTTTCTCCAACAAGACGGGCGTCGTACGCAGCCCCTTCGAGGCGCCGCAGTACTACCTGGCTGAGCCATGGCAGTTCAGCATGCTGGCCGCCTACATGTTCCTGCTGATCATGCTTGGCTTCCCCATCAACTTCCTCACGCTGTACGTCACAGTCCAGCACAAGAAGCTGAGGACCCCCCTCAACTACATCCTGCTCAACCTGGCCGTGGCAGATCTCTTCATGGTGTTCGGGGGCTTCACCACCACCCTGTATACCTCTCTGCACGGGTACTTCGTGTTCGGTCCGACGGGCTGCAACCTCGAGGGCTTCTTTGCCACCTTAGGCGGTGAAATTGCACTGTGGTCCTTGGTGGTGCTAGCCATCGAGCGGTACGTAGTGGTGTGCAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCGTCGCATTCACCTGGGTCATGGCTCTGGCCTGTGCGGCCCCCCCCCTCGTCGGCTGGTCTAGATACATCCCGGAGGGGATGCAGTGCTCGTGCGGGATCGATTACTACACGCCCCACGAGGAGACCAACAATGAGTCGTTCGTCATCTACATGTTCGTTGTACACTTCATCATCCCCCTGATTGTCATATTCTTCTGCTACGGGCAGCTGGTCTTCACCGTCAAGGAGGCTGCAGCCCAGCAGCAGGAGTCGGCCACCACTCAGAAGGCCGAGAAGGAGGTCACGCGTATGGTCATCATCATGGTCATCGCTTTCCTCATATGCTGGCTGCCCTACGCAGGTGTGGCGTTCTACATCTTCACCCATCAGGGATCCGACTTTGGCCCCATCTTCATGACCATCCCGGCTTTCTTTGCCAAGA-CGTCTGCCGTCTATAACCCCGTCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGGTCACCACTCTCTGCTGTGGCAAGAACCCCCTAGGTGACGACGAGGCCTCCACGACCGTGTCCAAGACAGAGACCAGCCAAGTGGCCCCTGCCTAA</Hsp_qseq> + 
<Hsp_hseq>ATGAATGGCACAGAAGGCCCTAACTTCTACGTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCC-GCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGATGAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAA</Hsp_hseq> + <Hsp_midline>||||| || || || ||||| |||||||||||||| |||||||| |||||| || |||||||||||||||||| || |||||||||||||||||||||||||||||| ||||||||||||||||||||| ||||||||| |||| |||||||||||||||||||||||||| |||||||| |||||||||||||||||| | || || ||||||||||||||||||||||| |||||||| || ||||||||||| | || |||||||||| |||||| || ||||||||||| || |||||||| ||||| || || || ||||| | |||||||||||||||||| | |||||||||||||| ||||||||||||||||| || ||||||||||||||||| |||||||| ||||||||||||||||||||||||||||||||||| |||||||||||||| || ||||||||||||||||| |||||||| || || ||||| |||| ||||||||| || |||||||| ||||| ||||||||||||| || ||||| |||||||||| | | |||| |||||| ||||| || ||||||||||||||||| || ||||||| ||||||| ||||| |||| || |||||||| |||||||| |||||||||||||||||||| || ||||||||||||||||| |||||||| |||||||| |||||||||||||| || ||||||||||||||||||||||||||||| || |||||| |||||||||| | ||||| ||||||||||||||||| ||||| ||| |||| || |||||||||||||||||||| || ||||||||||||| || | ||| |||| ||||| |||||||| 
||||||||||||||||||||||||||||||||| ||||||| ||||||| ||||||||||| || |||||||| |||||||| | |||||||||||||| ||||| ||||| |||||||| ||||||</Hsp_midline> + </Hsp> + </Hit_hsps> +</Hit> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> +</Iteration> +<Iteration> + <Iteration_iter-num>16</Iteration_iter-num> + <Iteration_query-ID>Query_6</Iteration_query-ID> + <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def> + <Iteration_query-len>1344</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>6353949</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>17</Iteration_iter-num> + <Iteration_query-ID>Query_6</Iteration_query-ID> + <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def> + <Iteration_query-len>1344</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>6353949</Statistics_eff-space> + 
<Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>18</Iteration_iter-num> + <Iteration_query-ID>Query_6</Iteration_query-ID> + <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def> + <Iteration_query-len>1344</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>6353949</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +</BlastOutput_iterations> +</BlastOutput> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.log Thu Feb 20 05:39:48 2014 -0500 @@ -0,0 +1,10 @@ + + +Building a new DB, current time: 02/10/2014 18:40:09 +New DB name: four_human_proteins_taxid.fasta +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B +Adding sequences from FASTA; added 4 sequences in 0.00230002 seconds.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.phd Thu Feb 20 05:39:48 2014 -0500 @@ -0,0 +1,4 @@ +11117184492 +29249033410 +36665887501 +5392473183
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.psd Thu Feb 20 05:39:48 2014 -0500 @@ -0,0 +1,4 @@ +gnl|bl_ord_id|00 +gnl|bl_ord_id|11 +gnl|bl_ord_id|22 +gnl|bl_ord_id|33
--- a/tools/ncbi_blast_plus/README.rst Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/README.rst Thu Feb 20 05:39:48 2014 -0500 @@ -25,17 +25,7 @@ (``blastxml``) and protein and nucleotide BLAST databases (``blastdbp`` and ``blastdbn``). -You must tell Galaxy about any system level BLAST databases using configuration -files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein -databases like NR), and blastdb_d.loc (protein domain databases like CDD or -SMART) which are located in the tool-data/ folder. Sample files are included -which explain the tab-based format to use. - -You can download the NCBI provided databases as tar-balls from here: - -* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR) -* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD) - +See the configuration notes below. Manual Installation =================== @@ -78,6 +68,31 @@ ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools +Configuration +============= + +You must tell Galaxy about any system level BLAST databases using configuration +files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein +databases like NR), and blastdb_d.loc (protein domain databases like CDD or +SMART) which are located in the tool-data/ folder. Sample files are included +which explain the tab-based format to use. + +You can download the NCBI provided databases as tar-balls from here: + +* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR) +* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD) + +The BLAST+ binaries support multi-threaded operation, which is handled via the +$GALAXY_SLOTS environment variable. This should be set automatically by Galaxy +via your job runner settings, which allows you to (for example) allocate four +cores to each BLAST job. 
+ +In addition, the BLAST+ wrappers also support high level parallelism by task +splitting if ``use_tasked_jobs = True`` is enabled in your ``universe_wsgi.ini`` +configuration file. Essentially, the FASTA input query files are broken up into +batches of 1000 sequences, a separate BLAST child job is run for each chunk, +and then the BLAST output files are merged (in order). This is transparent +for the end user. History ======= @@ -105,7 +120,7 @@ (all too often our users were having to re-run searches just to get one of the missing columns like query or subject length) v0.0.18 - Defensive quoting of filenames in case of spaces (where possible, - BLAST+ handling of some mult-file arguments is problematic). + BLAST+ handling of some multi-file arguments is problematic). v0.0.19 - Added wrappers for rpsblast and rpstblastn, and new blastdb_d.loc for the domain databases they use (e.g. CDD, PFAM or SMART). - Correct case of exception regular expression (for error handling @@ -139,6 +154,7 @@ - Tabular output now includes option to pick specific columns - BLAST XML to tabular tool supports multiple input files. - More detailed descriptions for BLASTN and BLASTP task option + - Supports setting a taxonomy ID in makeblastdb wrapper. ======= ======================================================================
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Thu Feb 20 05:39:48 2014 -0500 @@ -66,7 +66,7 @@ from optparse import OptionParser if "-v" in sys.argv or "--version" in sys.argv: - print "v0.0.23" + print "v0.1.00" sys.exit(0) if sys.version_info[:2] >= ( 2, 5 ):
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Thu Feb 20 05:39:48 2014 -0500 @@ -28,33 +28,33 @@ <when value="ext"/> <when value="cols"> <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns"> - <option selected="true" value="qseqid">1 qseqid = Query Seq-id (ID of your sequence)</option> - <option selected="true" value="sseqid">2 sseqid = Subject Seq-id (ID of the database hit)</option> - <option selected="true" value="pident">3 pident = Percentage of identical matches</option> - <option selected="true" value="length">4 length = Alignment length</option> - <option selected="true" value="mismatch">5 mismatch = Number of mismatches</option> - <option selected="true" value="gapopen">6 gapopen = Number of gap openings</option> - <option selected="true" value="qstart">7 qstart = Start of alignment in query</option> - <option selected="true" value="qend">8 qend = End of alignment in query</option> - <option selected="true" value="sstart">9 sstart = Start of alignment in subject (database hit)</option> - <option selected="true" value="send">10 send = End of alignment in subject (database hit)</option> - <option selected="true" value="evalue">11 evalue = Expectation value (E-value)</option> - <option selected="true" value="bitscore">12 bitscore = Bit score</option> + <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option> + <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option> + <option selected="true" value="pident">pident = Percentage of identical matches</option> + <option selected="true" value="length">length = Alignment length</option> + <option selected="true" value="mismatch">mismatch = Number of mismatches</option> + <option selected="true" value="gapopen">gapopen = Number of gap openings</option> + <option selected="true" value="qstart">qstart = Start of 
alignment in query</option> + <option selected="true" value="qend">qend = End of alignment in query</option> + <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option> + <option selected="true" value="send">send = End of alignment in subject (database hit)</option> + <option selected="true" value="evalue">evalue = Expectation value (E-value)</option> + <option selected="true" value="bitscore">bitscore = Bit score</option> </param> <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns"> - <option value="sallseqid">13 sallseqid = All subject Seq-id(s), separated by a ';'</option> - <option value="score">14 score = Raw score</option> - <option value="nident">15 nident = Number of identical matches</option> - <option value="positive">16 positive = Number of positive-scoring matches</option> - <option value="gaps">17 gaps = Total number of gaps</option> - <option value="ppos">18 ppos = Percentage of positive-scoring matches</option> - <option value="qframe">19 qframe = Query frame</option> - <option value="sframe">20 sframe = Subject frame</option> - <option value="qseq">21 qseq = Aligned part of query sequence</option> - <option value="sseq">22 sseq = Aligned part of subject sequence</option> - <option value="qlen">23 qlen = Query sequence length</option> - <option value="slen">24 slen = Subject sequence length</option> - <option value="salltitles">25 salltitles = All subject title(s), separated by a '&lt;&gt;'</option> + <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option> + <option value="score">score = Raw score</option> + <option value="nident">nident = Number of identical matches</option> + <option value="positive">positive = Number of positive-scoring matches</option> + <option value="gaps">gaps = Total number of gaps</option> + <option value="ppos">ppos = Percentage of positive-scoring matches</option> + <option value="qframe">qframe = Query 
frame</option> + <option value="sframe">sframe = Subject frame</option> + <option value="qseq">qseq = Aligned part of query sequence</option> + <option value="sseq">sseq = Aligned part of subject sequence</option> + <option value="qlen">qlen = Query sequence length</option> + <option value="slen">slen = Subject sequence length</option> + <option value="salltitles">salltitles = All subject title(s), separated by a '&lt;&gt;'</option> </param> </when> </conditional>
--- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Thu Feb 20 05:39:48 2014 -0500 @@ -83,13 +83,14 @@ <help> **What it does** -This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST algorithm. +This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST_ algorithm. If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool. More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_. .. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ +.. _DUST: http://www.ncbi.nlm.nih.gov/pubmed/16796549 **References**
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Thu Feb 20 05:39:48 2014 -0500 @@ -31,33 +31,33 @@ <when value="ext"/> <when value="cols"> <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns"> - <option selected="true" value="qseqid">1 qseqid = Query Seq-id (ID of your sequence)</option> - <option selected="true" value="sseqid">2 sseqid = Subject Seq-id (ID of the database hit)</option> - <option selected="true" value="pident">3 pident = Percentage of identical matches</option> - <option selected="true" value="length">4 length = Alignment length</option> - <option selected="true" value="mismatch">5 mismatch = Number of mismatches</option> - <option selected="true" value="gapopen">6 gapopen = Number of gap openings</option> - <option selected="true" value="qstart">7 qstart = Start of alignment in query</option> - <option selected="true" value="qend">8 qend = End of alignment in query</option> - <option selected="true" value="sstart">9 sstart = Start of alignment in subject (database hit)</option> - <option selected="true" value="send">10 send = End of alignment in subject (database hit)</option> - <option selected="true" value="evalue">11 evalue = Expectation value (E-value)</option> - <option selected="true" value="bitscore">12 bitscore = Bit score</option> + <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option> + <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option> + <option selected="true" value="pident">pident = Percentage of identical matches</option> + <option selected="true" value="length">length = Alignment length</option> + <option selected="true" value="mismatch">mismatch = Number of mismatches</option> + <option selected="true" value="gapopen">gapopen = Number of gap openings</option> + <option selected="true" value="qstart">qstart = Start of alignment in 
query</option> + <option selected="true" value="qend">qend = End of alignment in query</option> + <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option> + <option selected="true" value="send">send = End of alignment in subject (database hit)</option> + <option selected="true" value="evalue">evalue = Expectation value (E-value)</option> + <option selected="true" value="bitscore">bitscore = Bit score</option> </param> <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns"> - <option value="sallseqid">13 sallseqid = All subject Seq-id(s), separated by a ';'</option> - <option value="score">14 score = Raw score</option> - <option value="nident">15 nident = Number of identical matches</option> - <option value="positive">16 positive = Number of positive-scoring matches</option> - <option value="gaps">17 gaps = Total number of gaps</option> - <option value="ppos">18 ppos = Percentage of positive-scoring matches</option> - <option value="qframe">19 qframe = Query frame</option> - <option value="sframe">20 sframe = Subject frame</option> - <option value="qseq">21 qseq = Aligned part of query sequence</option> - <option value="sseq">22 sseq = Aligned part of subject sequence</option> - <option value="qlen">23 qlen = Query sequence length</option> - <option value="slen">24 slen = Subject sequence length</option> - <option value="salltitles">25 salltitles = All subject title(s), separated by a '&lt;&gt;'</option> + <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option> + <option value="score">score = Raw score</option> + <option value="nident">nident = Number of identical matches</option> + <option value="positive">positive = Number of positive-scoring matches</option> + <option value="gaps">gaps = Total number of gaps</option> + <option value="ppos">ppos = Percentage of positive-scoring matches</option> + <option value="qframe">qframe = Query frame</option> + 
<option value="sframe">sframe = Subject frame</option> + <option value="qseq">qseq = Aligned part of query sequence</option> + <option value="sseq">sseq = Aligned part of subject sequence</option> + <option value="qlen">qlen = Query sequence length</option> + <option value="slen">slen = Subject sequence length</option> + <option value="salltitles">salltitles = All subject title(s), separated by a '&lt;&gt;'</option> </param> <!-- TODO, the other columns, like taxonomy --> </when>
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Feb 20 05:39:48 2014 -0500 @@ -29,7 +29,10 @@ ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful -title "BLAST Database" #end if --dbtype $dbtype +-dbtype $dbtype +## -------------------------------------------------------------------- +## Masking +## -------------------------------------------------------------------- #set $mask_string = '' #set $sep = '-mask_data ' #for $i in $mask_data @@ -44,11 +47,15 @@ ## #set $sep = ',' ## #end for ## $gi_mask_string -## #if $tax.select == 'id': -## -taxid $tax.id -## #else if $tax.select == 'map': -## -taxid_map $tax.map -## #end if +## -------------------------------------------------------------------- +## Taxonomy +## -------------------------------------------------------------------- +#if $tax.taxselect == 'id': +-taxid $tax.taxid +## TODO - Can we use a tabular file for the taxonomy mapping? 
+## #else if $tax.taxselect == 'map': +## -taxid_map $tax.taxmap +#end if ## -------------------------------------------------------------------- ## Capture the stdout log information to the primary file (plain text): >> "$outfile" @@ -83,23 +90,25 @@ --> <!-- TAXONOMY OPTIONS --> - <!-- TODO <conditional name="tax"> - <param name="select" type="select" label="Taxonomy options"> - <option value="">Do not assign sequences to Taxonomy IDs</option> - <option value="id">Assign all sequences to one Taxonomy ID</option> + <param name="taxselect" type="select" label="Taxonomy options"> + <option value="">Do not assign a Taxonomy ID to the sequences</option> + <option value="id">Assign the same Taxonomy ID to all the sequences</option> + <!-- <option value="map">Supply text file mapping sequence IDs to taxonomy IDs</option> + --> </param> <when value=""> </when> <when value="id"> - <param name="id" type="integer" value="" label="NCBI taxonomy ID" help="Integer >=0" /> + <param name="taxid" type="integer" value="" label="NCBI taxonomy ID" help="Integer >=0, e.g. 9606 for Homo sapiens" min="0" /> </when> + <!-- TODO: File format? <when value="map"> - <param name="file" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> + <param name="taxmap" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> </when> + --> </conditional> - --> </inputs> <outputs> <!-- If we only accepted one FASTA file, we could use its human name here... --> @@ -112,6 +121,8 @@ </outputs> <tests> <!-- Note the (two line) PIN file is not reproducible run to run. + Likewise there is a datestamp in the log file as well. + With and without the taxid the only real difference is in the *.phr file. 
--> <test> <param name="dbtype" value="prot" /> @@ -130,6 +141,25 @@ <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" /> </output> </test> + <test> + <param name="dbtype" value="prot" /> + <param name="file" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="title" value="Just 4 human proteins" /> + <param name="parse_seqids" value="" /> + <param name="hash_index" value="true" /> + <param name="taxselect" value="id" /> + <param name="taxid" value="9606" /> + <output name="out_file" file="four_human_proteins_taxid.fasta.log" ftype="blastdbp" lines_diff="6"> + <extra_files type="file" value="four_human_proteins_taxid.fasta.phr" name="blastdb.phr" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" lines_diff="2" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.psq" name="blastdb.psq" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.pog" name="blastdb.pog" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.phd" name="blastdb.phd" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.phi" name="blastdb.phi" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.psd" name="blastdb.psd" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.psi" name="blastdb.psi" /> + </output> + </test> </tests> <help> **What it does**
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Wed Jan 15 05:38:14 2014 -0500 +++ b/tools/ncbi_blast_plus/tool_dependencies.xml Thu Feb 20 05:39:48 2014 -0500 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> <package name="blast+" version="2.2.29"> - <repository changeset_revision="61c4017d3bf2" name="package_blast_plus_2_2_29" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="c021862e9ea8" name="package_blast_plus_2_2_29" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>