Mercurial > repos > peterjc > ncbi_blast_plus
changeset 30:5ec998a3530c draft
Uploaded v0.0.21, includes support for masking, uses macros internally, and now targets BLAST+ 2.2.27 rather than BLAST+ 2.2.26
line wrap: on
line diff
--- a/test-data/blastn_rhodopsin_vs_three_human.tabular Mon Sep 23 06:13:22 2013 -0400 +++ b/test-data/blastn_rhodopsin_vs_three_human.tabular Tue Oct 08 05:08:26 2013 -0400 @@ -1,7 +1,7 @@ -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.10 1050 77 6 1 1047 88 1134 0.0 1474 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 9e-133 460 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 7e-94 331 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 8e-74 265 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.98 171 10 2 2854 3023 615 784 8e-69 248 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.58 962 75 6 1 959 118 1076 0.0 1323 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.55 1052 121 10 1 1047 88 1134 0.0 1208 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 8e-69 248 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208
--- a/test-data/blastp_four_human_vs_rhodopsin.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/test-data/blastp_four_human_vs_rhodopsin.xml Tue Oct 08 05:08:26 2013 -0400 @@ -2,7 +2,7 @@ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> <BlastOutput> <BlastOutput_program>blastp</BlastOutput_program> - <BlastOutput_version>BLASTP 2.2.26+</BlastOutput_version> + <BlastOutput_version>BLASTP 2.2.27+</BlastOutput_version> <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference> <BlastOutput_db></BlastOutput_db> <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID>
--- a/test-data/blastx_rhodopsin_vs_four_human.tabular Mon Sep 23 06:13:22 2013 -0400 +++ b/test-data/blastx_rhodopsin_vs_four_human.tabular Tue Oct 08 05:08:26 2013 -0400 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662 -gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36 129 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2 -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613 -gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641 -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559 +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532
--- a/test-data/blastx_rhodopsin_vs_four_human.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/test-data/blastx_rhodopsin_vs_four_human.xml Tue Oct 08 05:08:26 2013 -0400 @@ -2,7 +2,7 @@ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> <BlastOutput> <BlastOutput_program>blastx</BlastOutput_program> - <BlastOutput_version>BLASTX 2.2.26+</BlastOutput_version> + <BlastOutput_version>BLASTX 2.2.27+</BlastOutput_version> <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference> <BlastOutput_db></BlastOutput_db> <BlastOutput_query-ID>Query_1</BlastOutput_query-ID> @@ -28,8 +28,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>102080</Statistics_eff-space> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119944</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -47,8 +47,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>102080</Statistics_eff-space> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119944</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -66,8 +66,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>102080</Statistics_eff-space> + 
<Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119944</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -90,8 +90,8 @@ <Hit_hsps> <Hsp> <Hsp_num>1</Hsp_num> - <Hsp_bit-score>662.144</Hsp_bit-score> - <Hsp_score>1707</Hsp_score> + <Hsp_bit-score>639.032</Hsp_bit-score> + <Hsp_score>1647</Hsp_score> <Hsp_evalue>0</Hsp_evalue> <Hsp_query-from>1</Hsp_query-from> <Hsp_query-to>1044</Hsp_query-to> @@ -114,8 +114,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>102080</Statistics_eff-space> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119944</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -132,8 +132,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>32</Statistics_hsp-len> - <Statistics_eff-space>155472</Statistics_eff-space> + <Statistics_hsp-len>33</Statistics_hsp-len> + <Statistics_eff-space>183143</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -151,8 +151,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>32</Statistics_hsp-len> - <Statistics_eff-space>155472</Statistics_eff-space> + <Statistics_hsp-len>33</Statistics_hsp-len> + <Statistics_eff-space>183143</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -170,8 +170,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - 
<Statistics_hsp-len>32</Statistics_hsp-len> - <Statistics_eff-space>155472</Statistics_eff-space> + <Statistics_hsp-len>33</Statistics_hsp-len> + <Statistics_eff-space>183143</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -194,8 +194,8 @@ <Hit_hsps> <Hsp> <Hsp_num>1</Hsp_num> - <Hsp_bit-score>575.089</Hsp_bit-score> - <Hsp_score>1481</Hsp_score> + <Hsp_bit-score>551.206</Hsp_bit-score> + <Hsp_score>1419</Hsp_score> <Hsp_evalue>0</Hsp_evalue> <Hsp_query-from>42</Hsp_query-from> <Hsp_query-to>1037</Hsp_query-to> @@ -218,8 +218,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>32</Statistics_hsp-len> - <Statistics_eff-space>155472</Statistics_eff-space> + <Statistics_hsp-len>33</Statistics_hsp-len> + <Statistics_eff-space>183143</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -236,8 +236,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>39</Statistics_hsp-len> - <Statistics_eff-space>430746</Statistics_eff-space> + <Statistics_hsp-len>40</Statistics_hsp-len> + <Statistics_eff-space>509838</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -255,8 +255,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>39</Statistics_hsp-len> - <Statistics_eff-space>430746</Statistics_eff-space> + <Statistics_hsp-len>40</Statistics_hsp-len> + <Statistics_eff-space>509838</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -274,8 +274,8 @@ 
<Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>39</Statistics_hsp-len> - <Statistics_eff-space>430746</Statistics_eff-space> + <Statistics_hsp-len>40</Statistics_hsp-len> + <Statistics_eff-space>509838</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -298,9 +298,9 @@ <Hit_hsps> <Hsp> <Hsp_num>1</Hsp_num> - <Hsp_bit-score>224.172</Hsp_bit-score> - <Hsp_score>570</Hsp_score> - <Hsp_evalue>1.84906e-68</Hsp_evalue> + <Hsp_bit-score>220.32</Hsp_bit-score> + <Hsp_score>560</Hsp_score> + <Hsp_evalue>4.29169e-67</Hsp_evalue> <Hsp_query-from>1</Hsp_query-from> <Hsp_query-to>333</Hsp_query-to> <Hsp_hit-from>11</Hsp_hit-from> @@ -317,9 +317,9 @@ </Hsp> <Hsp> <Hsp_num>2</Hsp_num> - <Hsp_bit-score>129.413</Hsp_bit-score> - <Hsp_score>324</Hsp_score> - <Hsp_evalue>4.62305e-36</Hsp_evalue> + <Hsp_bit-score>127.487</Hsp_bit-score> + <Hsp_score>319</Hsp_score> + <Hsp_evalue>1.95079e-35</Hsp_evalue> <Hsp_query-from>3174</Hsp_query-from> <Hsp_query-to>3368</Hsp_query-to> <Hsp_hit-from>248</Hsp_hit-from> @@ -336,28 +336,28 @@ </Hsp> <Hsp> <Hsp_num>3</Hsp_num> - <Hsp_bit-score>120.939</Hsp_bit-score> - <Hsp_score>302</Hsp_score> - <Hsp_evalue>2.85192e-33</Hsp_evalue> + <Hsp_bit-score>121.324</Hsp_bit-score> + <Hsp_score>303</Hsp_score> + <Hsp_evalue>1.96633e-33</Hsp_evalue> <Hsp_query-from>2855</Hsp_query-from> - <Hsp_query-to>3022</Hsp_query-to> + <Hsp_query-to>3031</Hsp_query-to> <Hsp_hit-from>177</Hsp_hit-from> - <Hsp_hit-to>232</Hsp_hit-to> + <Hsp_hit-to>235</Hsp_hit-to> <Hsp_query-frame>2</Hsp_query-frame> <Hsp_hit-frame>0</Hsp_hit-frame> <Hsp_identity>54</Hsp_identity> - <Hsp_positive>56</Hsp_positive> + <Hsp_positive>57</Hsp_positive> <Hsp_gaps>0</Hsp_gaps> - <Hsp_align-len>56</Hsp_align-len> - <Hsp_qseq>RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKE</Hsp_qseq> - 
<Hsp_hseq>RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKE</Hsp_hseq> - <Hsp_midline>RYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKE</Hsp_midline> + <Hsp_align-len>59</Hsp_align-len> + <Hsp_qseq>RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS</Hsp_qseq> + <Hsp_hseq>RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA</Hsp_hseq> + <Hsp_midline>RYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKE +</Hsp_midline> </Hsp> <Hsp> <Hsp_num>4</Hsp_num> - <Hsp_bit-score>118.242</Hsp_bit-score> - <Hsp_score>295</Hsp_score> - <Hsp_evalue>2.16231e-32</Hsp_evalue> + <Hsp_bit-score>97.0561</Hsp_bit-score> + <Hsp_score>240</Hsp_score> + <Hsp_evalue>1.16473e-25</Hsp_evalue> <Hsp_query-from>1404</Hsp_query-from> <Hsp_query-to>1580</Hsp_query-to> <Hsp_hit-from>119</Hsp_hit-from> @@ -374,22 +374,22 @@ </Hsp> <Hsp> <Hsp_num>5</Hsp_num> - <Hsp_bit-score>56.225</Hsp_bit-score> - <Hsp_score>134</Hsp_score> - <Hsp_evalue>2.05678e-12</Hsp_evalue> + <Hsp_bit-score>56.9954</Hsp_bit-score> + <Hsp_score>136</Hsp_score> + <Hsp_evalue>1.23645e-12</Hsp_evalue> <Hsp_query-from>4222</Hsp_query-from> - <Hsp_query-to>4296</Hsp_query-to> + <Hsp_query-to>4299</Hsp_query-to> <Hsp_hit-from>312</Hsp_hit-from> - <Hsp_hit-to>336</Hsp_hit-to> + <Hsp_hit-to>337</Hsp_hit-to> <Hsp_query-frame>1</Hsp_query-frame> <Hsp_hit-frame>0</Hsp_hit-frame> <Hsp_identity>23</Hsp_identity> <Hsp_positive>24</Hsp_positive> <Hsp_gaps>0</Hsp_gaps> - <Hsp_align-len>25</Hsp_align-len> - <Hsp_qseq>QFRNCMLTTLCCGKNPLGDDEASTT</Hsp_qseq> - <Hsp_hseq>QFRNCMLTTICCGKNPLGDDEASAT</Hsp_hseq> - <Hsp_midline>QFRNCMLTT+CCGKNPLGDDEAS T</Hsp_midline> + <Hsp_align-len>26</Hsp_align-len> + <Hsp_qseq>QFRNCMLTTLCCGKNPLGDDEASTTA</Hsp_qseq> + <Hsp_hseq>QFRNCMLTTICCGKNPLGDDEASATV</Hsp_hseq> + <Hsp_midline>QFRNCMLTT+CCGKNPLGDDEAS T </Hsp_midline> </Hsp> </Hit_hsps> </Hit> @@ -398,8 +398,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - 
<Statistics_hsp-len>39</Statistics_hsp-len> - <Statistics_eff-space>430746</Statistics_eff-space> + <Statistics_hsp-len>40</Statistics_hsp-len> + <Statistics_eff-space>509838</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -416,8 +416,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>28</Statistics_hsp-len> - <Statistics_eff-space>95680</Statistics_eff-space> + <Statistics_hsp-len>29</Statistics_hsp-len> + <Statistics_eff-space>112346</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -435,8 +435,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>28</Statistics_hsp-len> - <Statistics_eff-space>95680</Statistics_eff-space> + <Statistics_hsp-len>29</Statistics_hsp-len> + <Statistics_eff-space>112346</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -454,8 +454,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>28</Statistics_hsp-len> - <Statistics_eff-space>95680</Statistics_eff-space> + <Statistics_hsp-len>29</Statistics_hsp-len> + <Statistics_eff-space>112346</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -478,8 +478,8 @@ <Hit_hsps> <Hsp> <Hsp_num>1</Hsp_num> - <Hsp_bit-score>613.994</Hsp_bit-score> - <Hsp_score>1582</Hsp_score> + <Hsp_bit-score>589.341</Hsp_bit-score> + <Hsp_score>1518</Hsp_score> <Hsp_evalue>0</Hsp_evalue> <Hsp_query-from>1</Hsp_query-from> <Hsp_query-to>978</Hsp_query-to> @@ -502,8 +502,8 @@ 
<Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>28</Statistics_hsp-len> - <Statistics_eff-space>95680</Statistics_eff-space> + <Statistics_hsp-len>29</Statistics_hsp-len> + <Statistics_eff-space>112346</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -520,8 +520,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>102080</Statistics_eff-space> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119944</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -539,8 +539,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>102080</Statistics_eff-space> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119944</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -558,8 +558,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>102080</Statistics_eff-space> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119944</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -582,8 +582,8 @@ <Hit_hsps> <Hsp> <Hsp_num>1</Hsp_num> - <Hsp_bit-score>641.728</Hsp_bit-score> - <Hsp_score>1654</Hsp_score> + <Hsp_bit-score>619.387</Hsp_bit-score> + <Hsp_score>1596</Hsp_score> 
<Hsp_evalue>0</Hsp_evalue> <Hsp_query-from>1</Hsp_query-from> <Hsp_query-to>1044</Hsp_query-to> @@ -606,8 +606,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>29</Statistics_hsp-len> - <Statistics_eff-space>102080</Statistics_eff-space> + <Statistics_hsp-len>30</Statistics_hsp-len> + <Statistics_eff-space>119944</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -624,8 +624,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>31</Statistics_hsp-len> - <Statistics_eff-space>132189</Statistics_eff-space> + <Statistics_hsp-len>32</Statistics_hsp-len> + <Statistics_eff-space>155584</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -643,8 +643,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>31</Statistics_hsp-len> - <Statistics_eff-space>132189</Statistics_eff-space> + <Statistics_hsp-len>32</Statistics_hsp-len> + <Statistics_eff-space>155584</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -662,8 +662,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>31</Statistics_hsp-len> - <Statistics_eff-space>132189</Statistics_eff-space> + <Statistics_hsp-len>32</Statistics_hsp-len> + <Statistics_eff-space>155584</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy> @@ -686,22 +686,22 @@ <Hit_hsps> <Hsp> <Hsp_num>1</Hsp_num> - <Hsp_bit-score>559.296</Hsp_bit-score> - 
<Hsp_score>1440</Hsp_score> + <Hsp_bit-score>532.717</Hsp_bit-score> + <Hsp_score>1371</Hsp_score> <Hsp_evalue>0</Hsp_evalue> <Hsp_query-from>23</Hsp_query-from> - <Hsp_query-to>1018</Hsp_query-to> + <Hsp_query-to>1021</Hsp_query-to> <Hsp_hit-from>1</Hsp_hit-from> - <Hsp_hit-to>332</Hsp_hit-to> + <Hsp_hit-to>333</Hsp_hit-to> <Hsp_query-frame>2</Hsp_query-frame> <Hsp_hit-frame>0</Hsp_hit-frame> <Hsp_identity>272</Hsp_identity> <Hsp_positive>307</Hsp_positive> <Hsp_gaps>0</Hsp_gaps> - <Hsp_align-len>332</Hsp_align-len> - <Hsp_qseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEED</Hsp_qseq> - <Hsp_hseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE</Hsp_hseq> - <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP +++</Hsp_midline> + <Hsp_align-len>333</Hsp_align-len> + <Hsp_qseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG</Hsp_qseq> + 
<Hsp_hseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA</Hsp_hseq> + <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP +++ </Hsp_midline> </Hsp> </Hit_hsps> </Hit> @@ -710,8 +710,8 @@ <Statistics> <Statistics_db-num>0</Statistics_db-num> <Statistics_db-len>0</Statistics_db-len> - <Statistics_hsp-len>31</Statistics_hsp-len> - <Statistics_eff-space>132189</Statistics_eff-space> + <Statistics_hsp-len>32</Statistics_hsp-len> + <Statistics_eff-space>155584</Statistics_eff-space> <Statistics_kappa>0.041</Statistics_kappa> <Statistics_lambda>0.267</Statistics_lambda> <Statistics_entropy>0.14</Statistics_entropy>
--- a/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Mon Sep 23 06:13:22 2013 -0400 +++ b/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Tue Oct 08 05:08:26 2013 -0400 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662 -gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36 129 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2 -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613 -gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641 -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559 +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532
--- a/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Mon Sep 23 06:13:22 2013 -0400 +++ b/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Tue Oct 08 05:08:26 2013 -0400 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662 sp|P08100|OPSD_HUMAN 1707 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 -gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575 sp|P08100|OPSD_HUMAN 1481 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224 sp|P08100|OPSD_HUMAN 570 107 109 0 
98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36 129 sp|P08100|OPSD_HUMAN 324 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120 sp|P08100|OPSD_HUMAN 302 54 56 0 100.00 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKE RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKE 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118 sp|P08100|OPSD_HUMAN 295 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2 sp|P08100|OPSD_HUMAN 134 23 24 0 96.00 1 0 QFRNCMLTTLCCGKNPLGDDEASTT QFRNCMLTTICCGKNPLGDDEASAT 4301 348 -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613 sp|P08100|OPSD_HUMAN 1582 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 -gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641 sp|P08100|OPSD_HUMAN 1654 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559 sp|P08100|OPSD_HUMAN 1440 272 307 0 92.47 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEED MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1344 348 
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 sp|P08100|OPSD_HUMAN 560 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 303 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 
619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348
--- a/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Mon Sep 23 06:13:22 2013 -0400 +++ b/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Tue Oct 08 05:08:26 2013 -0400 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662 sp|P08100|OPSD_HUMAN 1707 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 -gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575 sp|P08100|OPSD_HUMAN 1481 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224 sp|P08100|OPSD_HUMAN 570 107 109 0 98.20 1 0 
VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36 129 sp|P08100|OPSD_HUMAN 324 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120 sp|P08100|OPSD_HUMAN 302 54 56 0 100.00 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKE RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKE 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118 sp|P08100|OPSD_HUMAN 295 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2 sp|P08100|OPSD_HUMAN 134 23 24 0 96.00 1 0 QFRNCMLTTLCCGKNPLGDDEASTT QFRNCMLTTICCGKNPLGDDEASAT 4301 348 -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613 sp|P08100|OPSD_HUMAN 1582 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 -gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641 sp|P08100|OPSD_HUMAN 1654 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559 sp|P08100|OPSD_HUMAN 1440 272 307 0 92.47 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEED MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1344 348 
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 sp|P08100|OPSD_HUMAN 560 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 303 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 
619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dustmasker_three_human.fasta Tue Oct 08 05:08:26 2013 -0400 @@ -0,0 +1,183 @@ +>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds +GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG +CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC +GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC +CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA +TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC +TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT +TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA +CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA +TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA +CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC +TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA +TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT +TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG +GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA +TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA +ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG +TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA +CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC +AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT +CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA +ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC +CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA +TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG +CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC +TATGTGTatttttattttgaataaacagaaagaaattttgggtttttaatttttttCTCC +CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTaaaaaaaaaaaaaCCTGCTA +GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT +ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA 
+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT +CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT +GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA +TTGCTTATGAAAATTCCATAGTGGTAtttttttGGATTCTTAATGTGTAACTTAAACATA +CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA +AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG +GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG +AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT +TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT +TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACtt +tttttCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG +CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA +CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT +TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC +TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT +GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT +AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT +TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT +GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT +TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA +GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA +TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA +TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT +AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT +TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT +ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT +GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA +GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT +GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT +TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT +TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT +AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG 
+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC +TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT +CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA +GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT +TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT +GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT +TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG +GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA +CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA +CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA +GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA +GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT +GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG +GAAACTTTTAGATGACATTCTACAAATTAtttttttCTTTAAATTAAAAGAACCTAGCCA +ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA +AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA +ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA +GGTTGTCAAGAAGGCttttttttttttcttttttAAACCTGAGGGCAAAAAGGAATGGAT +AAAGTAGTGTAATGGATTGACAATCAGGAAGAACAGAATAACTCAGtttttttttCTCCT +ACAAGGAGATATGGCTGGACCAAAATAAAATGACATGAAATTGCAAAAATGAAAAT +>ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds +GGGGGGCTGCGCGGCCGGGTCGGTGCGCACACGAGAAGGACGCGCGGCCCCCAGCGCTCT +TGGGGGCCGCCTCGGAGCATGACCCCCGCGGGCCAGCGCCGCGCGCCTGATCCGAGGAGA +CCCCGCGCTCCCGCAGCCAtgggcaccgggggccggcggggggcggcggccgcgccgctg +ctggtggcggtggccgcgctgctactgggcgccgcgggccACCTGTACCCCGGAGAGGTG +TGTCCCGGCATGGATATCCGGAACAACCTCACTAGGTTGCATGAGCTGGAGAATTGCTCT +GTCATCGAAGGACACTTGCAGATACTCTTGATGTTCAAAACGAGGCCCGAAGATTTCCGA +GACCTCAGTTTCCCCAAACTCATCATGATCACTGATTACTTGCTGCTCTTCCGGGTCTAT +GGGCTCGAGAGCCTGAAGGACCTGTTCCCCAACCTCACGGTCATCCGGGGATCACGACTG +TTCTTTAACTACGCGCTGGTCATCTTCGAGATGGTTCACCTCAAGGAACTCGGCCTCTAC +AACCTGATGAACATCACCCGGGGTTCTGTCCGCATCGAGAAGAACAATGAGCTCTGTTAC +TTGGCCACTATCGACTGGTCCCGTATCCTGGATTCCGTGGAGGATAATCACATCGTGTTG 
+AACAAAGATGACAACGAGGAGTGTGGAGACATCTGTCCGGGTACCGCGAAGGGCAAGACC +AACTGCCCCGCCACCGTCATCAACGGGCAGTTTGTCGAACGATGTTGGACTCATAGTCAC +TGCCAGAAAGTTTGCCCGACCATCTGTAAGTCACACGGCTGCACCGCCGAAGGCCTCTGT +TGCCACAGCGAGTGCCTGGGCAACTGTTCTCAGCCCGACGACCCCACCAAGTGCGTGGCC +TGCCGCAACTTCTACCTGGACGGCAGGTGTGTGGAGACCTGCCCGCCCCCGTACTACCAC +TTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAGGACCTGCACCACAAATGCAAG +AACTCGCGGAGGCAGGGCTGCCACCAATACGTCATTCACAACAACAAGTGCATCCCTGAG +TGTCCCTCCGGGTACACGATGAATTCCAGCAACTTGCTGTGCACCCCATGCCTGGGTCCC +TGTCCCAAGGTGTGCCACCTCCTAGAAGGCGAGAAGACCATCGACTCGGTGACGTCTGCC +CAGGAGCTCCGAGGATGCACCGTCATCAACGGGAGTCTGATCATCAACATTCGAGGAGGC +AACAATCTGGCAGCTGAGCTAGAAGCCAACCTCGGCCTCATTGAAGAAATTTCAGGGTAT +CTAAAAATCCGCCGATCCTACGCTCTGGTGTCACTTTCCTTCTTCCGGAAGTTACGTCTG +ATTCGAGGAGAGACCTTGGAAATTGGGAACTACTCCTTCTATGCCTTGGACAACCAGAAC +CTAAGGCAGCTCTGGGACTGGAGCAAACACAACCTCACCACCACTCAGGGGAAACTCTTC +TTCCACTATAACCCCAAACTCTGCTTGTCAGAAATCCACAAGATGGAAGAAGTTTCAGGA +ACCAAGGGGCGCCAGGAGAGAAACGACATTGCCCTGAAGACCAATGGGGACAAGGCATCC +TGTGAAAATGAGTTACTTAAATTTTCTTACATTCGGACATCTTTTGACAAGATCTTGCTG +AGATGGGAGCCGTACTGGCCCCCCGACTTCCGAGACCTCTTGGGGTTCATGCTGTTCTAC +AAAGAGGCCCCTTATCAGAATGTGACGGAGTTCGATGGGCAGGATGCGTGTGGTTCCAAC +AGTTGGACGGTGGTAGACATTGACCCACCCCTGAGGTCCAACGACCCCAAATCACAGAAC +CACCCAGGGTGGCTGATGCGGGGTCTCAAGCCCTGGACCCAGTATGCCATCTTTGTGAAG +ACCCTGGTCACCTTTTCGGATGAACGCCGGACCTATGGGGCCAAGAGTGACATCATTTAT +GTCCAGACAGATGCCACCAACCCCTCTGTGCCCCTGGATCCAATCTCAGTGTCTAACTCA +TCATCCCAGATTATTCTGAAGTGGAAACCACCCTCCGACCCCAATGGCAACATCACCCAC +TACCTGGTTTTCTGGGAGAGGCAGGCGGAAGACAGTGAGCTGTTCGAGCTGGATTATTGC +CTCAAAGGGCTGAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT +CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG +ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT +TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC +CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG +CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG +CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC 
+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC +AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT +GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG +AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG +GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT +GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC +TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT +GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT +ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT +TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC +GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC +GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG +GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG +GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC +AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC +CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA +GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG +TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA +ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC +AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC +ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA +CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT +CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA +TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTG +CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT +GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC +TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC +TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG +ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT +CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT +ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC +TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA 
+GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT +TTTTTCGTTccccccacccgcccccAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT +CttttttttttttttttttttttttttttGCTGGTGTCTGAGCTTCAGTATAAAAGACAA +AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA +>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds +CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC +CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG +CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG +GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC +TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT +CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC +ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT +TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG +GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC +CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA +CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC +TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC +TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG +GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC +CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG +GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA +GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG +CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT +GAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG +CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA +GCCATCCCACCAG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dustmasker_three_human.maskinfo-asn1 Tue Oct 08 05:08:26 2013 -0400 @@ -0,0 +1,64 @@ +Blast-db-mask-info ::= { + algo-id 2, + algo-program dust, + algo-options "window=64; level=20; linker=1", + masks { + masks { + packed-int { + { + from 1447, + to 1495, + id local id 1 + }, + { + from 1540, + to 1552, + id local id 1 + }, + { + from 1886, + to 1892, + id local id 1 + }, + { + from 2278, + to 2284, + id local id 1 + }, + { + from 4409, + to 4415, + id local id 1 + }, + { + from 4635, + to 4653, + id local id 1 + }, + { + from 4726, + to 4734, + id local id 1 + } + }, + packed-int { + { + from 139, + to 219, + id local id 2 + }, + { + from 4569, + to 4584, + id local id 2 + }, + { + from 4621, + to 4648, + id local id 2 + } + } + }, + more FALSE + } +}
--- a/test-data/tblastn_four_human_vs_rhodopsin.html Mon Sep 23 06:13:22 2013 -0400 +++ b/test-data/tblastn_four_human_vs_rhodopsin.html Tue Oct 08 05:08:26 2013 -0400 @@ -3,7 +3,7 @@ <BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099"> <PRE> -<b>TBLASTN 2.2.26+</b> +<b>TBLASTN 2.2.27+</b> <b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 @@ -20,12 +20,12 @@ -Lambda K H - 0.347 0.182 0.684 +Lambda K H a alpha + 0.347 0.182 0.684 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 127710 @@ -45,12 +45,12 @@ -Lambda K H - 0.347 0.182 0.684 +Lambda K H a alpha + 0.347 0.182 0.684 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 127710 @@ -70,12 +70,12 @@ -Lambda K H - 0.347 0.182 0.684 +Lambda K H a alpha + 0.347 0.182 0.684 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 127710 @@ -95,12 +95,12 @@ -Lambda K H - 0.347 0.182 0.684 +Lambda K H a alpha + 0.347 0.182 0.684 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 127710 @@ -120,12 +120,12 @@ -Lambda K H - 0.347 0.182 0.684 +Lambda K H a alpha + 0.347 0.182 0.684 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 127710 @@ -145,12 +145,12 @@ -Lambda K H - 0.347 0.182 0.684 +Lambda K H a alpha + 0.347 0.182 0.684 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 127710 @@ -169,12 +169,12 @@ -Lambda K H - 0.334 0.170 0.615 +Lambda K H a alpha + 0.334 0.170 0.615 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a 
alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 370988 @@ -194,12 +194,12 @@ -Lambda K H - 0.334 0.170 0.615 +Lambda K H a alpha + 0.334 0.170 0.615 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 370988 @@ -219,12 +219,12 @@ -Lambda K H - 0.334 0.170 0.615 +Lambda K H a alpha + 0.334 0.170 0.615 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 370988 @@ -244,12 +244,12 @@ -Lambda K H - 0.334 0.170 0.615 +Lambda K H a alpha + 0.334 0.170 0.615 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 370988 @@ -269,12 +269,12 @@ -Lambda K H - 0.334 0.170 0.615 +Lambda K H a alpha + 0.334 0.170 0.615 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 370988 @@ -294,12 +294,12 @@ -Lambda K H - 0.334 0.170 0.615 +Lambda K H a alpha + 0.334 0.170 0.615 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 370988 @@ -318,12 +318,12 @@ -Lambda K H - 0.346 0.180 0.700 +Lambda K H a alpha + 0.346 0.180 0.700 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 441350 @@ -343,12 +343,12 @@ -Lambda K H - 0.346 0.180 0.700 +Lambda K H a alpha + 0.346 0.180 0.700 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 441350 @@ -368,12 +368,12 @@ -Lambda K H - 0.346 0.180 0.700 +Lambda K H a alpha + 0.346 0.180 0.700 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 
Effective search space used: 441350 @@ -393,12 +393,12 @@ -Lambda K H - 0.346 0.180 0.700 +Lambda K H a alpha + 0.346 0.180 0.700 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 441350 @@ -418,12 +418,12 @@ -Lambda K H - 0.346 0.180 0.700 +Lambda K H a alpha + 0.346 0.180 0.700 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 441350 @@ -443,12 +443,12 @@ -Lambda K H - 0.346 0.180 0.700 +Lambda K H a alpha + 0.346 0.180 0.700 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 441350 @@ -492,12 +492,12 @@ -Lambda K H - 0.351 0.182 0.707 +Lambda K H a alpha + 0.351 0.182 0.707 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 109230 @@ -542,12 +542,12 @@ -Lambda K H - 0.351 0.182 0.707 +Lambda K H a alpha + 0.351 0.182 0.707 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 109230 @@ -616,12 +616,12 @@ -Lambda K H - 0.351 0.182 0.707 +Lambda K H a alpha + 0.351 0.182 0.707 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 109230 @@ -666,12 +666,12 @@ -Lambda K H - 0.351 0.182 0.707 +Lambda K H a alpha + 0.351 0.182 0.707 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 109230 @@ -716,12 +716,12 @@ -Lambda K H - 0.351 0.182 0.707 +Lambda K H a alpha + 0.351 0.182 0.707 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 109230 @@ -766,12 +766,12 
@@ -Lambda K H - 0.351 0.182 0.707 +Lambda K H a alpha + 0.351 0.182 0.707 0.522 1.92 Gapped -Lambda K H - 0.299 0.0710 0.270 +Lambda K H a alpha sigma + 0.299 0.0710 0.270 1.10 13.8 14.5 Effective search space used: 109230
--- a/test-data/tblastn_four_human_vs_rhodopsin.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/test-data/tblastn_four_human_vs_rhodopsin.xml Tue Oct 08 05:08:26 2013 -0400 @@ -2,7 +2,7 @@ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> <BlastOutput> <BlastOutput_program>tblastn</BlastOutput_program> - <BlastOutput_version>TBLASTN 2.2.26+</BlastOutput_version> + <BlastOutput_version>TBLASTN 2.2.27+</BlastOutput_version> <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference> <BlastOutput_db></BlastOutput_db> <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
--- a/tools/ncbi_blast_plus/README.rst Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/README.rst Tue Oct 08 05:08:26 2013 -0400 @@ -53,6 +53,7 @@ <tool file="ncbi_blast_plus/ncbi_tblastn_wrapper.xml" /> <tool file="ncbi_blast_plus/ncbi_tblastx_wrapper.xml" /> <tool file="ncbi_blast_plus/ncbi_makeblastdb.xml" /> + <tool file="ncbi_blast_plus/ncbi_dustmasker_wrapper.xml" /> <tool file="ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml" /> <tool file="ncbi_blast_plus/ncbi_blastdbcmd_info.xml" /> <tool file="ncbi_blast_plus/ncbi_rpsblast_wrapper.xml" /> @@ -62,7 +63,9 @@ You will also need to install 'blast_datatypes' from the Tool Shed. This defines the BLAST XML file format ('blastxml') and protein and nucleotide -BLAST databases composite file formats ('blastdbp' and 'blastdbn'). +BLAST databases composite file formats ('blastdbp' and 'blastdbn'): + +* http://toolshed.g2.bx.psu.edu/view/devteam/blast_datatypes As described above for an automated installation, you must also tell Galaxy about any system level BLAST databases using the tool-data/blastdb*.loc files. @@ -117,6 +120,13 @@ - Adopted standard MIT License. - Development moved to GitHub, https://github.com/peterjc/galaxy_blast - Updated citation information (Cock et al. 2013). +v0.0.21 - Use macros to simplify the XML wrappers. + - Added wrapper for dustmasker + - Enabled masking for makeblastdb + - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes + defined in updated blast_datatypes on Galaxy ToolShed. + - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26 + - Now depends on package_blast_plus_2_2_27 in ToolShed ======= ======================================================================
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,46 +1,19 @@ -<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.6"> +<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.21"> <description>Show BLAST database information from blastdbcmd</description> <requirements> <requirement type="binary">blastdbcmd</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> </requirements> <version_command>blastdbcmd -version</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> <command> blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out "$info" </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- Suspect blastdbcmd sometimes fails to set error level --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + <expand macro="stdio" /> <inputs> - <conditional name="db_opts"> - <param name="db_type" type="select" label="Type of BLAST database"> - <option value="nucl" selected="True">Nucleotide</option> - <option value="prot">Protein</option> - </param> - <when value="nucl"> - <param name="database" type="select" label="Nucleotide BLAST database"> - <options from_file="blastdb.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - </when> - <when value="prot"> - <param name="database" type="select" label="Protein BLAST database"> - <options from_file="blastdb_p.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - </when> - </conditional> + <expand macro="input_conditional_choose_db_type" /> </inputs> <outputs> 
<data name="info" format="txt" label="${db_opts.database.fields.name} info" />
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,10 +1,13 @@ -<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.6"> +<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.21"> <description>Extract sequence(s) from BLAST database</description> <requirements> <requirement type="binary">blastdbcmd</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> </requirements> <version_command>blastdbcmd -version</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces @@ -47,39 +50,9 @@ | sed 's/>\(lcl|\|gnl|BL_ORD_ID|[0-9]* \)/>/1' > "$seq" #end if </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- Suspect blastdbcmd sometimes fails to set error level --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + <expand macro="stdio" /> <inputs> - <conditional name="db_opts"> - <param name="db_type" type="select" label="Type of BLAST database"> - <option value="nucl" selected="True">Nucleotide</option> - <option value="prot">Protein</option> - </param> - <when value="nucl"> - <param name="database" type="select" label="Nucleotide BLAST database"> - <options from_file="blastdb.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - </when> - <when value="prot"> - <param name="database" type="select" label="Protein BLAST database"> - <options from_file="blastdb_p.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> 
- <column name="path" index="2"/> - </options> - </param> - </when> - </conditional> + <expand macro="input_conditional_choose_db_type" /> <conditional name="id_opts"> <param name="id_type" type="select" label="Type of identifier list"> <option value="file">From file</option>
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,12 +1,15 @@ -<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.20"> +<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.21"> <description>Search nucleotide database with nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> <requirements> <requirement type="binary">blastn</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> </requirements> <version_command>blastn -version</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -48,44 +51,14 @@ ## End of advanced options: #end if </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- In case the return code has not been set propery check stderr too --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + + <expand macro="stdio" /> + <inputs> <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> - <conditional name="db_opts"> - <param name="db_opts_selector" type="select" label="Subject database/sequences"> - <option value="db" selected="True">Locally installed BLAST database</option> - <option value="histdb">BLAST database from your history</option> - <option value="file">FASTA file from your history (see warning note below)</option> - </param> - <when value="db"> - <param name="database" type="select" label="Nucleotide BLAST database"> - <options from_file="blastdb.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="hidden" value="" /> - </when> - <when value="histdb"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" /> - <param name="subject" type="hidden" value="" /> - </when> - <when value="file"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> - </when> - </conditional> + + <expand macro="input_conditional_nucleotide_db" /> + <param name="blast_type" type="select" display="radio" label="Type of BLAST"> <option value="megablast">megablast</option> <option value="blastn">blastn</option> @@ -97,20 +70,7 @@ --> </param> <param name="evalue_cutoff" type="float" size="15" 
value="0.001" label="Set expectation value cutoff" /> - <param name="out_format" type="select" label="Output format"> - <option value="6">Tabular (standard 12 columns)</option> - <option value="ext" selected="True">Tabular (extended 24 columns)</option> - <option value="5">BLAST XML</option> - <option value="0">Pairwise text</option> - <option value="0 -html">Pairwise HTML</option> - <option value="2">Query-anchored text</option> - <option value="2 -html">Query-anchored HTML</option> - <option value="4">Flat query-anchored text</option> - <option value="4 -html">Flat query-anchored HTML</option> - <!-- - <option value="-outfmt 11">BLAST archive format (ASN.1)</option> - --> - </param> + <expand macro="input_out_format" /> <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" label="Advanced Options"> <option value="basic" selected="True">Hide Advanced Options</option> @@ -141,15 +101,7 @@ </inputs> <outputs> <data name="output1" format="tabular" label="${blast_type.value_label} on ${on_string}"> - <change_format> - <when input="out_format" value="0" format="txt"/> - <when input="out_format" value="0 -html" format="html"/> - <when input="out_format" value="2" format="txt"/> - <when input="out_format" value="2 -html" format="html"/> - <when input="out_format" value="4" format="txt"/> - <when input="out_format" value="4 -html" format="html"/> - <when input="out_format" value="5" format="blastxml"/> - </change_format> + <expand macro="output_change_format" /> </data> </outputs> <tests>
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,12 +1,15 @@ -<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.20"> +<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.21"> <description>Search protein database with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> <requirements> <requirement type="binary">blastp</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> </requirements> <version_command>blastp -version</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -46,63 +49,20 @@ ## End of advanced options: #end if </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- In case the return code has not been set propery check stderr too --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + + <expand macro="stdio" /> + <inputs> <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> - <conditional name="db_opts"> - <param name="db_opts_selector" type="select" label="Subject database/sequences"> - <option value="db" selected="True">Locally installed BLAST database</option> - <option value="histdb">BLAST database from your history</option> - <option value="file">FASTA file from your history (see warning note below)</option> - </param> - <when value="db"> - <param name="database" type="select" label="Protein BLAST database"> - <options from_file="blastdb_p.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="hidden" value="" /> - </when> - <when value="histdb"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="data" format="blastdbp" label="Protein BLAST database" /> - <param name="subject" type="hidden" value="" /> - </when> - <when value="file"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> - </when> - </conditional> + + <expand macro="input_conditional_protein_db" /> + <param name="blast_type" type="select" display="radio" label="Type of BLAST"> <option value="blastp">blastp</option> <option value="blastp-short">blastp-short</option> </param> <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation 
value cutoff" /> - <param name="out_format" type="select" label="Output format"> - <option value="6">Tabular (standard 12 columns)</option> - <option value="ext" selected="True">Tabular (extended 24 columns)</option> - <option value="5">BLAST XML</option> - <option value="0">Pairwise text</option> - <option value="0 -html">Pairwise HTML</option> - <option value="2">Query-anchored text</option> - <option value="2 -html">Query-anchored HTML</option> - <option value="4">Flat query-anchored text</option> - <option value="4 -html">Flat query-anchored HTML</option> - <!-- - <option value="-outfmt 11">BLAST archive format (ASN.1)</option> - --> - </param> + <expand macro="input_out_format" /> <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" label="Advanced Options"> <option value="basic" selected="True">Hide Advanced Options</option> @@ -112,16 +72,9 @@ <when value="advanced"> <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' --> <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" /> - <param name="matrix" type="select" label="Scoring matrix"> - <option value="BLOSUM90">BLOSUM90</option> - <option value="BLOSUM80">BLOSUM80</option> - <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option> - <option value="BLOSUM50">BLOSUM50</option> - <option value="BLOSUM45">BLOSUM45</option> - <option value="PAM250">PAM250</option> - <option value="PAM70">PAM70</option> - <option value="PAM30">PAM30</option> - </param> + + <expand macro="input_scoring_matrix" /> + <!-- Why doesn't optional override a validator? 
I want to accept an empty string OR a non-negative integer --> <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits"> <validator type="in_range" min="0" /> @@ -142,15 +95,7 @@ </inputs> <outputs> <data name="output1" format="tabular" label="${blast_type.value_label} on ${on_string}"> - <change_format> - <when input="out_format" value="0" format="txt"/> - <when input="out_format" value="0 -html" format="html"/> - <when input="out_format" value="2" format="txt"/> - <when input="out_format" value="2 -html" format="html"/> - <when input="out_format" value="4" format="txt"/> - <when input="out_format" value="4 -html" format="html"/> - <when input="out_format" value="5" format="blastxml"/> - </change_format> + <expand macro="output_change_format" /> </data> </outputs> <tests>
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,12 +1,15 @@ -<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.19"> +<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.21"> <description>Search protein database with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> <requirements> <requirement type="binary">blastx</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> </requirements> <version_command>blastx -version</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -46,80 +49,18 @@ ## End of advanced options: #end if </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- In case the return code has not been set propery check stderr too --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + + <expand macro="stdio" /> + <inputs> <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> - <conditional name="db_opts"> - <param name="db_opts_selector" type="select" label="Subject database/sequences"> - <option value="db" selected="True">Locally installed BLAST database</option> - <option value="histdb">BLAST database from your history</option> - <option value="file">FASTA file from your history (see warning note below)</option> - </param> - <when value="db"> - <param name="database" type="select" label="Protein BLAST database"> - <options from_file="blastdb_p.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="hidden" value="" /> - </when> - <when value="histdb"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="data" format="blastdbp" label="Protein BLAST database" /> - <param name="subject" type="hidden" value="" /> - </when> - <when value="file"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> - </when> - </conditional> - <param name="query_gencode" type="select" label="Query genetic code"> - <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details --> - <option value="1" select="True">1. Standard</option> - <option value="2">2. Vertebrate Mitochondrial</option> - <option value="3">3. 
Yeast Mitochondrial</option> - <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> - <option value="5">5. Invertebrate Mitochondrial</option> - <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option> - <option value="9">9. Echinoderm Mitochondrial</option> - <option value="10">10. Euplotid Nuclear</option> - <option value="11">11. Bacteria and Archaea</option> - <option value="12">12. Alternative Yeast Nuclear</option> - <option value="13">13. Ascidian Mitochondrial</option> - <option value="14">14. Flatworm Mitochondrial</option> - <option value="15">15. Blepharisma Macronuclear</option> - <option value="16">16. Chlorophycean Mitochondrial Code</option> - <option value="21">21. Trematode Mitochondrial Code</option> - <option value="22">22. Scenedesmus obliquus mitochondrial Code</option> - <option value="23">23. Thraustochytrium Mitochondrial Code</option> - <option value="24">24. Pterobranchia mitochondrial code</option> - </param> + + <expand macro="input_conditional_protein_db" /> + <expand macro="input_query_gencode" /> <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> - <param name="out_format" type="select" label="Output format"> - <option value="6">Tabular (standard 12 columns)</option> - <option value="ext" selected="True">Tabular (extended 24 columns)</option> - <option value="5">BLAST XML</option> - <option value="0">Pairwise text</option> - <option value="0 -html">Pairwise HTML</option> - <option value="2">Query-anchored text</option> - <option value="2 -html">Query-anchored HTML</option> - <option value="4">Flat query-anchored text</option> - <option value="4 -html">Flat query-anchored HTML</option> - <!-- - <option value="-outfmt 11">BLAST archive format (ASN.1)</option> - --> - </param> + + <expand macro="input_out_format" /> + <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" 
label="Advanced Options"> <option value="basic" selected="True">Hide Advanced Options</option> @@ -134,16 +75,9 @@ <option value="-strand plus">Plus (forward)</option> <option value="-strand minus">Minus (reverse complement)</option> </param> - <param name="matrix" type="select" label="Scoring matrix"> - <option value="BLOSUM90">BLOSUM90</option> - <option value="BLOSUM80">BLOSUM80</option> - <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option> - <option value="BLOSUM50">BLOSUM50</option> - <option value="BLOSUM45">BLOSUM45</option> - <option value="PAM250">PAM250</option> - <option value="PAM70">PAM70</option> - <option value="PAM30">PAM30</option> - </param> + + <expand macro="input_scoring_matrix" /> + <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer --> <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits"> <validator type="in_range" min="0" /> @@ -159,15 +93,7 @@ </inputs> <outputs> <data name="output1" format="tabular" label="blastx on ${on_string}"> - <change_format> - <when input="out_format" value="0" format="txt"/> - <when input="out_format" value="0 -html" format="html"/> - <when input="out_format" value="2" format="txt"/> - <when input="out_format" value="2 -html" format="html"/> - <when input="out_format" value="4" format="txt"/> - <when input="out_format" value="4 -html" format="html"/> - <when input="out_format" value="5" format="blastxml"/> - </change_format> + <expand macro="output_change_format" /> </data> </outputs> <tests>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Tue Oct 08 05:08:26 2013 -0400 @@ -0,0 +1,115 @@ +<tool id="ncbi_dustmasker_wrapper" name="NCBI dustmasker" version="0.0.21"> + <!-- dustmasker wrapper from Edward Kirton and Nicola Soranzo --> + <description>masks low complexity regions</description> + <requirements> + <requirement type="binary">dustmasker</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> + </requirements> + <version_command>dustmasker -version-full</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces. Inputs nested in the db_opts conditional are addressed through the db_opts prefix. +dustmasker +#if $db_opts.db_opts_selector == "db": + -in "${db_opts.database.fields.path}" -infmt blastdb +#elif $db_opts.db_opts_selector == "histdb": + -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb +#else: + -in "$db_opts.subject" -infmt fasta +#end if +-out "$outfile" +-window $window -level $level -linker $linker -outfmt $outformat + </command> + <expand macro="stdio" /> + <inputs> + <expand macro="input_conditional_nucleotide_db" /> + <param name="window" type="integer" value="64" label="DUST window length" /> + <param name="level" type="integer" value="20" label="DUST level" help="Score threshold for subwindows" /> + <param name="linker" type="integer" value="1" label="DUST linker" help="How close masked intervals should be to get merged together" /> + <param name="outformat" type="select" label="Output format"> +<!-- acclist and maskinfo_xml are listed as possible output formats in + "dustmasker -help", but were not recognized by NCBI BLAST up to + release 2.2.27+. Fixed in BLAST 2.2.28+. 
+ seqloc_* formats are not very useful --> +<!-- <option value="acclist">acclist</option>--> + <option value="fasta">FASTA</option> + <option value="interval" selected="true">interval</option> + <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option> + <option value="maskinfo_asn1_text">maskinfo ASN.1 text</option> +<!-- <option value="maskinfo_xml">maskinfo_xml</option> + <option value="seqloc_asn1_bin">seqloc_asn1_bin</option> + <option value="seqloc_asn1_text">seqloc_asn1_text</option> + <option value="seqloc_xml">seqloc_xml</option>--> + </param> + </inputs> + <outputs> + <data name="outfile" format="interval" label="DUST Masked File"> + <change_format> + <when input="outformat" value="fasta" format="fasta" /> + <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" /> + <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="window" value="64" /> + <param name="level" value="20" /> + <param name="linker" value="1" /> + <param name="outformat" value="fasta" /> + <output name="outfile" file="dustmasker_three_human.fasta" /> + </test> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="window" value="64" /> + <param name="level" value="20" /> + <param name="linker" value="1" /> + <param name="outformat" value="maskinfo_asn1_bin" /> + <output name="outfile" file="dustmasker_three_human.maskinfo-asn1-binary" /> + </test> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="window" value="64" /> + <param name="level" value="20" /> + <param name="linker" value="1" /> + <param name="outformat" value="maskinfo_asn1_text" /> + <output 
name="outfile" file="dustmasker_three_human.maskinfo-asn1" /> + </test> + </tests> + <help> +**What it does** + +This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST algorithm. + +If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool. + +More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_. + +.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers (a more specific paper covering this wrapper is planned): + +Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +Christiam Camacho et al. (2009). +BLAST+: architecture and applications. +BMC Bioinformatics. 15;10:421. +http://dx.doi.org/10.1186/1471-2105-10-421 + +This wrapper is available to install into other Galaxy Instances via the Galaxy +Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Tue Oct 08 05:08:26 2013 -0400 @@ -0,0 +1,220 @@ +<macros> + <macro name="output_change_format"> + + <change_format> + <when input="out_format" value="0" format="txt"/> + <when input="out_format" value="0 -html" format="html"/> + <when input="out_format" value="2" format="txt"/> + <when input="out_format" value="2 -html" format="html"/> + <when input="out_format" value="4" format="txt"/> + <when input="out_format" value="4 -html" format="html"/> + <when input="out_format" value="5" format="blastxml"/> + </change_format> + + </macro> + <macro name="input_out_format"> + <param name="out_format" type="select" label="Output format"> + <option value="6">Tabular (standard 12 columns)</option> + <option value="ext" selected="True">Tabular (extended 24 columns)</option> + <option value="5">BLAST XML</option> + <option value="0">Pairwise text</option> + <option value="0 -html">Pairwise HTML</option> + <option value="2">Query-anchored text</option> + <option value="2 -html">Query-anchored HTML</option> + <option value="4">Flat query-anchored text</option> + <option value="4 -html">Flat query-anchored HTML</option> + <!-- + <option value="-outfmt 11">BLAST archive format (ASN.1)</option> + --> + </param> + </macro> + <macro name="input_scoring_matrix"> + <param name="matrix" type="select" label="Scoring matrix"> + <option value="BLOSUM90">BLOSUM90</option> + <option value="BLOSUM80">BLOSUM80</option> + <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option> + <option value="BLOSUM50">BLOSUM50</option> + <option value="BLOSUM45">BLOSUM45</option> + <option value="PAM250">PAM250</option> + <option value="PAM70">PAM70</option> + <option value="PAM30">PAM30</option> + </param> + </macro> + <macro name="stdio"> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- In case the return code has not been set properly 
check stderr too --> + <regex match="Error:" /> + <regex match="Exception:" /> + </stdio> + </macro> + <macro name="input_query_gencode"> + <param name="query_gencode" type="select" label="Query genetic code"> + <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details --> + <option value="1" selected="True">1. Standard</option> + <option value="2">2. Vertebrate Mitochondrial</option> + <option value="3">3. Yeast Mitochondrial</option> + <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> + <option value="5">5. Invertebrate Mitochondrial</option> + <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option> + <option value="9">9. Echinoderm Mitochondrial</option> + <option value="10">10. Euplotid Nuclear</option> + <option value="11">11. Bacteria and Archaea</option> + <option value="12">12. Alternative Yeast Nuclear</option> + <option value="13">13. Ascidian Mitochondrial</option> + <option value="14">14. Flatworm Mitochondrial</option> + <option value="15">15. Blepharisma Macronuclear</option> + <option value="16">16. Chlorophycean Mitochondrial Code</option> + <option value="21">21. Trematode Mitochondrial Code</option> + <option value="22">22. Scenedesmus obliquus mitochondrial Code</option> + <option value="23">23. Thraustochytrium Mitochondrial Code</option> + <option value="24">24. Pterobranchia mitochondrial code</option> + </param> + </macro> + + <macro name="input_db_gencode"> + <param name="db_gencode" type="select" label="Database/subject genetic code"> + <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details --> + <option value="1" selected="True">1. Standard</option> + <option value="2">2. Vertebrate Mitochondrial</option> + <option value="3">3. Yeast Mitochondrial</option> + <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> + <option value="5">5. 
Invertebrate Mitochondrial</option> + <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option> + <option value="9">9. Echinoderm Mitochondrial</option> + <option value="10">10. Euplotid Nuclear</option> + <option value="11">11. Bacteria and Archaea</option> + <option value="12">12. Alternative Yeast Nuclear</option> + <option value="13">13. Ascidian Mitochondrial</option> + <option value="14">14. Flatworm Mitochondrial</option> + <option value="15">15. Blepharisma Macronuclear</option> + <option value="16">16. Chlorophycean Mitochondrial Code</option> + <option value="21">21. Trematode Mitochondrial Code</option> + <option value="22">22. Scenedesmus obliquus mitochondrial Code</option> + <option value="23">23. Thraustochytrium Mitochondrial Code</option> + <option value="24">24. Pterobranchia mitochondrial code</option> + </param> + </macro> + + <macro name="input_conditional_nucleotide_db"> + <conditional name="db_opts"> + <param name="db_opts_selector" type="select" label="Subject database/sequences"> + <option value="db" selected="True">Locally installed BLAST database</option> + <option value="histdb">BLAST database from your history</option> + <option value="file">FASTA file from your history (see warning note below)</option> + </param> + <when value="db"> + <param name="database" type="select" label="Nucleotide BLAST database"> + <options from_file="blastdb.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + </options> + </param> + <param name="histdb" type="hidden" value="" /> + <param name="subject" type="hidden" value="" /> + </when> + <when value="histdb"> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" /> + <param name="subject" type="hidden" value="" /> + </when> + <when value="file"> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="hidden" value="" /> + 
<param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> + </when> + </conditional> + </macro> + + <macro name="input_conditional_protein_db"> + <conditional name="db_opts"> + <param name="db_opts_selector" type="select" label="Subject database/sequences"> + <option value="db" selected="True">Locally installed BLAST database</option> + <option value="histdb">BLAST database from your history</option> + <option value="file">FASTA file from your history (see warning note below)</option> + </param> + <when value="db"> + <param name="database" type="select" label="Protein BLAST database"> + <options from_file="blastdb_p.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + </options> + </param> + <param name="histdb" type="hidden" value="" /> + <param name="subject" type="hidden" value="" /> + </when> + <when value="histdb"> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="data" format="blastdbp" label="Protein BLAST database" /> + <param name="subject" type="hidden" value="" /> + </when> + <when value="file"> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="hidden" value="" /> + <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> + </when> + </conditional> + </macro> + + <macro name="input_conditional_pssm"> + <conditional name="db_opts"> + <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)"> + <option value="db" selected="True">Locally installed BLAST database</option> + <!-- TODO - define new datatype + <option value="histdb">BLAST protein domain database from your history</option> + --> + </param> + <when value="db"> + <param name="database" type="select" label="Protein domain database"> + <options from_file="blastdb_d.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + 
</options> + </param> + <param name="histdb" type="hidden" value="" /> + <param name="subject" type="hidden" value="" /> + </when> + <!-- TODO - define new datatype + <when value="histdb"> + <param name="database" type="hidden" value="" /> + <param name="histdb" type="data" format="blastdbd" label="Protein domain database" /> + <param name="subject" type="hidden" value="" /> + </when> + --> + </conditional> + </macro> + + <macro name="input_conditional_choose_db_type"> + <conditional name="db_opts"> + <param name="db_type" type="select" label="Type of BLAST database"> + <option value="nucl" selected="True">Nucleotide</option> + <option value="prot">Protein</option> + </param> + <when value="nucl"> + <param name="database" type="select" label="Nucleotide BLAST database"> + <options from_file="blastdb.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + </options> + </param> + </when> + <when value="prot"> + <param name="database" type="select" label="Protein BLAST database"> + <options from_file="blastdb_p.loc"> + <column name="value" index="0"/> + <column name="name" index="1"/> + <column name="path" index="2"/> + </options> + </param> + </when> + </conditional> + </macro> + + +</macros>
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,10 +1,13 @@ -<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.5"> +<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.21"> <description>Make BLAST database</description> <requirements> <requirement type="binary">makeblastdb</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> </requirements> <version_command>makeblastdb -version</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> <command> makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}" $parse_seqids @@ -24,30 +27,29 @@ -title "BLAST Database" #end if -dbtype $dbtype -## #set $sep = '-mask_data ' -## #for $i in $mask_data -## $sep${i.file} -## #set $set = ', ' -## #end for +#set $mask_string = '' +#set $sep = '-mask_data ' +#for $i in $mask_data +#set $mask_string += $sep + str($i.file) +#set $sep = ',' +#end for +$mask_string +## #set $gi_mask_string = '' ## #set $sep = '-gi_mask -gi_mask_name ' ## #for $i in $gi_mask -## $sep${i.file} -## #set $set = ', ' -## #end for +## #set $gi_mask_string += $sep + str($i.file) +## #set $sep = ',' +## #end for +## $gi_mask_string ## #if $tax.select == 'id': ## -taxid $tax.id ## #else if $tax.select == 'map': ## -taxid_map $tax.map ## #end if </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- In case the return code has not been set propery check stderr too --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + + <expand macro="stdio" /> + <inputs> <param name="dbtype" type="select" display="radio" label="Molecule type of input"> <option value="prot">protein</option> @@ -66,10 +68,10 @@ <param name="hash_index" type="boolean" 
truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values." help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> <!-- SEQUENCE MASKING OPTIONS --> + <repeat name="mask_data" title="Masking data file"> + <param name="file" type="data" format="maskinfo-asn1,maskinfo-asn1-binary" label="ASN.1 file containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" /> + </repeat> <!-- TODO - <repeat name="mask_data" title="Provide one or more files containing masking data"> - <param name="file" type="data" format="asnb" label="File containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" /> - </repeat> <repeat name="gi_mask" title="Create GI indexed masking data"> <param name="file" type="data" format="asnb" label="Masking data output file" /> </repeat>
--- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,12 +1,15 @@ -<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.4"> +<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.21"> <description>Search protein domain database (PSSMs) with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism> <requirements> <requirement type="binary">rpsblast</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> </requirements> <version_command>rpsblast -version</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -40,57 +43,18 @@ ## End of advanced options: #end if </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- In case the return code has not been set propery check stderr too --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + + <expand macro="stdio" /> + <inputs> <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> - <conditional name="db_opts"> - <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)"> - <option value="db" selected="True">Locally installed BLAST database</option> - <!-- TODO - define new datatype - <option value="histdb">BLAST protein domain database from your history</option> - --> - </param> - <when value="db"> - <param name="database" type="select" label="Protein domain database"> - <options from_file="blastdb_d.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="hidden" value="" /> - </when> - <!-- TODO - define new datatype - <when value="histdb"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="data" format="blastdbd" label="Protein domain database" /> - <param name="subject" type="hidden" value="" /> - </when> - --> - </conditional> + + <expand macro="input_conditional_pssm" /> + <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> - <param name="out_format" type="select" label="Output format"> - <option value="6">Tabular (standard 12 columns)</option> - <option value="ext" selected="True">Tabular (extended 24 columns)</option> - <option value="5">BLAST XML</option> - <option value="0">Pairwise text</option> - <option value="0 -html">Pairwise HTML</option> - <option value="2">Query-anchored text</option> - 
<option value="2 -html">Query-anchored HTML</option> - <option value="4">Flat query-anchored text</option> - <option value="4 -html">Flat query-anchored HTML</option> - <!-- - <option value="-outfmt 11">BLAST archive format (ASN.1)</option> - --> - </param> + + <expand macro="input_out_format" /> + <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" label="Advanced Options"> <option value="basic" selected="True">Hide Advanced Options</option> @@ -114,15 +78,9 @@ </inputs> <outputs> <data name="output1" format="tabular" label="rpsblast on ${on_string}"> - <change_format> - <when input="out_format" value="0" format="txt"/> - <when input="out_format" value="0 -html" format="html"/> - <when input="out_format" value="2" format="txt"/> - <when input="out_format" value="2 -html" format="html"/> - <when input="out_format" value="4" format="txt"/> - <when input="out_format" value="4 -html" format="html"/> - <when input="out_format" value="5" format="blastxml"/> - </change_format> + + <expand macro="output_change_format" /> + </data> </outputs> <help>
--- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,12 +1,15 @@ -<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.4"> +<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.21"> <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism> <requirements> <requirement type="binary">rpstblastn</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> </requirements> <version_command>rpstblastn -version</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -41,57 +44,16 @@ ## End of advanced options: #end if </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- In case the return code has not been set propery check stderr too --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + <expand macro="stdio" /> <inputs> <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> - <conditional name="db_opts"> - <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)"> - <option value="db" selected="True">Locally installed BLAST database</option> - <!-- TODO - define new datatype - <option value="histdb">BLAST protein domain database from your history</option> - --> - </param> - <when value="db"> - <param name="database" type="select" label="Protein domain database"> - <options from_file="blastdb_d.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="hidden" value="" /> - </when> - <!-- TODO - define new datatype - <when value="histdb"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="data" format="blastdbd" label="Protein domain database" /> - <param name="subject" type="hidden" value="" /> - </when> - --> - </conditional> + + <expand macro="input_conditional_pssm" /> + <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> - <param name="out_format" type="select" label="Output format"> - <option value="6">Tabular (standard 12 columns)</option> - <option value="ext" selected="True">Tabular (extended 24 columns)</option> - <option value="5">BLAST XML</option> - <option value="0">Pairwise text</option> - <option value="0 -html">Pairwise HTML</option> - <option value="2">Query-anchored text</option> - 
<option value="2 -html">Query-anchored HTML</option> - <option value="4">Flat query-anchored text</option> - <option value="4 -html">Flat query-anchored HTML</option> - <!-- - <option value="-outfmt 11">BLAST archive format (ASN.1)</option> - --> - </param> + + <expand macro="input_out_format" /> + <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" label="Advanced Options"> <option value="basic" selected="True">Hide Advanced Options</option> @@ -115,15 +77,7 @@ </inputs> <outputs> <data name="output1" format="tabular" label="rpstblastn on ${on_string}"> - <change_format> - <when input="out_format" value="0" format="txt"/> - <when input="out_format" value="0 -html" format="html"/> - <when input="out_format" value="2" format="txt"/> - <when input="out_format" value="2 -html" format="html"/> - <when input="out_format" value="4" format="txt"/> - <when input="out_format" value="4 -html" format="html"/> - <when input="out_format" value="5" format="blastxml"/> - </change_format> + <expand macro="output_change_format" /> </data> </outputs> <help>
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,12 +1,15 @@ -<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.20"> +<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.21"> <description>Search translated nucleotide database with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> <requirements> <requirement type="binary">tblastn</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> </requirements> <version_command>tblastn -version</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -46,59 +49,16 @@ ## End of advanced options: #end if </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- In case the return code has not been set propery check stderr too --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + + <expand macro="stdio" /> + <inputs> <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> - <conditional name="db_opts"> - <param name="db_opts_selector" type="select" label="Subject database/sequences"> - <option value="db" selected="True">Locally installed BLAST database</option> - <option value="histdb">BLAST database from your history</option> - <option value="file">FASTA file from your history (see warning note below)</option> - </param> - <when value="db"> - <param name="database" type="select" label="Nucleotide BLAST database"> - <options from_file="blastdb.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="hidden" value="" /> - </when> - <when value="histdb"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" /> - <param name="subject" type="hidden" value="" /> - </when> - <when value="file"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> - </when> - </conditional> + + <expand macro="input_conditional_nucleotide_db" /> + <expand macro="input_out_format" /> <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> - <param name="out_format" type="select" label="Output format"> - <option value="6">Tabular (standard 12 
columns)</option> - <option value="ext" selected="True">Tabular (extended 24 columns)</option> - <option value="5">BLAST XML</option> - <option value="0">Pairwise text</option> - <option value="0 -html">Pairwise HTML</option> - <option value="2">Query-anchored text</option> - <option value="2 -html">Query-anchored HTML</option> - <option value="4">Flat query-anchored text</option> - <option value="4 -html">Flat query-anchored HTML</option> - <!-- - <option value="-outfmt 11">BLAST archive format (ASN.1)</option> - --> - </param> + <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" label="Advanced Options"> <option value="basic" selected="True">Hide Advanced Options</option> @@ -106,39 +66,14 @@ </param> <when value="basic" /> <when value="advanced"> - <param name="db_gencode" type="select" label="Database/subject genetic code"> - <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details --> - <option value="1" select="True">1. Standard</option> - <option value="2">2. Vertebrate Mitochondrial</option> - <option value="3">3. Yeast Mitochondrial</option> - <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> - <option value="5">5. Invertebrate Mitochondrial</option> - <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option> - <option value="9">9. Echinoderm Mitochondrial</option> - <option value="10">10. Euplotid Nuclear</option> - <option value="11">11. Bacteria and Archaea</option> - <option value="12">12. Alternative Yeast Nuclear</option> - <option value="13">13. Ascidian Mitochondrial</option> - <option value="14">14. Flatworm Mitochondrial</option> - <option value="15">15. Blepharisma Macronuclear</option> - <option value="16">16. Chlorophycean Mitochondrial Code</option> - <option value="21">21. Trematode Mitochondrial Code</option> - <option value="22">22. Scenedesmus obliquus mitochondrial Code</option> - <option value="23">23. 
Thraustochytrium Mitochondrial Code</option> - <option value="24">24. Pterobranchia mitochondrial code</option> - </param> + + <expand macro="input_db_gencode" /> + <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' --> <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" /> - <param name="matrix" type="select" label="Scoring matrix"> - <option value="BLOSUM90">BLOSUM90</option> - <option value="BLOSUM80">BLOSUM80</option> - <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option> - <option value="BLOSUM50">BLOSUM50</option> - <option value="BLOSUM45">BLOSUM45</option> - <option value="PAM250">PAM250</option> - <option value="PAM70">PAM70</option> - <option value="PAM30">PAM30</option> - </param> + + <expand macro="input_scoring_matrix" /> + <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer --> <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits"> <validator type="in_range" min="0" /> @@ -159,15 +94,7 @@ </inputs> <outputs> <data name="output1" format="tabular" label="tblastn on ${on_string}"> - <change_format> - <when input="out_format" value="0" format="txt"/> - <when input="out_format" value="0 -html" format="html"/> - <when input="out_format" value="2" format="txt"/> - <when input="out_format" value="2 -html" format="html"/> - <when input="out_format" value="4" format="txt"/> - <when input="out_format" value="4 -html" format="html"/> - <when input="out_format" value="5" format="blastxml"/> - </change_format> + <expand macro="output_change_format" /> </data> </outputs> <tests>
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,12 +1,15 @@ -<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.20"> +<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.21"> <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> <requirements> <requirement type="binary">tblastx</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> + <requirement type="package" version="2.2.27">blast+</requirement> </requirements> <version_command>tblastx -version</version_command> + <macros> + <import>ncbi_macros.xml</import> + </macros> <command> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -46,80 +49,18 @@ ## End of advanced options: #end if </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- In case the return code has not been set propery check stderr too --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + + <expand macro="stdio" /> + <inputs> <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> - <conditional name="db_opts"> - <param name="db_opts_selector" type="select" label="Subject database/sequences"> - <option value="db" selected="True">Locally installed BLAST database</option> - <option value="histdb">BLAST database from your history</option> - <option value="file">FASTA file from your history (see warning note below)</option> - </param> - <when value="db"> - <param name="database" type="select" label="Nucleotide BLAST database"> - <options from_file="blastdb.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> - </param> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="hidden" value="" /> - </when> - <when value="histdb"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" /> - <param name="subject" type="hidden" value="" /> - </when> - <when value="file"> - <param name="database" type="hidden" value="" /> - <param name="histdb" type="hidden" value="" /> - <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> - </when> - </conditional> - <param name="query_gencode" type="select" label="Query genetic code"> - <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details --> - <option value="1" select="True">1. Standard</option> - <option value="2">2. Vertebrate Mitochondrial</option> - <option value="3">3. 
Yeast Mitochondrial</option> - <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> - <option value="5">5. Invertebrate Mitochondrial</option> - <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option> - <option value="9">9. Echinoderm Mitochondrial</option> - <option value="10">10. Euplotid Nuclear</option> - <option value="11">11. Bacteria and Archaea</option> - <option value="12">12. Alternative Yeast Nuclear</option> - <option value="13">13. Ascidian Mitochondrial</option> - <option value="14">14. Flatworm Mitochondrial</option> - <option value="15">15. Blepharisma Macronuclear</option> - <option value="16">16. Chlorophycean Mitochondrial Code</option> - <option value="21">21. Trematode Mitochondrial Code</option> - <option value="22">22. Scenedesmus obliquus mitochondrial Code</option> - <option value="23">23. Thraustochytrium Mitochondrial Code</option> - <option value="24">24. Pterobranchia mitochondrial code</option> - </param> + + <expand macro="input_conditional_nucleotide_db" /> + <expand macro="input_query_gencode" /> <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> - <param name="out_format" type="select" label="Output format"> - <option value="6">Tabular (standard 12 columns)</option> - <option value="ext" selected="True">Tabular (extended 24 columns)</option> - <option value="5">BLAST XML</option> - <option value="0">Pairwise text</option> - <option value="0 -html">Pairwise HTML</option> - <option value="2">Query-anchored text</option> - <option value="2 -html">Query-anchored HTML</option> - <option value="4">Flat query-anchored text</option> - <option value="4 -html">Flat query-anchored HTML</option> - <!-- - <option value="-outfmt 11">BLAST archive format (ASN.1)</option> - --> - </param> + + <expand macro="input_out_format" /> + <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" 
label="Advanced Options"> <option value="basic" selected="True">Hide Advanced Options</option> @@ -127,27 +68,9 @@ </param> <when value="basic" /> <when value="advanced"> - <param name="db_gencode" type="select" label="Database/subject genetic code"> - <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details --> - <option value="1" select="True">1. Standard</option> - <option value="2">2. Vertebrate Mitochondrial</option> - <option value="3">3. Yeast Mitochondrial</option> - <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> - <option value="5">5. Invertebrate Mitochondrial</option> - <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option> - <option value="9">9. Echinoderm Mitochondrial</option> - <option value="10">10. Euplotid Nuclear</option> - <option value="11">11. Bacteria and Archaea</option> - <option value="12">12. Alternative Yeast Nuclear</option> - <option value="13">13. Ascidian Mitochondrial</option> - <option value="14">14. Flatworm Mitochondrial</option> - <option value="15">15. Blepharisma Macronuclear</option> - <option value="16">16. Chlorophycean Mitochondrial Code</option> - <option value="21">21. Trematode Mitochondrial Code</option> - <option value="22">22. Scenedesmus obliquus mitochondrial Code</option> - <option value="23">23. Thraustochytrium Mitochondrial Code</option> - <option value="24">24. 
Pterobranchia mitochondrial code</option> - </param> + + <expand macro="input_db_gencode" /> + <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' --> <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" /> <param name="strand" type="select" label="Query strand(s) to search against database/subject"> @@ -155,16 +78,7 @@ <option value="-strand plus">Plus (forward)</option> <option value="-strand minus">Minus (reverse complement)</option> </param> - <param name="matrix" type="select" label="Scoring matrix"> - <option value="BLOSUM90">BLOSUM90</option> - <option value="BLOSUM80">BLOSUM80</option> - <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option> - <option value="BLOSUM50">BLOSUM50</option> - <option value="BLOSUM45">BLOSUM45</option> - <option value="PAM250">PAM250</option> - <option value="PAM70">PAM70</option> - <option value="PAM30">PAM30</option> - </param> + <expand macro="input_scoring_matrix" /> <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer --> <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits"> <validator type="in_range" min="0" /> @@ -179,15 +93,7 @@ </inputs> <outputs> <data name="output1" format="tabular" label="tblastx on ${on_string}"> - <change_format> - <when input="out_format" value="0" format="txt"/> - <when input="out_format" value="0 -html" format="html"/> - <when input="out_format" value="2" format="txt"/> - <when input="out_format" value="2 -html" format="html"/> - <when input="out_format" value="4" format="txt"/> - <when input="out_format" value="4 -html" format="html"/> - <when input="out_format" value="5" format="blastxml"/> - </change_format> + <expand macro="output_change_format" /> </data> </outputs> <tests>
--- a/tools/ncbi_blast_plus/repository_dependencies.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/repository_dependencies.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,4 +1,4 @@ <?xml version="1.0"?> <repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format)."> - <repository changeset_revision="a44a7a5456e1" name="blast_datatypes" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="7ceb2ae30ff4" name="blast_datatypes" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </repositories>
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Mon Sep 23 06:13:22 2013 -0400 +++ b/tools/ncbi_blast_plus/tool_dependencies.xml Tue Oct 08 05:08:26 2013 -0400 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="blast+" version="2.2.26+"> - <repository changeset_revision="c85236cd100d" name="package_blast_plus_2_2_26" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <package name="blast+" version="2.2.27"> + <repository changeset_revision="eab09bc4d63e" name="package_blast_plus_2_2_27" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>