# HG changeset patch
# User peterjc
# Date 1392892788 18000
# Node ID 22b7cdcf49603e8626ee5a7d15e961df8d947b5f
# Parent 2f7fac29bb3c62143204845cd0e344522aa6bbda
Uploaded v0.1.0 preview 2, includes missing new test files
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/blastn_rhodopsin_vs_three_human.columns.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human.columns.tabular Thu Feb 20 05:39:48 2014 -0500
@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 4301 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 4301 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 4301 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 4301 1213
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 983 1213
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1047 1213
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/blastn_rhodopsin_vs_three_human.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human.xml Thu Feb 20 05:39:48 2014 -0500
@@ -0,0 +1,549 @@
+
+
+
+ blastn
+ BLASTN 2.2.29+
+ Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), "A greedy algorithm for aligning DNA sequences", J Comput Biol 2000; 7(1-2):203-14.
+
+ Query_1
+ gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA
+ 1047
+
+
+ 1e-40
+ 1
+ -2
+ 0
+ 0
+ L;m;
+
+
+
+
+ 1
+ Query_1
+ gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA
+ 1047
+
+
+
+
+ 0
+ 0
+ 15
+ 4933992
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 2
+ Query_1
+ gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA
+ 1047
+
+
+
+
+ 0
+ 0
+ 15
+ 4933992
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 3
+ Query_1
+ gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA
+ 1047
+
+
+ 1
+ Subject_3
+ ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds
+ Subject_3
+ 1213
+
+
+ 1
+ 1474.75
+ 798
+ 0
+ 1
+ 1047
+ 88
+ 1134
+ 1
+ 1
+ 964
+ 964
+ 0
+ 1047
+ ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCCTTCTCCAACAAAACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTCCTGCTCATCGTGCTTGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACGGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCTGACCTCTTCATGGTCTTCGGTGGCTTCACCACCACCCTCTACACCTCTCTGCATGGATACTTTGTCTTTGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACACTGGGCGGTGAAATTGCCCTGTGGTCTTTGGTGGTCCTGGCCATTGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCATGCCATAATGGGCGTCGCTTTCACCTGGGTCATGGCACTGGCCTGCGCTGCACCCCCCCTCGTTGGTTGGTCCAGGTACATCCCTGAAGGCATGCAGTGTTCATGCGGGATCGACTACTACACACTCAAGCCAGAAGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATCGTCATCTTCTTTTGCTACGGGCAGCTTGTCTTCACAGTCAAGGAGGCGGCAGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCTGAGAAGGAGGTCACTCGCATGGTCATCATCATGGTCATTGCTTTCCTGATCTGTTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGGTCCAACTTTGGCCCCATCTTCATGACACTCCCGGCGTTCTTCGCAAAGTCCTCCTCCATCTACAACCCTGTCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCCACTGGGTGATGACGAGGCTTCCACAACCGGTTCCAAGACGGAGACCAGCCAGGTGGCACCGGCCTAA
+ ATGAATGGCACAGAAGGCCCTAACTTCTACGTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGATGAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAA
+ ||||| || || || ||||| ||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ||||| |||||||| ||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||| | ||||||||||||| |||||||||||||||||||||||||||||| ||||| |||||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||| ||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||| ||||||||||||||||| |||||||| || ||||||||||||||||| ||||||||||| |||||||| |||| || ||||||||||||||||| || ||| ||||||| || || || |||||||||||||| |||||||| || |||||||||||||| |||||||||||||||||||||||||||||||||||||||||||| ||||||| || ||||| |||||||| |||||||| ||||||||||| || |||||||||||||||||||||||||| |||||||| |||||||||||||| ||||||||||||||||||||||| |||||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||| |||||||| || |||||||||||||| |||| |||||||| || ||| | || |||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||| ||| ||||||| |||||||||||||||||||| || ||||| || | |||| |||||||||||||| ||||||||||| |||||||||
+
+
+
+
+
+
+ 0
+ 0
+ 15
+ 4933992
+ 0.46
+ 1.28
+ 0.85
+
+
+
+
+ 4
+ Query_2
+ gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds
+ 1574
+
+
+
+
+ 0
+ 0
+ 15
+ 7453579
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 5
+ Query_2
+ gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds
+ 1574
+
+
+
+
+ 0
+ 0
+ 15
+ 7453579
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 6
+ Query_2
+ gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds
+ 1574
+
+
+
+
+ 0
+ 0
+ 15
+ 7453579
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 7
+ Query_3
+ gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds
+ 4301
+
+
+
+
+ 0
+ 0
+ 16
+ 20482300
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 8
+ Query_3
+ gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds
+ 4301
+
+
+
+
+ 0
+ 0
+ 16
+ 20482300
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 9
+ Query_3
+ gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds
+ 4301
+
+
+ 1
+ Subject_3
+ ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds
+ Subject_3
+ 1213
+
+
+ 1
+ 460.936
+ 249
+ 3.59583e-132
+ 1
+ 333
+ 118
+ 450
+ 1
+ 1
+ 305
+ 305
+ 0
+ 333
+ GTGCCCTTCTCCAACAAGACAGGCGTGGTGCGCAGTCCCTTCGAGCATCCACAGTACTACCTGGCCGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTCGGCTTCCCCATCAACTTCCTCACGCTCTATGTCACGGTTCAGCACAAGAAGCTGCGTACGCCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCCGACCTCTTCATGGTCTTCGGAGGCTTCACCACCACCCTCTACACCTCCCTGCATGGATACTTTGTCTTCGGGCCTACGGGATGCAATCTGGAGGGCTTTTTTGCCACCCTGGGAGGT
+ GTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGT
+ |||||||||||||| ||| || ||||| ||||| ||||||||| | ||||||||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||||||||| ||||||||||||||||||||||||||||| ||||| || ||||||||||||||||| ||||||||||||||||||||||||||||| |||||||| ||||||||||||||| | || |||||||||| ||||||||||||||| |||||||||||||| ||||||||||| || ||||||||| |||||||||| |||||||||||||| |||
+
+
+ 2
+ 331.671
+ 179
+ 2.94161e-93
+ 3127
+ 3368
+ 782
+ 1023
+ 1
+ 1
+ 222
+ 222
+ 2
+ 243
+ AGGCAGCTGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCCGAGAAGGAGGTCACCCGTATGGTCATCATCATGGTCATTGCTTTCCTAATCTGTTGGCTGCCGTATGCCGGCGTGGCATTCTACATCTTCACCCACCAGGGCTCTAACTTTGGCCCCATCTTCATGACCCTCCCGGCATTCTTTGCCAAG-TCGTCCTCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAG
+ AGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCG-CCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAG
+ |||| ||||||||||||||||||||||||||||| |||||||| ||||||||||||||||| |||||||||||||||||||| |||||||| ||||| ||| |||| || ||| |||||||||||||||||||||||||||||||||| ||||| || ||||||||||||||| |||| || |||||||||||| || || |||||||||||||||||||||||||||||||||||||||||
+
+
+ 3
+ 265.191
+ 143
+ 3.02604e-73
+ 1410
+ 1582
+ 448
+ 620
+ 1
+ 1
+ 163
+ 163
+ 0
+ 173
+ GGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTATGCAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCCTTGCCCTCACCTGGGTCATGGCACTGGCCTGCGCCGCGCCCCCGCTAGTCGGCTGGTCCAGGTA
+ GGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTA
+ |||||||||||||||||||||||||||||||||||||||||||||||||||||||| || ||||||||||||||||||||||||||||||||||| |||||||||||| ||||| |||||||||||||||| |||||||||||||| ||||| || | |||||||||||||||
+
+
+ 4
+ 248.571
+ 134
+ 3.04752e-68
+ 2854
+ 3023
+ 615
+ 784
+ 1
+ 1
+ 158
+ 158
+ 0
+ 170
+ CAGGTACATCCCAGAGGGCATGCAGTGCTCATGTGGAATCGACTACTACACCCTCAAGCCGGAGGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCTATGATTGTCATATTCTTTTGCTATGGACAGCTGGTCTTCACCGTCAAGGAGG
+ CAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGG
+ |||||||||||| |||||| |||||||||| |||||||||||||||||||| |||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||| |||||| |||| || || |||||||| ||||| |||||||||||||||||||
+
+
+
+
+
+
+ 0
+ 0
+ 16
+ 20482300
+ 0.46
+ 1.28
+ 0.85
+
+
+
+
+ 10
+ Query_4
+ gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds
+ 983
+
+
+
+
+ 0
+ 0
+ 15
+ 4628008
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 11
+ Query_4
+ gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds
+ 983
+
+
+
+
+ 0
+ 0
+ 15
+ 4628008
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 12
+ Query_4
+ gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds
+ 983
+
+
+ 1
+ Subject_3
+ ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds
+ Subject_3
+ 1213
+
+
+ 1
+ 1323.32
+ 716
+ 0
+ 1
+ 959
+ 118
+ 1076
+ 1
+ 1
+ 878
+ 878
+ 0
+ 959
+ GTGCCCTTCTCCAACAAGACGGGTGTGGTGCGCAGCCCCTTCGAGTACCCGCAGTACTACCTGGCTGAGCCCTGGCAGTTCTCCATGCTGGCTGCCTACATGTTTCTGCTGATCGTGCTCGGATTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCTGTGGCCAACCTCTTCATGGTCTTTGGAGGCTTCACCACCACCCTGTATACCTCTATGCATGGATACTTCGTCTTCGGGGCCACGGGATGCAATCTGGAGGGCTTCTTTGCCACGCTGGGCGGTGAAATCGCCCTGTGGTCCCTGGTGGTCCTGGCCATCGAGCGGTATGTGGTGGTCTGCAAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCACGCCATCATGGGCCTCGCCTTCACGTGGGTCATGGCACTGGCCTGCGCTGCACCCCCACTAGCCGGCTGGTCCAGGTACATCCCAGAGGGCATGCAGTGCTCGTGTGGGATTGACTACTACACGCTCAAACCGGAGGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTGTCATTTTCTTCTGCTACGGACAGCTGGTGTTCACAGTGAAGGAGGCGGCTGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCCGAGAAGGAAGTCACGCGCATGGTCATCATCATGGTCGTTGCGTTCCTAATCTGTTGGCTGCCCTACGCCAGCGTGGCATTCTACATCTTTACCCACCAGGGCTCTAACTTTGGCCCTGTCTTCATGACCATCCCGGCATTCTTCGCCAAGTCATCCTCCATCTACAACCCGGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCCTCTGCTGTGGCAAGAACCCACTGGGTGA
+ GTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGA
+ |||||||||||||| |||||||||||| |||||||||||||||||||| |||||||||||||||||||| |||||||||||||||||||| |||||||||||||||||||||||||| || ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| || ||||| |||||||||||||| | || |||||||||| |||||| || |||||| ||||||||||||||||||||||| |||| ||||||||| ||||||||||||||||||| |||||||||||||| |||||||||||| ||||||||||||||||||||||||| |||||||| || ||||||||||||||||||||||| ||||||||||| |||||||||||| | |||||||| ||||||||||| ||||||||||| ||||||||||| ||||||||||||||||||||||| |||||| |||||||||||||||| || ||||||||||||||||| |||||||||||||||||||| || |||||||||||||||||||||||||||||||||||||||||| |||| || |||||||| || ||||| || ||||| || |||||||| ||||||||||||||||||||||||||||| |||||||| |||||||| ||||| ||||||||||||||||||||| | || ||||| ||||| ||| ||||||||||||||||||||||||||||||| |||||||||||||| ||||| || || |||||||||||||||| || ||||| |||||| || ||||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||||||| ||||||| ||||||||||||||||||||
+
+
+
+
+
+
+ 0
+ 0
+ 15
+ 4628008
+ 0.46
+ 1.28
+ 0.85
+
+
+
+
+ 13
+ Query_5
+ gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds
+ 1047
+
+
+
+
+ 0
+ 0
+ 15
+ 4933992
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 14
+ Query_5
+ gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds
+ 1047
+
+
+
+
+ 0
+ 0
+ 15
+ 4933992
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 15
+ Query_5
+ gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds
+ 1047
+
+
+ 1
+ Subject_3
+ ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds
+ Subject_3
+ 1213
+
+
+ 1
+ 1208.83
+ 654
+ 0
+ 1
+ 1047
+ 88
+ 1134
+ 1
+ 1
+ 917
+ 917
+ 2
+ 1048
+ ATGAACGGGACCGAGGGCCCAAACTTCTACGTGCCTTTCTCCAACAAGACGGGCGTCGTACGCAGCCCCTTCGAGGCGCCGCAGTACTACCTGGCTGAGCCATGGCAGTTCAGCATGCTGGCCGCCTACATGTTCCTGCTGATCATGCTTGGCTTCCCCATCAACTTCCTCACGCTGTACGTCACAGTCCAGCACAAGAAGCTGAGGACCCCCCTCAACTACATCCTGCTCAACCTGGCCGTGGCAGATCTCTTCATGGTGTTCGGGGGCTTCACCACCACCCTGTATACCTCTCTGCACGGGTACTTCGTGTTCGGTCCGACGGGCTGCAACCTCGAGGGCTTCTTTGCCACCTTAGGCGGTGAAATTGCACTGTGGTCCTTGGTGGTGCTAGCCATCGAGCGGTACGTAGTGGTGTGCAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCGTCGCATTCACCTGGGTCATGGCTCTGGCCTGTGCGGCCCCCCCCCTCGTCGGCTGGTCTAGATACATCCCGGAGGGGATGCAGTGCTCGTGCGGGATCGATTACTACACGCCCCACGAGGAGACCAACAATGAGTCGTTCGTCATCTACATGTTCGTTGTACACTTCATCATCCCCCTGATTGTCATATTCTTCTGCTACGGGCAGCTGGTCTTCACCGTCAAGGAGGCTGCAGCCCAGCAGCAGGAGTCGGCCACCACTCAGAAGGCCGAGAAGGAGGTCACGCGTATGGTCATCATCATGGTCATCGCTTTCCTCATATGCTGGCTGCCCTACGCAGGTGTGGCGTTCTACATCTTCACCCATCAGGGATCCGACTTTGGCCCCATCTTCATGACCATCCCGGCTTTCTTTGCCAAGA-CGTCTGCCGTCTATAACCCCGTCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGGTCACCACTCTCTGCTGTGGCAAGAACCCCCTAGGTGACGACGAGGCCTCCACGACCGTGTCCAAGACAGAGACCAGCCAAGTGGCCCCTGCCTAA
+ ATGAATGGCACAGAAGGCCCTAACTTCTACGTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCC-GCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGATGAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAA
+ ||||| || || || ||||| |||||||||||||| |||||||| |||||| || |||||||||||||||||| || |||||||||||||||||||||||||||||| ||||||||||||||||||||| ||||||||| |||| |||||||||||||||||||||||||| |||||||| |||||||||||||||||| | || || ||||||||||||||||||||||| |||||||| || ||||||||||| | || |||||||||| |||||| || ||||||||||| || |||||||| ||||| || || || ||||| | |||||||||||||||||| | |||||||||||||| ||||||||||||||||| || ||||||||||||||||| |||||||| ||||||||||||||||||||||||||||||||||| |||||||||||||| || ||||||||||||||||| |||||||| || || ||||| |||| ||||||||| || |||||||| ||||| ||||||||||||| || ||||| |||||||||| | | |||| |||||| ||||| || ||||||||||||||||| || ||||||| ||||||| ||||| |||| || |||||||| |||||||| |||||||||||||||||||| || ||||||||||||||||| |||||||| |||||||| |||||||||||||| || ||||||||||||||||||||||||||||| || |||||| |||||||||| | ||||| ||||||||||||||||| ||||| ||| |||| || |||||||||||||||||||| || ||||||||||||| || | ||| |||| ||||| |||||||| ||||||||||||||||||||||||||||||||| ||||||| ||||||| ||||||||||| || |||||||| |||||||| | |||||||||||||| ||||| ||||| |||||||| ||||||
+
+
+
+
+
+
+ 0
+ 0
+ 15
+ 4933992
+ 0.46
+ 1.28
+ 0.85
+
+
+
+
+ 16
+ Query_6
+ gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds
+ 1344
+
+
+
+
+ 0
+ 0
+ 15
+ 6353949
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 17
+ Query_6
+ gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds
+ 1344
+
+
+
+
+ 0
+ 0
+ 15
+ 6353949
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+ 18
+ Query_6
+ gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds
+ 1344
+
+
+
+
+ 0
+ 0
+ 15
+ 6353949
+ 0.46
+ 1.28
+ 0.85
+
+
+ No hits found
+
+
+
+
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.log Thu Feb 20 05:39:48 2014 -0500
@@ -0,0 +1,10 @@
+
+
+Building a new DB, current time: 02/10/2014 18:40:09
+New DB name: four_human_proteins_taxid.fasta
+New DB title: Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
+Adding sequences from FASTA; added 4 sequences in 0.00230002 seconds.
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.phd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.phd Thu Feb 20 05:39:48 2014 -0500
@@ -0,0 +1,4 @@
+11117184492
+29249033410
+36665887501
+5392473183
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.phi
Binary file test-data/four_human_proteins_taxid.fasta.phi has changed
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.phr
Binary file test-data/four_human_proteins_taxid.fasta.phr has changed
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.pin
Binary file test-data/four_human_proteins_taxid.fasta.pin has changed
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.pog
Binary file test-data/four_human_proteins_taxid.fasta.pog has changed
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.psd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.psd Thu Feb 20 05:39:48 2014 -0500
@@ -0,0 +1,4 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
+gnl|bl_ord_id|33
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.psi
Binary file test-data/four_human_proteins_taxid.fasta.psi has changed
diff -r 2f7fac29bb3c -r 22b7cdcf4960 test-data/four_human_proteins_taxid.fasta.psq
Binary file test-data/four_human_proteins_taxid.fasta.psq has changed
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Wed Jan 15 05:38:14 2014 -0500
+++ b/tools/ncbi_blast_plus/README.rst Thu Feb 20 05:39:48 2014 -0500
@@ -25,17 +25,7 @@
(``blastxml``) and protein and nucleotide BLAST databases (``blastdbp`` and
``blastdbn``).
-You must tell Galaxy about any system level BLAST databases using configuration
-files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
-databases like NR), and blastdb_d.loc (protein domain databases like CDD or
-SMART) which are located in the tool-data/ folder. Sample files are included
-which explain the tab-based format to use.
-
-You can download the NCBI provided databases as tar-balls from here:
-
-* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR)
-* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD)
-
+See the configuration notes below.
Manual Installation
===================
@@ -78,6 +68,31 @@
./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools
+Configuration
+=============
+
+You must tell Galaxy about any system-level BLAST databases using configuration
+files blastdb.loc (nucleotide databases like NT), blastdb_p.loc (protein
+databases like NR), and blastdb_d.loc (protein domain databases like CDD or
+SMART), which are located in the tool-data/ folder. Sample files are included
+which explain the tab-based format to use.
+
+You can download the NCBI provided databases as tar-balls from here:
+
+* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR)
+* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD)
+
+The BLAST+ binaries support multi-threaded operation, which is handled via the
+$GALAXY_SLOTS environment variable. This should be set automatically by Galaxy
+via your job runner settings, which allows you to (for example) allocate four
+cores to each BLAST job.
+
+In addition, the BLAST+ wrappers support high-level parallelism by task
+splitting if ``use_tasked_jobs = True`` is set in your ``universe_wsgi.ini``
+configuration file. Essentially, the FASTA input query files are broken up into
+batches of 1000 sequences, a separate BLAST child job is run for each batch,
+and then the BLAST output files are merged (in order). This is transparent
+to the end user.
History
=======
@@ -105,7 +120,7 @@
(all too often our users where having to re-run searches just to
get one of the missing columns like query or subject length)
v0.0.18 - Defensive quoting of filenames in case of spaces (where possible,
- BLAST+ handling of some mult-file arguments is problematic).
+ BLAST+ handling of some multi-file arguments is problematic).
v0.0.19 - Added wrappers for rpsblast and rpstblastn, and new blastdb_d.loc
for the domain databases they use (e.g. CDD, PFAM or SMART).
- Correct case of exception regular expression (for error handling
@@ -139,6 +154,7 @@
- Tablar output now includes option to pick specific columns
- BLAST XML to tabular tool supports multiple input files.
- More detailed descriptions for BLASTN and BLASTP task option
+ - Supports setting a taxonomy ID in makeblastdb wrapper.
======= ======================================================================
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/blastxml_to_tabular.py
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Wed Jan 15 05:38:14 2014 -0500
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Thu Feb 20 05:39:48 2014 -0500
@@ -66,7 +66,7 @@
from optparse import OptionParser
if "-v" in sys.argv or "--version" in sys.argv:
- print "v0.0.23"
+ print "v0.1.00"
sys.exit(0)
if sys.version_info[:2] >= ( 2, 5 ):
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/blastxml_to_tabular.xml
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Wed Jan 15 05:38:14 2014 -0500
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Thu Feb 20 05:39:48 2014 -0500
@@ -28,33 +28,33 @@
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Wed Jan 15 05:38:14 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Thu Feb 20 05:39:48 2014 -0500
@@ -83,13 +83,14 @@
**What it does**
-This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST algorithm.
+This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST_ algorithm.
If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.
More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_.
.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+.. _DUST: http://www.ncbi.nlm.nih.gov/pubmed/16796549
**References**
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/ncbi_macros.xml
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Wed Jan 15 05:38:14 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml Thu Feb 20 05:39:48 2014 -0500
@@ -31,33 +31,33 @@
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Jan 15 05:38:14 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Feb 20 05:39:48 2014 -0500
@@ -29,7 +29,10 @@
##Would default to being based on the cryptic Galaxy filenames, which is unhelpful
-title "BLAST Database"
#end if
--dbtype $dbtype
+-dbtype $dbtype
+## --------------------------------------------------------------------
+## Masking
+## --------------------------------------------------------------------
#set $mask_string = ''
#set $sep = '-mask_data '
#for $i in $mask_data
@@ -44,11 +47,15 @@
## #set $sep = ','
## #end for
## $gi_mask_string
-## #if $tax.select == 'id':
-## -taxid $tax.id
-## #else if $tax.select == 'map':
-## -taxid_map $tax.map
-## #end if
+## --------------------------------------------------------------------
+## Taxonomy
+## --------------------------------------------------------------------
+#if $tax.taxselect == 'id':
+-taxid $tax.taxid
+## TODO - Can we use a tabular file for the taxonomy mapping?
+## #else if $tax.taxselect == 'map':
+## -taxid_map $tax.taxmap
+#end if
## --------------------------------------------------------------------
## Capture the stdout log information to the primary file (plain text):
>> "$outfile"
@@ -83,23 +90,25 @@
-->
-
-
+
+
- -->
@@ -112,6 +121,8 @@
@@ -130,6 +141,25 @@
+
+
+
+
+
+
+
+
+
+
**What it does**
diff -r 2f7fac29bb3c -r 22b7cdcf4960 tools/ncbi_blast_plus/tool_dependencies.xml
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Wed Jan 15 05:38:14 2014 -0500
+++ b/tools/ncbi_blast_plus/tool_dependencies.xml Thu Feb 20 05:39:48 2014 -0500
@@ -1,6 +1,6 @@
-
+