# HG changeset patch # User peterjc # Date 1400172849 14400 # Node ID a96608a125fb750a3b9ae80873c86b875b7c1985 Uploaded v0.1.0, first release diff -r 000000000000 -r a96608a125fb test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular Thu May 15 12:54:09 2014 -0400 @@ -0,0 +1,2 @@ +#A_id B_id A_vs_B B_vs_A +ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1514 1514 diff -r 000000000000 -r a96608a125fb test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular Thu May 15 12:54:09 2014 -0400 @@ -0,0 +1,2 @@ +#A_id B_id A_vs_B B_vs_A +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1474 1474 diff -r 000000000000 -r a96608a125fb test-data/rbh_none.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rbh_none.tabular Thu May 15 12:54:09 2014 -0400 @@ -0,0 +1,1 @@ +#A_id B_id A_vs_B B_vs_A diff -r 000000000000 -r a96608a125fb test-data/rhodopsin_nucs.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rhodopsin_nucs.fasta Thu May 15 12:54:09 2014 -0400 @@ -0,0 +1,161 @@ +>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA +ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCCTTCTCCAACAAAACGGGTGTGGTACGCAGCCCCT +TCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTCCTGCT +CATCGTGCTTGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACGGTCCAGCACAAGAAGCTGCGCACG +CCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCTGACCTCTTCATGGTCTTCGGTGGCTTCACCACCA +CCCTCTACACCTCTCTGCATGGATACTTTGTCTTTGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGC +CACACTGGGCGGTGAAATTGCCCTGTGGTCTTTGGTGGTCCTGGCCATTGAGCGGTACGTGGTGGTGTGT +AAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCATGCCATAATGGGCGTCGCTTTCACCTGGGTCATGG +CACTGGCCTGCGCTGCACCCCCCCTCGTTGGTTGGTCCAGGTACATCCCTGAAGGCATGCAGTGTTCATG +CGGGATCGACTACTACACACTCAAGCCAGAAGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTC +CACTTCACCATCCCCATGATCGTCATCTTCTTTTGCTACGGGCAGCTTGTCTTCACAGTCAAGGAGGCGG +CAGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCTGAGAAGGAGGTCACTCGCATGGTCATCATCAT +GGTCATTGCTTTCCTGATCTGTTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGG +TCCAACTTTGGCCCCATCTTCATGACACTCCCGGCGTTCTTCGCAAAGTCCTCCTCCATCTACAACCCTG +TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCC +ACTGGGTGATGACGAGGCTTCCACAACCGGTTCCAAGACGGAGACCAGCCAGGTGGCACCGGCCTAA + +>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds +TCTTTCTAGTTTGGGGGGGGGGACTTTAAAGAGCCGCCAATATGAACGGAACAGAAGGCCCAAACTTTTA +CATACCCATGTCCAACAAGACTGGGGTGGTGCGAAGCCCCTTTGAATACCCTCAGTATTACCTGGCAGAG +CCATGGCAATATTCCATTCTGTGCGCGTACATGTTCCTGCTCATTCTACTTGGGTTCCCAATCAACTTCA +TGACCTTGTACGTCACCATCCAGCACAAGAAGCTCCGGACACCCTTAAACTATATCCTGCTGAATTTGGC +CTTTGCCAACCACTTCATGGTCCTGTGTGGATTCACGGTGACAATGTACTCCTCAATGAACGGATACTTC +ATCCTCGGAGCCACCGGTTGCTATGTTGAAGGCTTCTTCGCTACCCTTGGTGGTGAAATCGCCCTTTGGT +CCCTGGTGGTCTTGGCCATTGAACGATACGTGGTCGTCTGTAAGCCCATGAGCAACTTCCGATTTAGTGA +GAACCATGCCGTCATGGGCGTAGCGTTCACCTGGATAATGGCTTTGTCCTGTGCTGTTCCTCCACTCCTT +GGATGGTCCAGGTACATCCCCGAGGGCATGCAGTGCTCCTGCGGAGTCGACTACTACACCCTGAAGCCCG +AGGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTCGTCCACTTCACCATCCCCCTGATTATCATTTT +CTTCTGCTATGGCCGCCTGGTGTGCACTGTGAAAGAGGCTGCAGCTCAACAGCAAGAGTCCGCCACCACC +CAGAAGGCCGAGAAAGAGGTGACCAGGATGGTGATCATCATGGTGGTCTTCTTCCTTATCTGTTGGGTCC +CCTACGCCTCTGTCGCTTTCTTCATCTTCAGCAATCAGGGCTCTGAGTTCGGCCCCATCTTCATGACCGT +CCCAGCTTTCTTTGCCAAGAGTTCTTCCATCTACAACCCCGTCATCTACATCATGCTCAACAAGCAGTTC +CGTAACTGCATGATCACCACCCTGTGCTGCGGCAAGAATCCCTTTGGAGAAGACGATGCCTCCTCTGCCG +CCACCTCCAAGACAGAGGCTTCTTCTGTTTCTTCCAGCCAGGTGTCTCCTGCATAAGACCTTCCACCAGG +CCTGTCTCAGGGTCCGCTGCCTCACACAGCTCCCACCGCCCCAACTCCGTCTCCTGCTCGCTAAGGCGGC +GAAGTTCCCCTTCCATTACATAAAACGTATCTGTTCAAGAAAGGCGACGACGAAGGAGAAGAAGAGGAGC +CCCCCCGAACCCCTTCGCTGCTGCTGAAAACGACTTGATTGCTTCTGCAACGCAACGGGGCCTTACGGCA +GCGAAGGGGTTGTCATCCGGACGCGCCAAGAATTCCTTCGAGACTGTAAATATCTTAAAGGAACCGTCCT +GCTAGTTACCGACGCCGCTCCTGTAGCCGCCGTTCCCCCGCACTCCGGCCGGTTCATACCTCTTATTTTT +TTGCAATGCAACAGAAAATAATATTTTTGTTCCCACGGCTTTTCCCGGTCAGGTCTGGTAGTGGCGGAGA +TTGGCCGACCCCTCGCACCTGTAATAAAGCGCAG + +>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds +GTGCCCTTCTCCAACAAGACAGGCGTGGTGCGCAGTCCCTTCGAGCATCCACAGTACTACCTGGCCGAGC +CATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTCGGCTTCCCCATCAACTTCCT +CACGCTCTATGTCACGGTTCAGCACAAGAAGCTGCGTACGCCTCTCAACTACATCCTGCTCAACCTGGCC +GTGGCCGACCTCTTCATGGTCTTCGGAGGCTTCACCACCACCCTCTACACCTCCCTGCATGGATACTTTG +TCTTCGGGCCTACGGGATGCAATCTGGAGGGCTTTTTTGCCACCCTGGGAGGTATGAGCTGAGATGCGGG +TAAGGAGGAGGCATAGAGGCATCTGGGAACAGTCCCAAGCTTGGGGTGAAGGCTAAGAGGCCTTCTTCCT +TGTTCTGTCATTGGCGTCGTCCGAAGCCCTCACTTAATCAACAAACAGTTTGGTGGTGAGGCGCTGAGCT +CCATTTGGAGAGGGCAGGTATCGAGCACTGTTTTATCCCCCCTGGAGTGGTGCCATTGCCTTGCTTTACA +GCAAAGAAACTGAGGATGAGAGGAGTCGAGGGTCTTGCCAGGTCACATCATGGCAGAGACAGAGCTGAGT +TTCAACCCTGCATCTATGTGCAGTTTCCCTTGGAGCAGCTATGTTAGGTCAGACCCACGGTGGGCACTGG +GGAGAGAGCTGCACAAGACAGGTCCCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTCCTGATTGCCA +GGAGTGATGTGCAGCGCAAATGTCTGAATTCCATTATTATGTGCTCCTTCTTCCTCTGAGCCAAACATCC +ATCTTCATGGCTCCTAGAATTGGGTCCCACCCACATGAGCAGGTCATTTTGTTTCCCTAGAGGGGAGAGG +TCACTGCTGTGGAGGGAGGGAAGGTTCGTCCCGCTCCATGTTTCTGTTGTCTCTGCAATGCCTTTCTCTA +GGGACTCTGCCTATTGCCCCAAGAAGGACACATTCTTCTGTAAAAACTCCCTCCTGGGTTCCCAGTCTAA +TCAAGACCTCTAAACTGATTTCCATGTCCCTCATGAACCCAAAGCTCTAACTGAATTAAACTTCTCAGGA +CTTACTCCACTCTCCTCGTCCATCATGCAGCCCCTCTGCCCAGCACCCTATCTCCTCTTCTTCCCAGTGT +CTGAGCCCACTGTACCCTGAGACTTCGCTCCAGGCCTGCCCCAGGCTGCCTTCTCAGGTGCCCTCTCCCA +CATAGGAGGAGCACGGCCTCCTTAGACAGACGTGGGGTGCAGGTTGGTGGCATGCTGACTGATAGCTGAC +TGCCTTGCAGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTATGCA +AGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCCTTGCCCTCACCTGGGTCATGGC +ACTGGCCTGCGCCGCGCCCCCGCTAGTCGGCTGGTCCAGGTAATGGCACTGAACAGAAGGGAAGTGCCTC +TGAGGTCTTCTTAGGGTCCCCCAGCTGGGACTCAAACCTAGGGCTGTCTGGTTCCAGGCACGGAACTGGC +GACTCCACTGGGGTTGGGGTTTAGGGCAAGGAAGGAGAGGATCAGACCCTAATGTTGTTACGTGGGTTGG +TCCGCATGTCAAGGAGAATCCAAGACACCCAATCCTTCACCTTGGCTGTGCCCCTAATCCTCATCTAAGC +CAGGTTCAGATTCCAATCCTCTTTGGCCCAGTGCTCCGTGGGAAGCTCCCTCTGACCTTGGGCCTCAGCG +CCTGGGGTTGCTGAGCCTTCCTAGTATAGGTGGTGACATCGTAGCCCCTGGGACCTGGATCCTGCCCAGT +CTGCAGGCCATCATCTCCAAATGGGGCTGAGATGAGATGTGAGGAAAGAGGGGAGACAGTGGTTTGGAAA +ACTGGACTGGTGGCTTTTTTGGGTTTCCAGAGGACTCATCTTCCTCTGCTTCTAGAATATTCCCACTCTC +TCTTCCCTTTCCTCATTCTTCCTGGGTTATTTTTTTTTCCCTTTGCTGAATTCGAGCCCCATTCCCTCCA +GCCTCTTTCCCTGTCTTATCTAGCCCAGTCCAGTTATATTCTCATAGGCAGAGGCAACAGATGCTCCAAA +TTTTCTGAGGTCGGTTCCAACATCGCCACCCTCTAAAATCAGTGAAACATCCTAACTACATGCCTCATAG +TCCTCCTGTTTCCAAAAACTGCAAAGATCTCCTGGTTACCCTGTATGCCCATCTTTGGGCTAGAAAATCC +TCTCACCCTGTTAATAGTAAGACCCTGGTTTGTACAAACTGCCTCAAACACAGAGTTTAGGGGCTTTTCC +CTTCTCTCCGCCAACCTCTGACAGGCAGAGTCTGAGGCCTGGCCTCCAGCTGCTGCGGGGAGCAGGTCTG +GTAAAGAATCCTGTGCAGGTCAGTGGTATACAGGTCCTGTCAGGTGACAGCCTGGGCGAGAGACTGGAAA +GTATCAGGATAACACGGCTGCCAGACGAACAACAAAACAACACTGAATTCACAAGGCGCATTCGAATCCT +CTCTCAGTCCATTTGATCCTCAGTCACACAGCCGAGTAGACACTTTATCAACTCATTTAACAGAAAGGGA +AAGTGAAGCCCAGAGCGAGGCCAGCAACGTGGCAGGTCACTCTGGTCATCTAGGGCCTGTTCCCAACTCT +TTCACATGTGGGTCTCCAATATGTTCCCTCCTGTCCCAATCTCTGCCGGCCCTCAGGTACATCCCAGAGG +GCATGCAGTGCTCATGTGGAATCGACTACTACACCCTCAAGCCGGAGGTCAACAACGAGTCCTTTGTCAT +CTACATGTTCGTGGTCCACTTCACCATCCCTATGATTGTCATATTCTTTTGCTATGGACAGCTGGTCTTC +ACCGTCAAGGAGGTAAGGTCATGTGTTGGGCACTGGGGACATGCACACTGAGTGAATGGAGCCCAGCTCC +ATTCCCAGAGTTGCCACAGTCTGGACACCTGACCTTGTGTCCCTGCAGGCAGCTGCCCAGCAGCAGGAGT +CAGCCACCACCCAGAAGGCCGAGAAGGAGGTCACCCGTATGGTCATCATCATGGTCATTGCTTTCCTAAT +CTGTTGGCTGCCGTATGCCGGCGTGGCATTCTACATCTTCACCCACCAGGGCTCTAACTTTGGCCCCATC +TTCATGACCCTCCCGGCATTCTTTGCCAAGTCGTCCTCCATCTACAACCCTGTCATCTATATCATGATGA +ACAAGCAGGTGCCAGGTGGTAGGGAGGGAGGGTCTGGGTCCCCCAGGCTGCAGGCACTGCCCACAGAGGA +CAAGCCACATCCTTGACTAGGCAGACCCCAGTCTTCCCATCTGCAAAATTAGGCAGGGGAGTTCGTCTCC +CCCAGGCATCAGAGACATCGGGGAGAAATGCACATTTCTGGAGATGAATCAGCATCTCAGGGTGGGCCCA +GGAACCTGCACTTCTAAAAACCATTCCACATGACTCTGAGGCTAGCATGAGAAGTGATGATCCACATGGT +TCTGGAGGCCTGCTTTAAAAGTCAAGTGGTCAAAGTCCCAAGCCTGGGAACGGGATGGTGCCAGTCTCCA +TTAAAGAGATCAAAAGGAGCTAGAAAGTCTTGTGATGAAAGATGAAGGGATAAAGCCGTCCTTTAACACA +GATCAGTGATTTCTCTGCAGAATCCATGACCCAGTGGGAAAAAGTGGTCCCTGGAGTCAGGCATATTGGA +TTCAAATCCTAGCTCTGCTATTTTCTAGCTATGTAACCTTGGGCAAGTCATCTCCCTTCTCTGTGCTTCA +GTTTCTTCTTTCATAGAAAGGGTAAAATCCCAAACTCTTGGGTTAAATGAGATAACTTACATAGCCCTTG +ATATGCAGAGGCATTATGGAATGTCGTTAGTGACAAAGTTCCCTTGGGTTTGGTCCCTGGTATCTCTGGA +GTGAGATTGCATATGTTCCCTTCAGAGGGTCAGATTTGGGATGAGAGTGGAGGCTGCGAGGGCCTGAGTG +GGAAGGGATTGGAGGCAAATCTCACCAACCATGTCAGTTTGCTACACACACTTTGGGTGGACCCTGACCC +TGACTCATGCTTCTTGCCTTCCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCCAC +TGGGTGACGATGAGGCCTCCACCACTGCCTC + +>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds +GTGCCCTTCTCCAACAAGACGGGTGTGGTGCGCAGCCCCTTCGAGTACCCGCAGTACTACCTGGCTGAGC +CCTGGCAGTTCTCCATGCTGGCTGCCTACATGTTTCTGCTGATCGTGCTCGGATTCCCCATCAACTTCCT +CACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCT +GTGGCCAACCTCTTCATGGTCTTTGGAGGCTTCACCACCACCCTGTATACCTCTATGCATGGATACTTCG +TCTTCGGGGCCACGGGATGCAATCTGGAGGGCTTCTTTGCCACGCTGGGCGGTGAAATCGCCCTGTGGTC +CCTGGTGGTCCTGGCCATCGAGCGGTATGTGGTGGTCTGCAAGCCCATGAGCAACTTCCGCTTTGGGGAG +AACCACGCCATCATGGGCCTCGCCTTCACGTGGGTCATGGCACTGGCCTGCGCTGCACCCCCACTAGCCG +GCTGGTCCAGGTACATCCCAGAGGGCATGCAGTGCTCGTGTGGGATTGACTACTACACGCTCAAACCGGA +GGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTGTCATTTTC +TTCTGCTACGGACAGCTGGTGTTCACAGTGAAGGAGGCGGCTGCCCAGCAGCAGGAGTCAGCCACCACCC +AGAAGGCCGAGAAGGAAGTCACGCGCATGGTCATCATCATGGTCGTTGCGTTCCTAATCTGTTGGCTGCC +CTACGCCAGCGTGGCATTCTACATCTTTACCCACCAGGGCTCTAACTTTGGCCCTGTCTTCATGACCATC +CCGGCATTCTTCGCCAAGTCATCCTCCATCTACAACCCGGTCATCTATATCATGATGAACAAGCAGTTCC +GGAACTGCATGCTCACCACCCTCTGCTGTGGCAAGAACCCACTGGGTGATGACGAAGCATCCACCACTGC +CTC + +>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds +ATGAACGGGACCGAGGGCCCAAACTTCTACGTGCCTTTCTCCAACAAGACGGGCGTCGTACGCAGCCCCT +TCGAGGCGCCGCAGTACTACCTGGCTGAGCCATGGCAGTTCAGCATGCTGGCCGCCTACATGTTCCTGCT +GATCATGCTTGGCTTCCCCATCAACTTCCTCACGCTGTACGTCACAGTCCAGCACAAGAAGCTGAGGACC +CCCCTCAACTACATCCTGCTCAACCTGGCCGTGGCAGATCTCTTCATGGTGTTCGGGGGCTTCACCACCA +CCCTGTATACCTCTCTGCACGGGTACTTCGTGTTCGGTCCGACGGGCTGCAACCTCGAGGGCTTCTTTGC +CACCTTAGGCGGTGAAATTGCACTGTGGTCCTTGGTGGTGCTAGCCATCGAGCGGTACGTAGTGGTGTGC +AAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCGTCGCATTCACCTGGGTCATGG +CTCTGGCCTGTGCGGCCCCCCCCCTCGTCGGCTGGTCTAGATACATCCCGGAGGGGATGCAGTGCTCGTG +CGGGATCGATTACTACACGCCCCACGAGGAGACCAACAATGAGTCGTTCGTCATCTACATGTTCGTTGTA +CACTTCATCATCCCCCTGATTGTCATATTCTTCTGCTACGGGCAGCTGGTCTTCACCGTCAAGGAGGCTG +CAGCCCAGCAGCAGGAGTCGGCCACCACTCAGAAGGCCGAGAAGGAGGTCACGCGTATGGTCATCATCAT +GGTCATCGCTTTCCTCATATGCTGGCTGCCCTACGCAGGTGTGGCGTTCTACATCTTCACCCATCAGGGA +TCCGACTTTGGCCCCATCTTCATGACCATCCCGGCTTTCTTTGCCAAGACGTCTGCCGTCTATAACCCCG +TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGGTCACCACTCTCTGCTGTGGCAAGAACCC +CCTAGGTGACGACGAGGCCTCCACGACCGTGTCCAAGACAGAGACCAGCCAAGTGGCCCCTGCCTAA + +>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds +CCGCTACTGACGAACCGCAACCATGAACGGCACTGAGGGACCTAACTTCTACATCCCCATGTCAAACGCC +ACTGGTGTAGTGAGGAGTCCATTTGAATACCCGCAGTACTACCTTGCAGAACCATGGGCTTTCTCAGCTC +TGTCTGCCTACATGTTCTTCCTGATTATCGCCGGATTCCCCATCAACTTCCTCACCCTGTATGTCACCAT +CGAACATAAGAAACTGAGGACCCCACTGAACTACATTCTGCTGAACCTGGCCGTGGCCGACCTCTTCATG +GTGTTTGGCGGATTCACCACCACGATGTACACCTCCATGCACGGCTACTTTGTCTTCGGCCCCACCGGCT +GCAACATCGAAGGGTTCTTCGCCACCCTCGGCGGCGAGATTGCCCTCTGGTGCCTCGTTGTCCTGGCCAT +TGAAAGGTGGATGGTCGTCTGCAAGCCAGTGACCAATTTCCGCTTCGGTGAGAGCCATGCCATCATGGGT +GTCATGGTGACCTGGACCATGGCATTGGCCTGTGCCCTCCCCCCTCTCTTCGGCTGGTCTCGGTACATTC +CGGAAGGTCTGCAGTGCTCGTGCGGGATCGACTACTATACCCGGGCGCCTGGGATCAACAATGAGTCCTT +TGTGATCTACATGTTTACCTGCCACTTCTCCATCCCACTCGCCGTCATCTCTTTCTGCTACGGCCGACTG +GTGTGCACCGTCAAAGAGGCCGCTGCCCAGCAACAGGAGTCCGAGACCACCCAGAGGGCTGAGCGGGAGG +TCACCCGCATGGTCGTCATCATGGTCATCTCCTTCCTGGTCTGCTGGGTGCCCTATGCCAGTGTGGCCTG +GTACATCTTTACCCACCAGGGAAGCACTTTTGGGCCCATCTTCATGACCATTCCATCCTTCTTTGCCAAG +AGTTCAGCCCTCTACAACCCCATGATCTACATCTGCATGAACAAGCAGTTCCGCCATTGCATGATCACCA +CCCTCTGCTGTGGGAAGAACCCCTTCGAGGAGGAGGATGGAGCGTCCGCCACTAGCTCTAAAACTGAGGC +TTCATCCGTGTCCTCCAGCTCTGTCTCCCCGGCATAAACCTTGTTTGACCGAACACCACGCATCAACACA +AAGACCAAGAATGCTGACTAAATGCTAACATTTCAGGGAAATCCAAAGACTTTTTACTATTTTTTTACAC +AACCATATAGGTTGCAAACAGAGGTTTAGCCCTGTTTACAGGTTGTCATCAATGTGATGTCAGTATGTAC +AATATAGTCAACTTGATAGCAAGTTGTTGGCTTATTTCAGATTGTATGGGCAATGTAATCAACCATATGT +GAAATAAATTGCAA diff -r 000000000000 -r a96608a125fb test-data/three_human_mRNA.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/three_human_mRNA.fasta Thu May 15 12:54:09 2014 -0400 @@ -0,0 +1,183 @@ +>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds. +GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG +CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC +GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC +CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA +TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC +TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT +TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA +CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA +TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA +CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC +TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA +TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT +TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG +GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA +TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA +ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG +TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA +CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC +AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT +CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA +ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC +CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA +TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG +CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC +TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC +CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA +GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT +ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA +GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT +CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT +GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA +TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA +CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA +AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG +GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG +AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT +TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT +TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT +TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG +CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA +CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT +TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC +TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT +GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT +AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT +TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT +GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT +TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA +GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA +TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA +TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT +AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT +TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT +ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT +GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA +GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT +GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT +TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT +TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT +AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG +AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC +TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT +CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA +GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT +TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT +GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT +TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG +GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA +CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA +CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA +GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA +GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT +GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG +GAAACTTTTAGATGACATTCTACAAATTATTTTTTTCTTTAAATTAAAAGAACCTAGCCA +ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA +AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA +ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA +GGTTGTCAAGAAGGCTTTTTTTTTTTTCTTTTTTAAACCTGAGGGCAAAAAGGAATGGAT +AAAGTAGTGTAATGGATTGACAATCAGGAAGAACAGAATAACTCAGTTTTTTTTTCTCCT +ACAAGGAGATATGGCTGGACCAAAATAAAATGACATGAAATTGCAAAAATGAAAAT +>ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds. +GGGGGGCTGCGCGGCCGGGTCGGTGCGCACACGAGAAGGACGCGCGGCCCCCAGCGCTCT +TGGGGGCCGCCTCGGAGCATGACCCCCGCGGGCCAGCGCCGCGCGCCTGATCCGAGGAGA +CCCCGCGCTCCCGCAGCCATGGGCACCGGGGGCCGGCGGGGGGCGGCGGCCGCGCCGCTG +CTGGTGGCGGTGGCCGCGCTGCTACTGGGCGCCGCGGGCCACCTGTACCCCGGAGAGGTG +TGTCCCGGCATGGATATCCGGAACAACCTCACTAGGTTGCATGAGCTGGAGAATTGCTCT +GTCATCGAAGGACACTTGCAGATACTCTTGATGTTCAAAACGAGGCCCGAAGATTTCCGA +GACCTCAGTTTCCCCAAACTCATCATGATCACTGATTACTTGCTGCTCTTCCGGGTCTAT +GGGCTCGAGAGCCTGAAGGACCTGTTCCCCAACCTCACGGTCATCCGGGGATCACGACTG +TTCTTTAACTACGCGCTGGTCATCTTCGAGATGGTTCACCTCAAGGAACTCGGCCTCTAC +AACCTGATGAACATCACCCGGGGTTCTGTCCGCATCGAGAAGAACAATGAGCTCTGTTAC +TTGGCCACTATCGACTGGTCCCGTATCCTGGATTCCGTGGAGGATAATCACATCGTGTTG +AACAAAGATGACAACGAGGAGTGTGGAGACATCTGTCCGGGTACCGCGAAGGGCAAGACC +AACTGCCCCGCCACCGTCATCAACGGGCAGTTTGTCGAACGATGTTGGACTCATAGTCAC +TGCCAGAAAGTTTGCCCGACCATCTGTAAGTCACACGGCTGCACCGCCGAAGGCCTCTGT +TGCCACAGCGAGTGCCTGGGCAACTGTTCTCAGCCCGACGACCCCACCAAGTGCGTGGCC +TGCCGCAACTTCTACCTGGACGGCAGGTGTGTGGAGACCTGCCCGCCCCCGTACTACCAC +TTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAGGACCTGCACCACAAATGCAAG +AACTCGCGGAGGCAGGGCTGCCACCAATACGTCATTCACAACAACAAGTGCATCCCTGAG +TGTCCCTCCGGGTACACGATGAATTCCAGCAACTTGCTGTGCACCCCATGCCTGGGTCCC +TGTCCCAAGGTGTGCCACCTCCTAGAAGGCGAGAAGACCATCGACTCGGTGACGTCTGCC +CAGGAGCTCCGAGGATGCACCGTCATCAACGGGAGTCTGATCATCAACATTCGAGGAGGC +AACAATCTGGCAGCTGAGCTAGAAGCCAACCTCGGCCTCATTGAAGAAATTTCAGGGTAT +CTAAAAATCCGCCGATCCTACGCTCTGGTGTCACTTTCCTTCTTCCGGAAGTTACGTCTG +ATTCGAGGAGAGACCTTGGAAATTGGGAACTACTCCTTCTATGCCTTGGACAACCAGAAC +CTAAGGCAGCTCTGGGACTGGAGCAAACACAACCTCACCACCACTCAGGGGAAACTCTTC +TTCCACTATAACCCCAAACTCTGCTTGTCAGAAATCCACAAGATGGAAGAAGTTTCAGGA +ACCAAGGGGCGCCAGGAGAGAAACGACATTGCCCTGAAGACCAATGGGGACAAGGCATCC +TGTGAAAATGAGTTACTTAAATTTTCTTACATTCGGACATCTTTTGACAAGATCTTGCTG +AGATGGGAGCCGTACTGGCCCCCCGACTTCCGAGACCTCTTGGGGTTCATGCTGTTCTAC +AAAGAGGCCCCTTATCAGAATGTGACGGAGTTCGATGGGCAGGATGCGTGTGGTTCCAAC +AGTTGGACGGTGGTAGACATTGACCCACCCCTGAGGTCCAACGACCCCAAATCACAGAAC +CACCCAGGGTGGCTGATGCGGGGTCTCAAGCCCTGGACCCAGTATGCCATCTTTGTGAAG +ACCCTGGTCACCTTTTCGGATGAACGCCGGACCTATGGGGCCAAGAGTGACATCATTTAT +GTCCAGACAGATGCCACCAACCCCTCTGTGCCCCTGGATCCAATCTCAGTGTCTAACTCA +TCATCCCAGATTATTCTGAAGTGGAAACCACCCTCCGACCCCAATGGCAACATCACCCAC +TACCTGGTTTTCTGGGAGAGGCAGGCGGAAGACAGTGAGCTGTTCGAGCTGGATTATTGC +CTCAAAGGGCTGAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT +CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG +ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT +TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC +CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG +CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG +CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC +TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC +AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT +GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG +AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG +GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT +GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC +TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT +GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT +ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT +TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC +GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC +GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG +GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG +GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC +AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC +CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA +GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG +TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA +ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC +AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC +ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA +CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT +CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA +TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTG +CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT +GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC +TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC +TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG +ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT +CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT +ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC +TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA +GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT +TTTTTCGTTCCCCCCACCCGCCCCCAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT +CTTTTTTTTTTTTTTTTTTTTTTTTTTTTGCTGGTGTCTGAGCTTCAGTATAAAAGACAA +AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA +>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds. +CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC +CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG +CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG +GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC +TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT +CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC +ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT +TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG +GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC +CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA +CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC +TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC +TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG +GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC +CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG +GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA +GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG +CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT +GAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG +CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA +GCCATCCCACCAG diff -r 000000000000 -r a96608a125fb tools/blast_rbh/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blast_rbh/README.rst Thu May 15 12:54:09 2014 -0400 @@ -0,0 +1,100 @@ +Galaxy tool to find BLAST Reciprocal Best Hits (RBH) +==================================================== + +This tool is copyright 2011-2014 by Peter Cock, The James Hutton Institute +(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. +See the licence text below. + +This tool is a short Python script to run reciprocal BLAST searches on a +pair of sequence files, and extract the reciprocal best hits. + +This is a work in progress, and builds on an earlier implementation which +prequired the two BLAST searches be prepared in advance. Integration allows +a much simpler user experience, and can ensure sensible filters are used. + + +Automated Installation +====================== + +Installation via the Galaxy Tool Shed should take care of the Galaxy side of +things, including the dependency the NCBI BLAST+ binaries. + + +Manual Installation +=================== + +There are just two files to install: + +- ``blast_rbh.py`` (the Python script) +- ``blast_rbh.xml`` (the Galaxy tool definition) + +The suggested location is in a ``tools/blast_rbh/`` folder. You will then +need to modify the ``tools_conf.xml`` file to tell Galaxy to offer the tool +by adding the line:: + + + +If you want to run the functional tests, include the same line in your +``tool_conf.xml.sample`` file, and the sample test files under Galaxy's +``test-data/`` directory. Then:: + + ./run_functional_tests.sh -id blast_reciprocal_best_hits + +You will need to have the NCBI BLAST+ binaries installed and on the ``$PATH``. + + +History +======= + +======= ====================================================================== +Version Changes +------- ---------------------------------------------------------------------- +v0.1.0 - Initial public release, targetting NCBI BLAST+ 2.2.29 +======= ====================================================================== + + +Developers +========== + +This tool is developed on the following GitHub repository: +https://github.com/peterjc/galaxy_blast/tree/master/tools/blast_rbh + +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use +the following command from the Galaxy root folder:: + + $ tar -czf blast_rbh.tar.gz tools/blast_rbh/README.rst tools/blast_rbh/blast_rbh.xml tools/blast_rbh/blast_rbh.py tools/blast_rbh/repository_dependencies.xml test-data/rhodopsin_nucs.fasta test-data/three_human_mRNA.fasta test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular test-data/rbh_none.tabular + +Check this worked:: + + $ tar -tzf blast_rbh.tar.gz + tools/blast_rbh/README.rst + tools/blast_rbh/blast_rbh.xml + tools/blast_rbh/blast_rbh.py + tools/blast_rbh/repository_dependencies.xml + test-data/rhodopsin_nucs.fasta + test-data/three_human_mRNA.fasta + test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular + test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular + test-data/rbh_none.tabular + + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff -r 000000000000 -r a96608a125fb tools/blast_rbh/blast_rbh.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blast_rbh/blast_rbh.py Thu May 15 12:54:09 2014 -0400 @@ -0,0 +1,155 @@ +#!/usr/bin/env python +"""BLAST Reciprocal Best Hit (RBH) from two FASTA input files. + +Takes the following command line options, +1. FASTA filename of species A +2. FASTA filename of species B +3. Sequence type (prot/nucl) +4. BLAST type (e.g. blastn, or blastp) consistent with sequence type +5. Minimum BLAST Percentage identity +6. Minimum BLAST query coverage +7. Output filename +""" + +import os +import sys +import tempfile +import shutil + +def stop_err( msg ): + sys.stderr.write("%s\n" % msg) + sys.exit(1) + +def run(cmd): + return_code = os.system(cmd) + if return_code: + stop_err("Error %i from: %s" % (return_code, cmd)) + +if "--version" in sys.argv[1:]: + #TODO - Capture version of BLAST+ binaries too? + print "BLAST RBH v0.1.0" + sys.exit(0) + +#Parse Command Line +#TODO - optparse +try: + fasta_a, fasta_b, dbtype, blast_type, min_identity, min_coverage, out_file = sys.argv[1:] +except: + stop_err("Expect 7 arguments, got %i" % (len(sys.argv) - 1)) + +if not os.path.isfile(fasta_a): + stop_err("Missing input file for species A: %r" % fasta_a) +if not os.path.isfile(fasta_b): + stop_err("Missing input file for species B: %r" % fasta_b) +if os.path.abspath(fasta_a) == os.path.abspath(fasta_b): + #TODO - is this ever useful, e.g. positive control? + stop_err("Asked to compare the FASTA file to itself!") + +try: + min_identity = float(min_identity) +except ValueError: + stop_err("Expected number between 0 and 100 for minimum identity, not %r" % min_identity) +if not (0 <= min_identity <= 100): + stop_err("Expected minimum identity between 0 and 100, not %0.2f" % min_identity) +try: + min_coverage = float(min_coverage) +except ValueError: + stop_err("Expected number between 0 and 100 for minimum coverage, not %r" % min_coverage) +if not (0 <= min_coverage <= 100): + stop_err("Expected minimum coverage between 0 and 100, not %0.2f" % min_coverage) + + +if dbtype == "nucl": + if blast_type not in ["megablast", "blastn", "blastn-short", "dc-megablast"]: + stop_err("Invalid BLAST type for BLASTN: %r" % blast_type) + blast_exe = "blastn" +elif dbtype == "prot": + if blast_type not in ["blastp", "blastp-short"]: + stop_err("Invalid BLAST type for BLASTP: %r" % blast_type) + blast_exe = "blastp" +else: + stop_err("Expected 'nucl' or 'prot' for BLAST database type, not %r" % blast_type) +makeblastdb_exe = "makeblastdb" + +def run(cmd): + return_code = os.system(cmd) + if return_code: + stop_err("Error %i from: %s" % (return_code, cmd)) + +def makeblastdb(fasta_file, dbtype, output_stem): + cmd = "%s -dbtype %s -in %s -out %s" % (makeblastdb_exe, dbtype, fasta_file, output_stem) + return run(cmd) + + +base_path = tempfile.mkdtemp() +db_a = os.path.join(base_path, "SpeciesA") +db_b = os.path.join(base_path, "SpeciesB") +a_vs_b = os.path.join(base_path, "A_vs_B.tabular") +b_vs_a = os.path.join(base_path, "B_vs_A.tabular") +log = os.path.join(base_path, "blast.log") + +cols = "qseqid sseqid bitscore pident qcovhsp" #Or qcovs? +c_query = 0 +c_match = 1 +c_score = 2 +c_identity = 3 +c_coverage = 4 + +print("Starting...") +#TODO - Report log in case of error? +run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_a, db_a, log)) +run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_b, db_b, log)) +print("BLAST databases prepared.") +run('%s -task %s -query "%s" -db "%s" -out "%s" -outfmt "6 %s"' % (blast_exe, blast_type, fasta_a, db_b, a_vs_b, cols)) +print("BLAST species A vs species B done.") +run('%s -task %s -query "%s" -db "%s" -out "%s" -outfmt "6 %s"' % (blast_exe, blast_type, fasta_b, db_a, b_vs_a, cols)) +print("BLAST species B vs species A done.") + +#TODO - Include identity and coverage filters... + +best_a_vs_b = dict() +for line in open(a_vs_b): + if line.startswith("#"): continue + parts = line.rstrip("\n").split("\t") + a = parts[c_query] + b = parts[c_match] + if float(parts[c_identity]) < min_identity or float(parts[c_coverage]) < min_coverage: + continue + score = float(parts[c_score]) + if a not in best_a_vs_b or score > best_a_vs_b[a][1]: + best_a_vs_b[a] = (b, score, parts[c_score]) +b_short_list = set(b for (b,score, score_str) in best_a_vs_b.values()) + +best_b_vs_a = dict() +for line in open(b_vs_a): + if line.startswith("#"): continue + parts = line.rstrip("\n").split("\t") + b = parts[c_query] + a = parts[c_match] + if a not in best_a_vs_b: + continue + #stop_err("The A-vs-B file does not have A-ID %r found in B-vs-A file" % a) + if b not in b_short_list: continue + if float(parts[c_identity])< min_identity or float(parts[c_coverage]) < min_coverage: + continue + score = float(parts[c_score]) + if b not in best_b_vs_a or score > best_b_vs_a[b][1]: + best_b_vs_a[b] = (a, score, parts[c_score]) +#TODO - Preserve order from A vs B? +a_short_list = sorted(set(a for (a,score,score_str) in best_b_vs_a.values())) + +count = 0 +outfile = open(out_file, 'w') +outfile.write("#A_id\tB_id\tA_vs_B\tB_vs_A\n") +for a in a_short_list: + b = best_a_vs_b[a][0] + if b in best_b_vs_a and a == best_b_vs_a[b][0]: + outfile.write("%s\t%s\t%s\t%s\n" % (a, b, best_a_vs_b[a][2], best_b_vs_a[b][2])) + count += 1 +outfile.close() +print "Done, %i RBH found" % count + + +#Remove temp files... +shutil.rmtree(base_path) + diff -r 000000000000 -r a96608a125fb tools/blast_rbh/blast_rbh.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blast_rbh/blast_rbh.xml Thu May 15 12:54:09 2014 -0400 @@ -0,0 +1,160 @@ + + from two FASTA files + + makeblastdb + blastp + blastn + blast+ + + +blast_rbh.py --version + + +blast_rbh.py "$fasta_a" "$fasta_b" $seq.dbtype +#if $seq.dbtype=="nucl" + $seq.nucl_type +#else + $seq.prot_type +#end if +$identity $q_cover "$output" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Takes two FASTA files (species *A* and species *B*), builds a BLAST database +for each, runs reciprocal BLAST searchs (*A vs B*, and *B vs A*), optionally +filters these, and then compiles a list of the reciprocal best hits (RBH). + +The output from this tool is a tabular file containing four columns, with +the order taken from input file A: + +====== ====================== +Column Description +------ ---------------------- + 1 ID from species *A* + 2 ID from species *B* + 3 Bitscore from *A vs B* + 4 Bitscore from *B vs A* +====== ====================== + +.. class:: warningmark + +**Note** + +If you are trying to use BLAST RBH matches to identify candidate orthologues +or transfer annotation, you *must* use a percentage identity and minimum +coverage threshold or similiar. See: + +Punta and Ofran (2008) The Rough Guide to In Silico Function Prediction, +or How To Use Sequence and Structure Information To Predict Protein +Function. PLoS Comput Biol 4(10): e1000160. +http://dx.doi.org/10.1371/journal.pcbi.1000160 + +The defaults are to require 70% sequence identity over the aligned region +(using ``pident`` in the BLAST+ tabular output), and that the HSP alignment +covers at least 50% of the query sequence (using ``qcovhsp`` in the BLAST+ +tabular output). + + +**References** + +A specific paper covering this tool is planned, but please also cite: + +Christiam Camacho et al. (2009). +BLAST+: architecture and applications. +BMC Bioinformatics. 15;10:421. +http://dx.doi.org/10.1186/1471-2105-10-421 + +This wrapper is available to install into other Galaxy Instances via the Galaxy +Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blast_rbh + + diff -r 000000000000 -r a96608a125fb tools/blast_rbh/repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blast_rbh/repository_dependencies.xml Thu May 15 12:54:09 2014 -0400 @@ -0,0 +1,4 @@ + + + +