Mercurial > repos > devteam > fasta_compute_length
changeset 2:d75972d4bd2a draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit 6e148b31fed1b322ce720804d8525088ec6d43f9
author | devteam |
---|---|
date | Thu, 29 Oct 2015 22:14:42 -0400 |
parents | 2811169ce62b |
children | 19caae8fd9d4 |
files | fasta_compute_length.py fasta_compute_length.xml test-data/454.fasta test-data/extract_genomic_dna_out1.fasta test-data/fasta_tool_compute_length_1.out test-data/fasta_tool_compute_length_2.out test-data/fasta_tool_compute_length_3.out |
diffstat | 7 files changed, 674 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/fasta_compute_length.py Tue Oct 13 12:19:36 2015 -0400 +++ b/fasta_compute_length.py Thu Oct 29 22:14:42 2015 -0400 @@ -6,4 +6,4 @@ import sys from utils.fasta_to_len import compute_fasta_length -compute_fasta_length( sys.argv[1], sys.argv[2], sys.argv[3], False ) \ No newline at end of file +compute_fasta_length( sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4] == 'id_only' )
--- a/fasta_compute_length.xml Tue Oct 13 12:19:36 2015 -0400 +++ b/fasta_compute_length.xml Thu Oct 29 22:14:42 2015 -0400 @@ -1,51 +1,78 @@ -<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.0"> - <description></description> - <command interpreter="python">fasta_compute_length.py $input $output $keep_first</command> - <inputs> - <param name="input" type="data" format="fasta" label="Compute length for these sequences"/> - <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/> - </inputs> - <outputs> - <data name="output" format="tabular"/> - </outputs> - <tests> - <test> - <param name="input" value="454.fasta" /> - <param name="keep_first" value="0"/> - <output name="output" file="fasta_tool_compute_length_1.out" /> - </test> - - <test> - <param name="input" value="extract_genomic_dna_out1.fasta" /> - <param name="keep_first" value="0"/> - <output name="output" file="fasta_tool_compute_length_2.out" /> - </test> - - <test> - <param name="input" value="454.fasta" /> - <param name="keep_first" value="14"/> - <output name="output" file="fasta_tool_compute_length_3.out" /> - </test> - </tests> - <help> +<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.1"> + <description></description> + <command interpreter="python">fasta_compute_length.py $input $output $keep_first $keep_first_word</command> + <inputs> + <param name="input" type="data" format="fasta" label="Compute length for these sequences"/> + <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/> + <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc" + selected="false" label="Strip fasta description from header?" + help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. This step is done before the 'How many characters to keep' option."/> + + </inputs> + <outputs> + <data name="output" format="tabular"/> + </outputs> + <tests> + <test> + <param name="input" value="454.fasta" /> + <param name="keep_first" value="0"/> + <param name="keep_first_word" value="id_and_desc" /> + <output name="output" file="fasta_tool_compute_length_1.out" /> + </test> + + <test> + <param name="input" value="extract_genomic_dna_out1.fasta" /> + <param name="keep_first" value="0"/> + <param name="keep_first_word" value="id_and_desc" /> + <output name="output" file="fasta_tool_compute_length_2.out" /> + </test> + + <test> + <param name="input" value="454.fasta" /> + <param name="keep_first" value="14"/> + <param name="keep_first_word" value="id_and_desc" /> + <output name="output" file="fasta_tool_compute_length_3.out" /> + </test> + </tests> + <help> **What it does** -This tool counts the length of each fasta sequence in the file. The output file has two columns per line (separated by tab): fasta titles and lengths of the sequences. The option *How many characters to keep?* allows to select a specified number of letters from the beginning of each FASTA entry. +This tool counts the length of each fasta sequence in the file. The output file has two columns per line (separated by tab): fasta titles and lengths of the sequences. The option *How many characters to keep?* allows to select a specified number of letters from the beginning of each FASTA entry. ------ +----- **Example** Suppose you have the following FASTA formatted sequences from a Roche (454) FLX sequencing run:: - >EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG >EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAAfa + >EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ + TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG + TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG + >EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ + AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAAfa Running this tool while setting **How many characters to keep?** to **14** will produce this:: - - EYKX4VC02EQLO5 108 - EYKX4VC02D4GS2 60 + + EYKX4VC02EQLO5 108 + EYKX4VC02D4GS2 60 + +However, if your IDs are not all the same length, you may wish to just keep the fasta ID, and not the description:: + + >EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ + TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG + TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG + >EYKX4VC length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ + AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAAfa + +Running this tool with **Strip fasta description from header** set to **True** and **How many characters to keep?** set to **0** will produce:: + + EYKX4VC02EQLO5 108 + EYKX4VC 60 - </help> + </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btq281</citation> + </citations> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/454.fasta Thu Oct 29 22:14:42 2015 -0400 @@ -0,0 +1,52 @@ +>EYKX4VC01B65GS length=54 xy=0784_1754 region=1 run=R_2007_11_07_16_15_57_ +CCGGTATCCGGGTGCCGTGATGAGCGCCACCGGAACGAATTCGACTATGCCGAA +>EYKX4VC01BNCSP length=187 xy=0558_3831 region=1 run=R_2007_11_07_16_15_57_ +CTTACCGGTCACCACCGTGCCTTCAGGATTGATCGCCAGATCGGTCGGTGCGTCAGGCGG +GGTGACATCGCCCACCACGGTACTCACTGGCTGGCTCTGGTTCCCGGCGGCATCGGAGGC +CACCACGTTGAGGGTATTCCCCTCGGTTTGTGGCTCGGTGAGAACCACGTTGTAGTCGCC +ATTGGTC +>EYKX4VC01CD9FT length=115 xy=0865_1719 region=1 run=R_2007_11_07_16_15_57_ +GGGGGCTTTGGCCTGTCGTCCGGCACCTCGCAAGAGCTACAGCAGGCGCGGCTGGCGATC +ATCGGCGGCACGCCGGCCTATATGTCGCCGGAACACACCACCCGCACCCAACGCG +>EYKX4VC01B8FW0 length=95 xy=0799_0514 region=1 run=R_2007_11_07_16_15_57_ +TAAATTTCAAGGAATGCAAATCAGGGTCGTGTGTTTAGACTTCGGCTTTAGAGACCTGAA +TACGTCAAAAACATAACTTCATGATATCTTGCAGT +>EYKX4VC01BCGYW length=115 xy=0434_3926 region=1 run=R_2007_11_07_16_15_57_ +GGCCAGCCGGGACAGCGTTGTTGGGCTGCATGGCGACGAGCTAAAAGTCGCCATCACCGC +CCCGCCGGTTGATGGGCAGGCTAATGCCCATCTGGTAAAAACTTTCTCGCCAAAC +>EYKX4VC01AZXC6 length=116 xy=0292_0280 region=1 run=R_2007_11_07_16_15_57_ +GGGGGCGTTTGGCCTGTCGTCCGGCACCTCGCAAGAGCTACAGCAGGCGCGGCTGGCGAT +CATCGGCGGCACGCCGGCCTATATGTCGCCGGAACACACCACCCGCACCCAACGCG +>EYKX4VC01CATH5 length=82 xy=0826_0843 region=1 run=R_2007_11_07_16_15_57_ +CGAAATTGCACATTCTCGGCCATATCTCTGGACCTACATGACCGATTTGATCATCTTCGA +ACTTAGCCTTCCTTTNTTAACG +>EYKX4VC01BCEIV length=47 xy=0434_0757 region=1 run=R_2007_11_07_16_15_57_ +TGACGTCGTGCCGAGCTACGACAATGCCGACATGGTGATCGTTAACA +>EYKX4VC01BWERM length=83 xy=0662_0304 region=1 run=R_2007_11_07_16_15_57_ +CGGTCGGCCTCACCATGGAGAAGATCCCGCCCCGGCCGAGGTCATGGTGGATCTCGGCCA +GGGCGTGCTGATGAAGTTCAAAT +>EYKX4VC01BT2O7 length=69 xy=0635_1945 region=1 run=R_2007_11_07_16_15_57_ +AGCGTTTCTCCAGCCGGTCGGCTACGCCGTTTGCCCCTGAAAGACGCTGTTCAGACCGAA +CGCGGTAAA +>EYKX4VC01BO0UO length=222 xy=0577_3838 region=1 run=R_2007_11_07_16_15_57_ +AGACCTGGGACAGCGGCGGGCTGCTGAAGCCGCAGGCGATAGAGGACAAACTGCAGTACC +GCTTCTGGCTGCACTATGCCGAAGGCTCGCTGATGCCGCTGCTGTTAATGAAGCTGGTGT +TCGCCAGCCTGGGTAAACCCCCTGTGCCCTTTGGCGTCCGCTCGCTGGGCGCCCTGCTGG +GCAAGGGCATTCAGAAAGCGTGGCTGGATCCCCAGCTGGCCA +>EYKX4VC01CBCPK length=83 xy=0832_1158 region=1 run=R_2007_11_07_16_15_57_ +CGGTCGGCCTCACCATGGAGAAGATCCCGCCCCGGCCGAGGTCATGGTGGATCTCGGCCA +GGGCGTGCTGATGAAGTTCAAAT +>EYKX4VC01B474S length=54 xy=0762_2010 region=1 run=R_2007_11_07_16_15_57_ +AGCAGTTTTCCAGCGCTTTCGAAGAGCGCTGGCGCGCGCGGGCTTCCAGCATAT +>EYKX4VC01BB4QL length=57 xy=0431_0363 region=1 run=R_2007_11_07_16_15_57_ +GGGGAGGAGCTAATAATATGCTCTTGGGGAGGAGCTAATTATATGCTCTTGGGGAGG +>EYKX4VC01BJ37M length=64 xy=0522_0192 region=1 run=R_2007_11_07_16_15_57_ +TCGAGTATGTATCAAGGACTACATACAAATTTGCCAAAAGAGATTATGCACTATCCCGAC +TTCC +>EYKX4VC01BV9R8 length=54 xy=0660_2038 region=1 run=R_2007_11_07_16_15_57_ +AAAACTCGGAGAAACTATTCAGCAGCACTGCGTTTCGCTGAATTTTAGACCGTT +>EYKX4VC01CEPP8 length=60 xy=0870_2350 region=1 run=R_2007_11_07_16_15_57_ +CTGGGTGGGTGCACTACAGGAACGTCATTTGTTCAATCCTCACGTTGTTGTTAGTGTCAG +>EYKX4VC01BTLME length=78 xy=0630_0292 region=1 run=R_2007_11_07_16_15_57_ +TTATCCACACGCTGTCCGGATCCAGCGCCAGGCGCCGACGCTGGACTTCCGCCGCCTGCG +CCCAGTTGCCCTGACTTC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/extract_genomic_dna_out1.fasta Thu Oct 29 22:14:42 2015 -0400 @@ -0,0 +1,456 @@ +>hg17_chr1_147962192_147962580_- +ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG +GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT +GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT +CCCTGTTCGTGAGGTCTGTCCAGTGACCCATCGTCCAGCCCTATACCGGG +ACCCTGTTACAGACATACCCTATGCCACTGCTCGAGCCTTCAAGATCATT +CGTGAGGCTTACAAGAAGTACATTACTGCCCATGGACTGCCGCCCACTGC +CTCAGCCCTGGGCCCCGGCCCGCCACCTCCTGAGCCCCTCCCTGGCTCTG +GGCCCCGAGCCTTGCGCCAGAAAATTGTCATTAAATGA +>hg17_chr1_147984545_147984630_+ +ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTT +TGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG +>hg17_chr1_148078400_148078582_- +GTTCTCAGCTTCCTTGCTTCCATGGCTCCAGCACCATTCGAAACCTCAAA +GAGAGGTTCCACATGAGCATGACTGAGGAGCAGCTGCAGCTGCTGGTGGA +GCAGATGGTGGATGGCAGTATGCGGTCTATCACCACCAAACTCTATGACG +GCTTCCAGTACCTCACCAACGGCATCATGTGA +>hg17_chr1_148185136_148185276_+ +ATGGAAGCGTTTTTGGGGTCGCGGTCCGGACTTTGGGCGGGGGGTCCGGC +CCCAGGACAGTTTTACCGCATTCCGTCCACTCCCGATTCCTTCATGGATC +CGGCGTCTGCACTTTACAGAGGTCCAATCACGCGGACCCA +>hg17_chr10_55251623_55253124_- +TCTTTTCCTTCTCTACCATTTTCAACAAAGCAGGGGAAATAACTCAGTCT +CAGAAGACAGGAAACATCAACAAGTTGTGATGCCCTTTTCTTCCAATACT +ATTGAGGCTCACAAGTCAGCTCATGTAGACGGATCACTTAAGAGCAACAA +ACTGAAGTCTGCAAGAAAATTCACATTTCTATCTGATGAGGATGACTTAA +GTGCCCATAATCCCCTTTATAAGGAAAACATAAGTCAAGTATCAACAAAT +TCAGACATTTCACAGAGAACAGATTTTGTAGACCCATTTTCACCCAAAAT +ACAAGCCAAGAGTAAGTCTCTGAGGGGCCCAAGAGAAAAGATTCAGAGGC +TGTGGAGTCAGTCAGTCAGCTTACCCAGGAGGCTGATGAGGAAAGTTCCA +AATAGACCAGAGATCATAGATCTGCAGCAGTGGCAAGGCACCAGGCAGAA +AGCTGAAAATGAAAACACTGGAATCTGTACAAACAAAAGAGGTAGCAGCA +ATCCATTGCTTACAACTGAAGAGGCAAATTTGACAGAGAAAGAGGAAATA +AGGCAAGGTGAAACACTGATGATAGAAGGAACAGAACAGTTGAAATCTCT +CTCTTCAGACTCTTCATTTTGCTTTCCCAGGCCTCACTTCTCATTCTCCA +CTTTGCCAACTGTTTCAAGAACTGTGGAACTCAAATCAGAACCTAATGTC +ATCAGTTCTCCTGCTGAGTGTTCCTTGGAACTTTCTCCTTCAAGGCCTTG +TGTTTTACATTCTTCACTCTCTAGGAGAGAGACACCTATTTGTATGTTAC +CTATTGAAACCGAAAGAAATATTTTTGAAAATTTTGCCCATCCACCAAAC +ATCTCTCCTTCTGCCTGtccccttccccctcctcctcctatttctcctcc +ttctcctcctcctgctcctgctcctcttgctcctcctcctgacatttctc +ctttttctcttttttgtcctcctccctctcctccttctatccctcttcct +cttcctcctcctACATTTTTTCCACTTTCCGTTTCAACGTCTGGTCCCCC +AACAccacctcttctacctccatttccaactcctcttcctccaccacctc +cttctattccttgccctccacctccttcAGCTTCATTTCTGTCCACAGAG +TGTGTCTGTATAACAGGTGTTAAATGCACGACCAACTTGATGCCTGCCGA +GAAAATTAAGTCCTCTATGACACAGCTATCAACAACGACAGTGTGTAAAA +CAGACCCTCAGAGAGAACCAAAAGGCATCCTCAGACACGTTAAAAACTTA +GCAGAACTTGAAAAATCAGTAGCTAACATGTACAGTCAAATAGAAAAAAA +CTATCTACGCACAAATGTTTCAGAACTTCAAACTATGTGCCCTTCAGAAG +TAACAAATATGGAAATCACATCTGAACAAAACAAGGGGAGTTTGAACAAT +ATTGTCGAGGGAACTGAAAAACAATCTCACAGTCAATCTACTTCACTGTA +A +>hg17_chr11_116124407_116124501_- +ATCCAATGGATTTGAACAGAAGCGCTTTGCCAGGCTTGCCAGCAAGAAGG +CAGTGGAGGAACTTGCCTACAAATGGAGTGTTGAGGATATGTAA +>hg17_chr11_116206508_116206563_+ +ATGCAGCCCCGGGTACTCCTTGTTGTTGCCCTCCTGGCGCTCCTGGCCTC +TGCCC +>hg17_chr11_116211733_116212337_- +CCTAAAGCTCCTTGACAACTGGGACAGCGTGACCTCCACCTTCAGCAAGC +TGCGCGAACAGCTCGGCCCTGTGACCCAGGAGTTCTGGGATAACCTGGAA +AAGGAGACAGAGGGCCTGAGGCAGGAGATGAGCAAGGATCTGGAGGAGGT +GAAGGCCAAGGTGCAGCCCTACCTGGACGACTTCCAGAAGAAGTGGCAGG +AGGAGATGGAGCTCTACCGCCAGAAGGTGGAGCCGCTGCGCGCAGAGCTC +CAAGAGGGCGCGCGCCAGAAGCTGCACGAGCTGCAAGAGAAGCTGAGCCC +ACTGGGCGAGGAGATGCGCGACCGCGCGCGCGCCCATGTGGACGCGCTGC +GCACGCATCTGGCCCCCTACAGCGACGAGCTGCGCCAGCGCTTGGCCGCG +CGCCTTGAGGCTCTCAAGGAGAACGGCGGCGCCAGACTGGCCGAGTACCA +CGCCAAGGCCACCGAGCATCTGAGCACGCTCAGCGAGAAGGCCAAGCCCG +CGCTCGAGGACCTCCGCCAAGGCCTGCTGCCCGTGCTGGAGAGCTTCAAG +GTCAGCTTCCTGAGCGCTCTCGAGGAGTACACTAAGAAGCTCAACACCCA +GTGA +>hg17_chr11_1812377_1812407_+ +ATGCTCCACCTGCATGGCTGGCAAACCATG +>hg17_chr12_38440094_38440321_- +GAGCTTTCTTCCTCTATGCTGGATTTGCTGCTGTGGGACTCCTTTTCATC +TATGGCTGTCTTCCTGAGACCAAAGGCAAAAAATTAGAGGAAATTGAATC +ACTCTTTGACAACAGGCTATGTACATGTGGCACTTCAGATTCTGATGAAG +GGAGATATATTGAATATATTCGGGTAAAGGGAAGTAACTATCATCTTTCT +GACAATGATGCTTCTGATGTGGAATAA +>hg17_chr13_112381694_112381953_+ +ATGAACTCACCAGAGGCGAGGCTCTGCGTTGCTCAATGCAGAGACTCTTA +CCCAGGGTGTCAGCCTCTGAAAGATACACGTGCCTGGGCCTCTTCCCTGA +AGATGGACCCGGCAGGTCTGGAGGGAGGCCCCCGTGATGAATCCCGTGAT +GAGCCGCCGATCCGAGCTCAGGCTGCGTCATGGGACCAGCCACAAGGTTG +CCTGACCTATAAAGGTCGCAGGAGTGCCTCAGGGACACAGAAGCAGTTAC +AGCTGCCAG +>hg17_chr14_98710240_98712285_- +GTAAAGATGAGCCTTCCAGCTACATTTGCACAACATGCAAGCAGCCCTTC +AACAGCGCGTGGTTCCTGCTGCAGCACGCGCAGAACACGCACGGCTTCCG +CATCTACCTGGAGCCCGGGCCGGCCAGCAGCTCGCTCACGCCGCGGCTCA +CCATCCCGCCGCCGCTCGGGCCGGAGGCCGTGGCGCAGTCCCCGCTCATG +AATTTCCTGGGCGACAGCAACCCCTTCAACCTGCTGCGCATGACGGGCCC +CATCCTGCGGGACCACCCGGGCTTCGGCGAGGGCCGCCTGCCGGGCACGC +CGCCTCTCTTCAGTCCCCCGCCGCGCCACCACCTGGACCCGCACCGCCTC +AGTGCCGAGGAGATGGGGCTCGTCGCCCAGCACCCCAGTGCCTTCGACCG +AGTCATGCGCCTGAACCCCATGGCCATCGACTCGCCCGCCATGGACTTCT +CGCGGCGGCTCCGCGAGCTGGCGGGCAACAGCTCCACGCCGCCGCCCGTG +TCCCCGGGCCGCGGCAACCCTATGCACCGGCTCCTGAACCCCTTCCAGCC +CAGCCCCAAGTCCCCGTTCCTGAGCACGCCGCCGCTGCCGCCCATGCCCC +CTGGCGGCACGCCGCCCCCGCAGCCGCCAGCCAAGAGCAAGTCGTGCGAG +TTCTGCGGCAAGACCTTCAAGTTCCAGAGCAATCTCATCGTGCACCGGCG +CAGTCACACGGGCGAGAAGCCCTACAAGTGCCAGCTGTGCGACCACGCGT +GCTCGCAGGCCAGCAAGCTCAAGCGCCACATGAAGACGCACATGCACAAG +GCCGGCTCGCTGGCCGGCCGCTCCGACGACGGGCTCTCGGCCGCCAGCTC +CCCCGAGCCCGGCACCAGCGAGCTGGCGGGCGAGGGCCTCAAGGCGGCCG +ACGGTGACTTCCGCCACCACGAGAGCGACCCGTCGCTGGGCCACGAGCCg +gaggaggaggacgaggaggaggaggaggaggaggaggagCTGCTACTGGA +GAACGAGAGCCGGCCCGAGTCGAGCTTCAGCATGGACTCGGAGCTGAGCC +GCAACCGCGAGAACGGCGGTGGTGGGGTgcccggggtcccgggcgcgggg +ggcggcgcggccAAGGCGCTGGCTGACGAGAAGGCGCTGGTGCTGGGCAA +GGTCATGGAGAACGTGGGCCTAGGCGCACTGCCGCAGTACGGCGAGCTCC +TGGCCGACAAGCAGAAGCGCGGCGCCTTCCTGAAGCGTGCggcgggcggc +ggggacgcgggcgacgacgacgacgcgggcggctgcggggacgcgggcgc +gggcggcgcggtcaacgggcgcgggggcggCTTCGCGCCAGGCACCGAGC +CCTTCCCCGGGCTCTTCCCGCGCAAGCCCGCGCCGCTGCCCAGCCCCGGG +CTCAACAGCGCCGCCAAGCGCATCAAGGTGGAGAAGGACCTGGAGCTGCC +GCCCGCCGCGCTCATCCCGTCCGAGAACGTGTACTCGCAGTGGCTGGTGG +GCTACGCGGCGTCGCGGCACTTCATGAAGGACCCCTTCCTGGGCTTCACG +GACGCACGACAGTCGCCCTTCGCCACGTCGTCCGAGCACTCGTCCGAGAA +CGGCAGCCTGCGCTTCTCCACGCCGCCCGGGGACCTGCTGGACGGCGGCC +TCTCGGGCCGCAGCGGCACGGCCAGCGGAGGCAGCACCCCGCACCTgggc +ggcccgggccccgggcggcccAGCTCCAAGGAGGGCCGCCGCAGCGACAC +GTGCGAGTACTGCGGCAAGGTGTTCAAGAACTGCAGCAACTTGACGGTGC +ACCGGCGGAGCCACACCGGCGAGCGGCCTTACAAGTGCGAGCTGTGCAAC +TACGCGTGCGCGCAGAGCAGCAAGCTCACGCGCCACATGAAGACGCACGG +GCAGATCGGCAAGGAGGTGTACCGCTGCGACATCTGCCAGATGCCCTTCA +GCGTCTACAGCACCCTGGAGAAACACATGAAAAAGTGGCACGGCGAGCAC +TTGCTGACTAACGACGTCAAAATCGAGCAGGCCGAGAGGAGCTAA +>hg17_chr15_41486872_41487060_- +ATATTGCTTTAGGGGTATTTGATGTGGTGGTGACGGACCCCTCATGCCCA +GCCTCGGTGCTGAAGTGTGCTGAAGCATTGCAGCTGCCTGTGGTGTCACA +AGAGTGGGTGATCCAGTGCCTCATTGTTGGGGAGAGAATTGGATTCAAGC +AGCATCCAAAATATAAACACGATTATGTTTCTCACTAA +>hg17_chr15_41673708_41673857_+ +ATGGCTGGTCCCTTCTCCCGTCTGCTGTCCGCCCGCCCGGGACTCAGGCT +CCTGGCTTTGGCCGGAGCGGGGTCTCTAGCCGCTGGGTTTCTGCTCCGAC +CGGAACCTGTACGAGCTGCCAGTGAACGACGGAGGCTGTATCCCCCGAG +>hg17_chr15_41679161_41679250_- +GTCGAAGTACAGCCTGGGGCCTCCAGGACTGGTCACGACCTTCCTGGTCC +CTGGTATTGACTATCAGCTTCCTTGGCCACCTGCTATGA +>hg17_chr15_41826029_41826196_+ +ATGCGCCTCCGCCGCCTAGCGCTGTTCCCGGGTGTGGCGCTGCTTCTTGC +CGCGGCCCGCCTCGCCGCTGCCTCCGACGTGCTAGAACTCACGGACGACA +ACTTCGAGAGTCGCATCTCCGACACGGGCTCTGCGGGCCTCATGCTCGTC +GAGTTCTTCGCCCCCTG +>hg17_chr16_142908_143003_+ +ATGTCTCTGACCAAGACTGAGAGGACCATCATTGTGTCCATGTGGGCCAA +GATCTCCACGCAGGCCGACACCATCGGCACCGAGACTCTGGAGAG +>hg17_chr16_179963_180135_- +GTCACGCTCCCGGGATCGGCGTCGGAGGCGGTCAAGATCTACCTCCCGAG +AGCGACGGAAATTGTCCCGGTCCCGGTCCCGAGATAGACATCGGCGCCAC +CGCAGCCGTTCCCGGAGCCACAGCCGGGGACATCGTCGGGCTTCCCGGGA +CCGAAGTGCGAAATACAAGTAA +>hg17_chr16_244413_244681_+ +ATGTTGGACCACAAGGACTTAGAGGCCGAAATCCACCCCTTGAAAAATGA +AGAAAGAAAATCGCAGGAAAATCTGGGAAATCCATCAAAAAATGAGGATA +ACGTGAAAAGCGCGCCTCCACAGTCCCGGCTCTCCCGGTGCCGAGCGGCG +GCGTTTTTTCTTTCATTGTTTCTCTGCCTTTTTGTGGTGTTCGTCGTCTC +ATTCGTCATCCCGTGTCCAGACCGGCCGGCGTCACAGCGAATGTGGAGGA +TAGACTACAGTGCCGCTG +>hg17_chr16_259268_259383_- +CGTGTTCCCGTTTACGTGGAGGCCACGGCACTCGAGCCCCAGCCCTGCAC +TCCTTCCCACCCCTGTGGAGCCCACAGCGGCTTGTGGCCCTGGGGGTGGA +GATGGGGTGGCCTAG +>hg17_chr18_23786114_23786321_- +GGCCTTAAAGCGGCTGACAATGACCCCACAGCTCCACCATATGACTCCCT +GTTAGTGTTTGACTATGAAGGCAGTGGCTCCACTGCTGGGTCCTTGAGCT +CCCTTAATTCCTCAAGTAGTGGTGGTGAGCAGGACTATGATTACCTGAAC +GACTGGGGGCCACGGTTCAAGAAACTTGCTGACATGTATGGTGGAGGTGA +TGACTGA +>hg17_chr18_59406881_59407046_+ +ATGGATTCACTTGGCGCCGTCAGCACTCGACTTGGGTTTGATCTTTTCAA +AGAGCTGAAGAAAACAAATGATGGCAACATCTTCTTTTCCCCTGTGGGCA +TCTTGACTGCAATTGGCATGGTCCTCCTGGGGACCCGAGGAGCCACCGCT +TCCCAGTTGGAGGAG +>hg17_chr18_59455932_59456337_- +CTTGAAGAGAAACTCACTGCTGAGAAATTGATGGAATGGACAAGTTTGCA +GAATATGAGAGAGACATGTGTCGATTTACACTTACCTCGGTTCAAAATGG +AAGAGAGCTATGACCTCAAGGACACGTTGAGAACCATGGGAATGGTGAAT +ATCTTCAATGGGGATGCAGACCTCTCAGGCATGACCTGGAGCCACGGTCT +CTCAGTATCTAAAGTCCTACACAAGGCCTTTGTGGAGGTCACTGAGGAGG +GAGTGGAAGCTGCAGCTGCCACCGCTGTAGTAGTAGTCGAATTATCATCT +CCTTCAACTAATGAAGAGTTCTGTTGTAATCACCCTTTCCTATTCTTCAT +AAGGCAAAATAAGACCAACAGCATCCTCTTCTATGGCAGATTCTCATCCC +CATAG +>hg17_chr18_59600586_59600754_+ +ATGGCCTCCCTTGCTGCAGCAAATGCAGAGTTTTGCTTCAACCTGTTCAG +AGAGATGGATGACAATCAAGGAAATGGAAATGTGTTCTTTTCCTCTCTGA +GCCTCTTCGCTGCCCTGGCCCTGGTCCGCTTGGGCGCTCAAGATGACTCC +CTCTCTCAGATTGATAAG +>hg17_chr19_59068595_59069564_+ +ATGCCAGTGACGGTAACCCGCACCACCATCACAACCACCACGACGTCATC +TTCGGGCCTGGGGTCCCCCATGATCGTGGGGTCCCCTCGGGCCCTGACAC +AGCCCCTGGGTCTCCTTCGCCTGCTGCAGCTGGTGTCTACCTGCGTGGCC +TTCTCGCTGGTGGCTAGCGTGGGCGCCTGGACGGGGTCCATGGGCAACTG +GTCCATGTTCACCTGGTGCTTCTGCTTCTCCGTGACCCTGATCATCCTCA +TCGTGGAGCTGTGCGGGCTCCAGGCCCGCTTCCCCCTGTCTTGGCGCAAC +TTCCCCATCACCTTCGCCTGCTATGCGGCCCTCTTCTGCCTCTCGGCCTC +CATCATCTACCCCACCACCTATGTCCAGTTCCTGTCCCACGGCCGTTCGC +GGGACCACGCCATCGCCGCCACCTTCTTCTCCTGCATCGCGTGTGTGGCT +TACGCCACCGAAGTGGCCTGGACCCGGGCCCGGCCCGGCGAGATCACTGG +CTATATGGCCACCGTACCCGGGCTGCTGAAGGTGCTGGAGACCTTCGTTG +CCTGCATCATCTTCGCGTTCATCAGCGACCCCAACCTGTACCAGCACCAG +CCGGCCCTGGAGTGGTGCGTGGCGGTGTACGCCATCTGCTTCATCCTAGC +GGCCATCGCCATCCTGCTGAACCTGGGGGAGTGCACCAACGTGCTACCCA +TCCCCTTCCCCAGCTTCCTGTCGGGGCTGGCCTTGCTGTCTGTCCTCCTC +TATGCCACCGCCCTTGTTCTCTGGCCCCTCTACCAGTTCGATGAGAAGTA +TGGCGGCCAGCCTCGGCGCTCGAGAGATGTAAGCTGCAGCCGCAGCCATG +CCTACTACGTGTGTGCCTGGGACCGCCGACTGGCTGTGGCCATCCTGACG +GCCATCAACCTACTGGCGTATGTGGCTGACCTGGTGCACTCTGCCCACCT +GGTTTTTGTCAAGGTCTAA +>hg17_chr19_59236026_59236146_- +ACGGCAGACCCCCAAGGAGTGACCTATGCTGAGCTAAGCACCAGCGCCCT +GTCTGAGGCAGCTTCAGACACCACCCAGGAGCCCCCAGGATCTCATGAAT +ATGCGGCACTGAAAGTGTAG +>hg17_chr19_59297998_59298008_+ +ATGGCTGCGA +>hg17_chr19_59302168_59302288_- +ATTAAGGTTGAGGAAGACTTTGGCTTTGAAGCAGATGAGGCCCTGGATTC +CAGTTGGGTTTCTCGGGGTCCAGACAAACTGCTGCCCTACCCGACCCTGG +CCAGCCCAGCCTCTGACTGA +>hg17_chr2_118288583_118288668_+ +ATGTCACACCTGCCGATGAAACTCCTGCGTAAGAAGATCGAGAAGCGGAA +CCTCAAATTGCGGCAGCGGAACCTAAAGTTTCAGG +>hg17_chr2_118394148_118394202_- +GAGGGCCGCAAGAACGAGATGCTGCTGTCCAAGGTGAAAGCGAAGGCCTC +CTGA +>hg17_chr2_220190202_220190242_+ +ATGCTCAAAGCGGTGATCCTGATTGGAGGCCCTCAAAAGG +>hg17_chr2_220229609_220230869_- +TGGGAGATCCAGAATACCAGCCATCTGGCCGTTGATGGGGACCAGGCAGC +TGCTTGGCCCGTGGGTATTCCAGCACCATCCCGCCCGGCCTCCCGCTTTG +AGGTGCTGCGCTGGGACTACTTCACGGAGCAGCACGCTTTCTCCTGCGCC +GATGGCTCACCCCGCTGCCCACTGCGTGGGGCTGACCGGGCTGATGTGGC +CGATGTTCTGGGGACAGCTCTAGAGGAGCTGAACCGCCGCTACCACCCGG +CCTTGCGGCTCCAGAAGCAGCAGCTGGTGAATGGCTACCGACGCTTTGAT +CCGGCCCGGGGTATGGAATACACGCTGGACTTGCAGCTGGAGGCACTGAC +CCCCCAGGGAGGCCGCCGGCCCCTCACTCGCCGAGTGCAGCTGCTCCGGC +CGCTGAGCCGCGTGGAGATCTTGCCTGTGCCCTATGTCACTGAGGCCTCA +CGTCTCACTGTGCTGCTGCCTCTAGCTGCGGCTGAGCGTGACCTGGCCCC +TGGCTTCTTGGAGGCCTTTGCCACTGCAGCACTGGAGCCTGGTGATGCTG +CGGCAGCCCTGACCCTGCTGCTACTGTATGAGCCGCGCCAGGCCCAGCGC +GTGGCCCATGCAGATGTCTTCGCACCTGTCAAGGCCCACGTGGCAGAGCT +GGAGCGGCGTTTCCCCGGTGCCCGGGTGCCATGGCTCAGTGTGCAGACAG +CCGCACCCTCACCACTGCGCCTCATGGATCTACTCTCCAAGAAGCACCCG +CTGGACACACTGTTCCTGCTGGCCGGGCCAGACACGGTGCTCACGCCTGA +CTTCCTGAACCGCTGCCGCATGCATGCCATCTCCGGCTGGCAGGCCTTCT +TTCCCATGCATTTCCAAGCCTTCCACCCAGCTGTGGCCCCACCACAAGGG +CCTGGGCCCCCAGAGCTGGGCCGTGACACTGGCCGCTTTGATCGCCAGGC +AGCCAGCGAGGCCTGCTTCTACAACTCCGACTATGTGGCAGCCCGTGGGC +GCCTGGCGGCAGCCTCAGAACAAGAAGAGGAGCTGCTGGAGAGCCTGGAT +GTGTACGAGCTGTTCCTCCACTTCTCCAGTCTGCATGTGCTGCGGGCGGT +GGAGCCGGCGCTGCTGCAGCGCTACCGGGCCCAGACGTGCAGCGCGAGGC +TCAGTGAGGACCTGTACCACCGCTGCCTCCAGAGCGTGCTTGAGGGCCTC +GGCTCCCGAACCCAGCTGGCCATGCTACTCTTTGAACAGGAGCAGGGCAA +CAGCACCTGA +>hg17_chr20_33330413_33330423_- +CCTCACCTGA +>hg17_chr20_33513606_33513792_+ +ATGGAGACAAGAAGCCCTGGGTTGAACAACATGAAGCCCCAGTCACTGCA +GCTGGTACTGGAAGAGCAGGTGCTGGCACTACAGCAGCAGATGGCAGAGA +ATCAGGCAGCCTCCTGGCGGAAGCTGAAGAACTCCCAGGAGGCCCAGCAG +AGACAAGCAACCCTTGTGAGGAAGCTGCAGGCCAAG +>hg17_chr20_33579500_33579527_- +ATTTTGGAAGATGGTCTGGTTCCCTAG +>hg17_chr20_33593260_33593348_+ +ATGGAGGCGCTGGGGAAGCTGAAGCAGTTCGATGCCTACCCCAAGACTTT +GGAGGACTTCCGGGTCAAGACCTGCGGGGGCGCCACCG +>hg17_chr21_32707032_32707192_+ +ATGCTTCTGCCGGGACGCGCACGCCAACCGCCGACGCCCCAGCCCGTGCA +GCATCCCGGCCTCCGCCGGCAGGTAGAGCCGCCGGGGCAGCTCCTGCGCC +TCTTCTACTGCACTGTCCTGGTCTGCTCCAAAGAGATCTCAGCGCTCACC +GACTTCTCTG +>hg17_chr21_32869641_32870022_- +ATGGAGCGCCCTCTCATCTGGCACCTTCCTGGCCTCTTTCCCAGGCCCCA +GTTCTGTCCATGCAGCTGTGGGTGCTTCCTGCATTGCGGGTCTCACGGGG +AGGAGACGAGAGTGCCCCTGGTTGAGTCAGGAAAGAATTCTATCTTCACG +TCGCTGCCAGCAAATGACCACAGCAGCTTCACGACCTCTGCAGGAACCTA +TCTTGGTAAAGAAACGGGGCCTATGTGGTGGCCGAGCCTCAGGTGTGGCC +GAGCTTCAGGTGTGGCCCTTATGCACAGCACAGCCCAAGCCTGTGGGCAC +CACTCGCCCTGGGCTGCCTGGCACCTGGACTCCTTCCCATCCTTGGCCGA +GGTCTGCGTGGCCCTTCAGGGCCGAATCTGA +>hg17_chr21_33321040_33322012_+ +ATGGACTCGGACGCCAGCCTGGTGTCCAGCCGCCCGTCGTCGCCAGAGCC +CGATGACCTTTTTCTGCCGGCCCGGAGTAAGGGCAGCAGCGGCAGCGCCT +TCACTGGGGGCACCGTGTCCTCGTCCACCCCGAGTGACTGCCCGCCGGAG +CTGAGCGCCGAGCTGCGCGGCGCTATGGGCTCTGCGGGCGCGCATCCTGG +GGACAAGCTAGGAGGCAGTGGCTTCAAGTCATCCTCGTCCAGCACCTCGT +CGTCTACGTCGTCGGCGGCTGCGTCGTCCACCAAGAAGGACAAGAAGCAA +ATGACAGAGCCGGAGCTGCAGCAGCTGCGTCTCAAGATCAACAGCCGCGA +GCGCAAGCGCATGCACGACCTCAACATCGCCATGGATGGCCTCCGCGAGG +TCATGCCGTACGCACACGGCCCTTCGGTGCGCAAGCTTTCCAAGATCGCC +ACGCTGCTGCTGGCGCGCAACTACATCCTCATGCTCACCAACTCGCTGGA +GGAGATGAAGCGACTGGTGAGCGAGATCTACGGGGGCCACCACGCTGGCT +TCCACCCGTCGGCCTGCGGCGGCCTGGCGCACTCCGCGCCCCTGCCCGCC +GCCACCGCGCACCCGGCAGCAGCAGCGCACGCCGCACATCACCCCGCGGT +GCACCACCCCATCctgccgcccgccgccgcagcggctgctgccgccgctg +cagccgcggctgTGTCCAGCGCCTCTCTGCCCGGATCCGGGCTGCCGTCG +GTCGGCTCCATCCGTCCACCGCACGGCCTACTCAAGTCTCCGTCTGCTgc +cgcggccgccccgctggggggcgggggcggcggcAGTGGGGCGAGCGGGG +GCTTCCAGCACTGGGGCGGCATGCCCTGCCCCTGCAGCATGTGCCAGGTG +CCGCCGCCGCACCACCACGTGTCGGCTATGGGCGCCGGCAGCCTGCCGCG +CCTCACCTCCGACGCCAAGTGA +>hg17_chr21_33744994_33745040_- +CACTCTGATCTACAAATTTGGAAGAACCGAAGAGCTATGGACCTGA +>hg17_chr22_30120223_30120265_+ +ATGAGCAGCACCTTAGCTAAGATCGCGGAGATAGAAGCAGAG +>hg17_chr22_30160419_30160661_- +TTCTGCATCCTCCAGGCTCTGGTTCCCATGCAGCAGCTGTCAGCGTTCAG +ACAACCCCTCAGAACGTGCCCAGCCGGTCAGGCCTGCCCCACATGCACTC +CCAGCTGGAGCATCGCCCCAGCCAGAGGAGCAGCTCCCCTGTGGGCCTTG +CCAAATGGTTTGGCTCAGATGTGCTACAGCAACCCCTGCCCTCCATGCCC +GCCAAAGTTATCAGTGTAGATGAATTGGAATACCGACAGTGA +>hg17_chr22_30665273_30665360_+ +ATGGGGGACCGGGAGCAGCTGCTGCAGCGGGCGCGGCTGGCCGAGCAGGC +GGAGCGCTACGACGACATGGCCTCCGCTATGAAGGCG +>hg17_chr22_30939054_30939266_- +ATTATCCTGAGAAATCACGTGGATGCCTCAAGAAAGCTTATGACTTGTTC +TGCGGTTTGCAGAAGGGACCCAAGCTAACCAAGGAGGAGGAGGAAGCCTT +GAGCAAGAAGCTCACAGACACGTCTGAGAGGCCCTCGTGGAGGACAATAG +TGAACATCAACGCCATCCTCCTCCTGGCTGTGGTGGTCTTTATTCACGGC +TACTATGCCTGA +>hg17_chr5_131424298_131424460_+ +ATGAGCCGCCTGCCCGTCCTGCTCCTGCTCCAACTCCTGGTCCGCCCCGG +ACTCCAAGCTCCCATGACCCAGACAACGCCCTTGAAGACAAGCTGGGTTA +ACTGCTCTAACATGATCGATGAAATTATAACACACTTAAAGCAGCCACCT +TTGCCTTTGCTG +>hg17_chr5_131556601_131556672_- +TCTCCAATAAGTGGTTCCATGAACGAGGACAGGAGTTCTTGAGACCTTGT +GGATCAACAGAAGTTGACTGA +>hg17_chr5_131621326_131621419_+ +ATGCCCCATTCCGTGACCCTGCGCGGGCCTTCGCCCTGGGGCTTCCGCCT +GGTGGGCGGCCGGGACTTCAGCGCGCCCCTCACCATCTCACGG +>hg17_chr5_131847541_131847666_- +GGGATATTGGGCTGAGTCTACAGCGTGTCTTCACAGATCTGAAGAACATG +GATGCCACCTGGCTGGACAGCCTGCTGACCCCAGTCCGGTTGCCCTCCAT +CCAGGCCATTCCCTGTGCACCGTAG +>hg17_chr6_108299600_108299744_- +TTGGAAGTTCATGAGGCTAAGCCTGTGCCAGAAAATCACCCACAGTGGGA +TACAGCAATAGAGGGGGATGAAGACCAGGAGGACAGTGAGGGCTTTGAAG +ATAGCTTTGAGGAAGAAGAGGAGGAAGAAGAAGATGATGACTAA +>hg17_chr6_108594662_108594687_+ +ATGAGCAAGCCAGCCGGATCAACAA +>hg17_chr6_108640045_108640151_- +GGTCGCTGGTCATCCTCTGGCACAGAACGAACGTTGTCTTCACATGTTTT +TACAAGATGAAATAATAGATAAAAGCTATACTCCATCTAAAATAAGACAT +GCCTGA +>hg17_chr6_108722976_108723115_+ +ATGGCGGCCTCCTGGTCGCTCTTGGTTACCCTGCGCCCCTTAGCACAGAG +CCCGCTGAGAGGGAGATGTGTTGGGTGCGGGGCCTGGGCCGCCGCTCTCG +CTCCTCTGGCCACCGCCCCTGGGAAGCCCTTTTGGAAAG +>hg17_chr7_113660517_113660685_+ +ATGATGCAGGAATCTGCGACAGAGACAATAAGCAACAGTTCAATGAATCA +AAATGGAATGAGCACTCTAAGCAGCCAATTAGATGCTGGCAGCAGAGATG +GAAGATCAAGTGGTGACACCAGCTCTGAAGTAAGCACAGTAGAACTGCTG +CATCTGCAACAACAGCAG +>hg17_chr7_116512159_116512389_- +GCTCCCTGGGTACAGCAGGCCGTGTGTGCAACCTGACTTCCCGGGGCATG +GACAGCTGTGAAGTCATGTGCTGTGGGAGAGGCTACGACACCTCCCATGT +CACCCGGATGACCAAGTGTGGGTGTAAGTTCCACTGGTGCTGCGCCGTGC +GCTGTCAGGACTGCCTGGAAGCTCTGGATGTGCACACATGCAAGGCCCCC +AAGAACGCTGACTGGACAACCGCTACATGA +>hg17_chr7_116714099_116714152_+ +ATGCAGAGGTCGCCTCTGGAAAAGGCCAGCGTTGTCTCCAAACTTTTTTT +CAG +>hg17_chr7_116945541_116945787_- +GAGGTCAGTCCTCTCAGCAGCCATCAAACTACTGAATGCAGCAACAGTAA +ATCAAAGACTGAGTTGGGTGTTTCAAGAGTTAAATCTTTTCTTCCTGTTC +CTAGAAGTAAAGTCACCCAGTGTTCCCAGAACACCAAAAGAAGCAGCAGC +AGCAGTAATACAAGGCAAATAGAAATCAACAACAACTCAAAAGAAGTGAA +TTGGAACTTACACAAAAATGAACACCTAGAAAAACCTAACAAATAG +>hg17_chr8_118881131_118881317_- +ACTTCTCGGGCTTCCCGTTGGGCTGACCCTGACCACTTTGCCCAGCGACA +GAGCTGCATGAATACGTTTGCCAGCTGGTTTGGCTACATGCCGCTGATCC +ACTCTCAGATGAGGCTCGACCCCGTCCTCTTTAAAGACCAGGTCTCTATT +TTGAGGAAGAAATACCGAGACATTGAGCGACTTTGA +>hg17_chr9_128764156_128764189_+ +ATGGCCTGCCTGAGCCCCTCGCAGCTCCAGAAG +>hg17_chr9_128787519_128789136_- +ATGACCCGAGAGTGCCCATCTCCGGCCCCGGGGCCTGGGGCTCCGCTGAG +TGGATCGGTGCTGGCAGAGGCGGCAGTAGTGTTTGCAGTGGTGCTGAGCA +TCCACGCAACCGTATGGGACCGATACTCGTGGTGCGCCGTGGCCCTCGCA +GTGCAGGCCTTCTACGTCCAATACAAGTGGGACCGGCTGCTACAGCAGGG +AAGCGCCGTCTTCCAGTTCCGAATGTCCGCAAACAGTGGCCTATTGCCCG +CCTCCATGGTCATGCCTTTGCTTGGACTAGTCATGAAGGAGCGGTGCCAG +ACTGCTGGGAACCCGTTCTTTGAGCGTTTTGGCATTGTGGTGGCAGCCAC +TGGCATGGCAGTGGCCCTCTTCTCATCAGTGTTGGCGCTCGGCATCACTC +GCCCAGTGCCAACCAACACTTGTGTCATCTTGGGCTTGGCTGGAGGTGTT +ATCATTTATATCATGAAGCACTCGTTGAGCGTGGGGGAGGTGATCGAAGT +CCTGGAAGTCCTTCTGATCTTCGTTTATCTCAACATGATCCTGCTGTACC +TGCTGCCCCGCTGCTTCACCCCTGGTGAGGCACTGCTGGTATTGGGTGGC +ATTAGCTTTGTCCTCAACCAGCTCATCAAGCGCTCTCTGACACTGGTGGA +AAGTCAGGGGGACCCAGTGGACTTCTTCCTGCTGGTGGTGGTAGTAGGGA +TGGTACTCATGGGCATTTTCTTCAGCACTCTGTTTGTCTTCATGGACTCA +GGCACCTGGGCCTCCTCCATCTTCTTCCACCTCATGACCTGTGTGCTGAG +CCTTGGTGTGGTCCTACCCTGGCTGCACCGGCTCATCCGCAGGAATCCCC +TGCTCTGGCTTCTTCAGTTTCTCTTCCAGACAGACACCCGCATCTACCTC +CTAGCCTATTGGTCTCTGCTGGCCACCTTGGCCTGCCTGGTGGTGCTGTA +CCAGAATGCCAAGCGGTCATCTTCCGAGTCCAAGAAGCACCAGGCCCCCA +CCATCGCCCGAAAGTATTTCCACCTCATTGTGGTAGCCACCTACATCCCA +GGTATCATCTTTGACCGGCCACTGCTCTATGTAGCCGCCACTGTATGCCT +GGCGGTCTTCATCTTCCTGGAGTATGTGCGCTACTTCCGCATCAAGCCTT +TGGGTCACACTCTACGGAGCTTCCTGTCCCTTTTTCTGGATGAACGAGAC +AGTGGACCACTCATTCTGACACACATCTACCTGCTCCTGGGCATGTCTCT +TCCCATCTGGCTGATCCCCAGACCCTGCACACAGAAGGGTAGCCTGGGAG +GAGCCAGGGCCCTCGTCCCCTATGCCGGTGTCCTGGCTGTGGGTGTGGGT +GATACTGTGGCCTCCATCTTCGGTAGCACCATGGGGGAGATCCGCTGGCC +TGGAACCAAAAAGACTTTTGAGGGGACCATGACATCTATATTTGCGCAGA +TCATTTCTGTAGCTCTGATCTTAATCTTTGACAGTGGAGTGGACCTAAAC +TACAGTTATGCTTGGATTTTGGGGTCCATCAGCACTGTGTCCCTCCTGGA +AGCATACACTACACAGATAGACAATCTCCTTCTGCCTCTCTACCTCCTGA +TATTGCTGATGGCCTAG +>hg17_chr9_128882427_128882523_+ +ATGGCGTTCCGGAGGGCCGAGGGCACGTCTATGATCCAGGCCCTGGCCAT +GACGGTGGCCGAGATCCCCGTGTTCCTGTACACGACGTTTGGGCAG +>hg17_chr9_128937229_128937445_- +GTCCCTGCCAAGACAGACTGTGTCATGTTCTTCGGGCCCGTGGTCCCCGA +CGGCTACGGTGTCTGCTATAACCCCATGGAGGCCCACATCAACTTCTCCC +TGTCGGCCTACAACAGCTGCGCGGAGACCAACGCCGCCCGCCTGGCGCAT +TACCTGGAGAAGGCGCTCCTGGACATGCGTGCCCTGCTGCAGAGCCACCC +CCGGGCCAAGCTCTGA +>hg17_chrX_122745047_122745924_+ +ATGACTTTTAACAGTTTTGAAGGATCTAAAACTTGTGTACCTGCAGACAT +CAATAAGGAAGAAGAATTTGTAGAAGAGTTTAATAGATTAAAAACTTTTG +CTAATTTTCCAAGTGGTAGTCCTGTTTCAGCATCAACACTGGCACGAGCA +GGGTTTCTTTATACTGGTGAAGGAGATACCGTGCGGTGCTTTAGTTGTCA +TGCAGCTGTAGATAGATGGCAATATGGAGACTCAGCAGTTGGAAGACACA +GGAAAGTATCCCCAAATTGCAGATTTATCAACGGCTTTTATCTTGAAAAT +AGTGCCACGCAGTCTACAAATTCTGGTATCCAGAATGGTCAGTACAAAGT +TGAAAACTATCTGGGAAGCAGAGATCATTTTGCCTTAGACAGGCCATCTG +AGACACATGCAGACTATCTTTTGAGAACTGGGCAGGTTGTAGATATATCA +GACACCATATACCCGAGGAACCCTGCCATGTATAGTGAAGAAGCTAGATT +AAAGTCCTTTCAGAACTGGCCAGACTATGCTCACCTAACCCCAAGAGAGT +TAGCAAGTGCTGGACTCTACTACACAGGTATTGGTGACCAAGTGCAGTGC +TTTTGTTGTGGTGGAAAACTGAAAAATTGGGAACCTTGTGATCGTGCCTG +GTCAGAACACAGGCGACACTTTCCTAATTGCTTCTTTGTTTTGGGCCGGA +ATCTTAATATTCGAAGTGAATCTGATGCTGTGAGTTCTGATAGGAATTTC +CCAAATTCAACAAATCTTCCAAGAAATCCATCCATGGCAGATTATGAAGC +ACGGATCTTTACTTTTGGGACATGGATATACTCAGTTAACAAGGAGCAGC +TTGCAAGAGCTGGATTTTATGCTTTAG +>hg17_chrX_152648964_152649196_- +TGACAACGAGGAGAAGGCCTTTGGCAGCAGCCAGCCATCGCTCAACGGGG +ACATCAAGCCCCTGGGCAGTGACGACAGCCTGGCCGATTATGGGGGCAGC +GTGGATGTTCAGTTCAACGAGGATGGTTCGTTCATTGGCCAGTACAGTGG +CAAGAAGGAGAAGGAGGCGGCAGGGGGCAATGACAGCTCAGGGGCCACTT +CCCCCATCAACCCTGCCGTGGCCCTAGAATAG +>hg17_chrX_152691446_152691471_+ +ATGCTCATGGCGTCCACCACTTCCG +>hg17_chrX_152694029_152694263_- +GCTGTGGCACAGAACATGGACTCTGTGTTTAAGGAGCTCTTGGGAAAGAC +CTCTGTCCGCCAGGGCCTTGGGCCAGCATCTACCACCTCTCCCAGTCCTG +GGCCCCGAAGCCCAAAGGCCCCGCCCAGCAGCCGCCTGGGCAGGAACAAA +GGCTTCTCCCGGGGCCCTGGGGCCCCAGCCTCACCCTCAGCTTCCCACCC +CCAGGGCCTAGACACGACCCCCAAGCCACACTGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fasta_tool_compute_length_1.out Thu Oct 29 22:14:42 2015 -0400 @@ -0,0 +1,18 @@ +EYKX4VC01B65GS length=54 xy=0784_1754 region=1 run=R_2007_11_07_16_15_57_ 54 +EYKX4VC01BNCSP length=187 xy=0558_3831 region=1 run=R_2007_11_07_16_15_57_ 187 +EYKX4VC01CD9FT length=115 xy=0865_1719 region=1 run=R_2007_11_07_16_15_57_ 115 +EYKX4VC01B8FW0 length=95 xy=0799_0514 region=1 run=R_2007_11_07_16_15_57_ 95 +EYKX4VC01BCGYW length=115 xy=0434_3926 region=1 run=R_2007_11_07_16_15_57_ 115 +EYKX4VC01AZXC6 length=116 xy=0292_0280 region=1 run=R_2007_11_07_16_15_57_ 116 +EYKX4VC01CATH5 length=82 xy=0826_0843 region=1 run=R_2007_11_07_16_15_57_ 82 +EYKX4VC01BCEIV length=47 xy=0434_0757 region=1 run=R_2007_11_07_16_15_57_ 47 +EYKX4VC01BWERM length=83 xy=0662_0304 region=1 run=R_2007_11_07_16_15_57_ 83 +EYKX4VC01BT2O7 length=69 xy=0635_1945 region=1 run=R_2007_11_07_16_15_57_ 69 +EYKX4VC01BO0UO length=222 xy=0577_3838 region=1 run=R_2007_11_07_16_15_57_ 222 +EYKX4VC01CBCPK length=83 xy=0832_1158 region=1 run=R_2007_11_07_16_15_57_ 83 +EYKX4VC01B474S length=54 xy=0762_2010 region=1 run=R_2007_11_07_16_15_57_ 54 +EYKX4VC01BB4QL length=57 xy=0431_0363 region=1 run=R_2007_11_07_16_15_57_ 57 +EYKX4VC01BJ37M length=64 xy=0522_0192 region=1 run=R_2007_11_07_16_15_57_ 64 +EYKX4VC01BV9R8 length=54 xy=0660_2038 region=1 run=R_2007_11_07_16_15_57_ 54 +EYKX4VC01CEPP8 length=60 xy=0870_2350 region=1 run=R_2007_11_07_16_15_57_ 60 +EYKX4VC01BTLME length=78 xy=0630_0292 region=1 run=R_2007_11_07_16_15_57_ 78
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fasta_tool_compute_length_2.out Thu Oct 29 22:14:42 2015 -0400 @@ -0,0 +1,65 @@ +hg17_chr1_147962192_147962580_- 388 +hg17_chr1_147984545_147984630_+ 85 +hg17_chr1_148078400_148078582_- 182 +hg17_chr1_148185136_148185276_+ 140 +hg17_chr10_55251623_55253124_- 1501 +hg17_chr11_116124407_116124501_- 94 +hg17_chr11_116206508_116206563_+ 55 +hg17_chr11_116211733_116212337_- 604 +hg17_chr11_1812377_1812407_+ 30 +hg17_chr12_38440094_38440321_- 227 +hg17_chr13_112381694_112381953_+ 259 +hg17_chr14_98710240_98712285_- 2045 +hg17_chr15_41486872_41487060_- 188 +hg17_chr15_41673708_41673857_+ 149 +hg17_chr15_41679161_41679250_- 89 +hg17_chr15_41826029_41826196_+ 167 +hg17_chr16_142908_143003_+ 95 +hg17_chr16_179963_180135_- 172 +hg17_chr16_244413_244681_+ 268 +hg17_chr16_259268_259383_- 115 +hg17_chr18_23786114_23786321_- 207 +hg17_chr18_59406881_59407046_+ 165 +hg17_chr18_59455932_59456337_- 405 +hg17_chr18_59600586_59600754_+ 168 +hg17_chr19_59068595_59069564_+ 969 +hg17_chr19_59236026_59236146_- 120 +hg17_chr19_59297998_59298008_+ 10 +hg17_chr19_59302168_59302288_- 120 +hg17_chr2_118288583_118288668_+ 85 +hg17_chr2_118394148_118394202_- 54 +hg17_chr2_220190202_220190242_+ 40 +hg17_chr2_220229609_220230869_- 1260 +hg17_chr20_33330413_33330423_- 10 +hg17_chr20_33513606_33513792_+ 186 +hg17_chr20_33579500_33579527_- 27 +hg17_chr20_33593260_33593348_+ 88 +hg17_chr21_32707032_32707192_+ 160 +hg17_chr21_32869641_32870022_- 381 +hg17_chr21_33321040_33322012_+ 972 +hg17_chr21_33744994_33745040_- 46 +hg17_chr22_30120223_30120265_+ 42 +hg17_chr22_30160419_30160661_- 242 +hg17_chr22_30665273_30665360_+ 87 +hg17_chr22_30939054_30939266_- 212 +hg17_chr5_131424298_131424460_+ 162 +hg17_chr5_131556601_131556672_- 71 +hg17_chr5_131621326_131621419_+ 93 +hg17_chr5_131847541_131847666_- 125 +hg17_chr6_108299600_108299744_- 144 +hg17_chr6_108594662_108594687_+ 25 +hg17_chr6_108640045_108640151_- 106 +hg17_chr6_108722976_108723115_+ 139 +hg17_chr7_113660517_113660685_+ 168 +hg17_chr7_116512159_116512389_- 230 +hg17_chr7_116714099_116714152_+ 53 +hg17_chr7_116945541_116945787_- 246 +hg17_chr8_118881131_118881317_- 186 +hg17_chr9_128764156_128764189_+ 33 +hg17_chr9_128787519_128789136_- 1617 +hg17_chr9_128882427_128882523_+ 96 +hg17_chr9_128937229_128937445_- 216 +hg17_chrX_122745047_122745924_+ 877 +hg17_chrX_152648964_152649196_- 232 +hg17_chrX_152691446_152691471_+ 25 +hg17_chrX_152694029_152694263_- 234
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fasta_tool_compute_length_3.out Thu Oct 29 22:14:42 2015 -0400 @@ -0,0 +1,18 @@ +EYKX4VC01B65GS 54 +EYKX4VC01BNCSP 187 +EYKX4VC01CD9FT 115 +EYKX4VC01B8FW0 95 +EYKX4VC01BCGYW 115 +EYKX4VC01AZXC6 116 +EYKX4VC01CATH5 82 +EYKX4VC01BCEIV 47 +EYKX4VC01BWERM 83 +EYKX4VC01BT2O7 69 +EYKX4VC01BO0UO 222 +EYKX4VC01CBCPK 83 +EYKX4VC01B474S 54 +EYKX4VC01BB4QL 57 +EYKX4VC01BJ37M 64 +EYKX4VC01BV9R8 54 +EYKX4VC01CEPP8 60 +EYKX4VC01BTLME 78