Mercurial > repos > sanbi-uwc > repeatmasker
changeset 0:589811e6b4db draft default tip
planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit da14cd54b0606513434a6e5bf2ed2881ab2220ec-dirty
author | sanbi-uwc |
---|---|
date | Sun, 22 Apr 2018 02:00:59 -0400 |
parents | |
children | |
files | repeatmasker.xml test-data/repeats.fasta test-data/small.fasta test-data/small.fasta.align test-data/small.fasta.cat test-data/small.fasta.gff test-data/small.fasta.log test-data/small.fasta.masked test-data/small.fasta.poly test-data/small.fasta.stats |
diffstat | 10 files changed, 1063 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repeatmasker.xml Sun Apr 22 02:00:59 2018 -0400
@@ -0,0 +1,208 @@
+<tool id="repeatmasker" name="RepeatMasker" version="0.0.1" profile="17.01">
+    <!-- Galaxy wrapper around the RepeatMasker command line program. The command
+         section links the packaged RepeatMasker libraries into a private ./lib
+         directory, runs RepeatMasker on the input FASTA and moves each result
+         file onto the matching Galaxy output dataset. -->
+    <description>RepeatMasker</description>
+
+    <requirements>
+        <requirement type="package">repeatmasker</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        ## Link the packaged libraries into ./lib so that a user supplied RepBase
+        ## file can be placed next to them without touching the installation.
+        RM_LIB_PATH=\$(dirname \$(which RepeatMasker))/../share/RepeatMasker/Libraries &&
+        mkdir lib &&
+        export REPEATMASKER_LIB_DIR=\$(pwd)/lib &&
+        for file in \$(ls \$RM_LIB_PATH) ; do ln -s \$RM_LIB_PATH/\$file lib/\$file ; done &&
+        #if $repeat_source.source_type == "repbase":
+            cp '${repeat_source.repbase_file}' lib/RMRBSeqs.embl &&
+        #end if
+        RepeatMasker -dir \$(pwd)
+        #if $repeat_source.source_type == "library":
+            -lib '${repeat_source.repeat_lib}'
+            -cutoff '${repeat_source.cutoff}'
+        #else if $repeat_source.source_type == "repbase":
+            -species '${repeat_source.species}'
+        #end if
+        -parallel \${GALAXY_SLOTS:-1}
+        ## Flag parameters are emitted unquoted: an unset flag expands to nothing
+        ## instead of an empty '' argument being handed to RepeatMasker.
+        ${gff}
+        ${ignore_n_stretches}
+        #if $advanced.show == 'yes':
+            ${advanced.is_only}
+            ${advanced.is_clip}
+            ${advanced.no_is}
+            ${advanced.rodspec}
+            ${advanced.primspec}
+            ${advanced.nolow}
+            ${advanced.noint}
+            ${advanced.norna}
+            ${advanced.alu}
+            ## NOTE(review): RepeatMasker appears to document -div as taking a numeric
+            ## percentage; it is passed here as a bare flag. Confirm and expose the value.
+            ${advanced.div}
+            ${advanced.search_speed}
+            ## Numeric options need their option name in front of the value.
+            -frag '${advanced.frag}'
+            -maxsize '${advanced.maxsize}'
+            ## -1 means "use the default" (see the help of the gc parameter).
+            #if str($advanced.gc) != '-1':
+                -gc '${advanced.gc}'
+            #end if
+            ${advanced.gccalc}
+            ${advanced.nocut}
+            ${advanced.keep_alignments}
+            ${advanced.invert_alignments}
+            ${advanced.xout}
+            ${advanced.xsmall}
+            ${advanced.poly}
+        #end if
+        '${input_fasta}' &&
+        ## The result files in this branch are only collected when -is_only was not requested.
+        #if $advanced.show == 'no' or ($advanced.show == 'yes' and $advanced.is_only != '-is_only'):
+            mv \$(basename '${input_fasta}').masked '${output_masked_genome}' &&
+            ## Turn the space aligned .out report into tab separated columns and
+            ## replace its first two lines with a single tab separated header.
+            sed -r 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g ; 1,2c SW score\t% div.\t% del.\t% ins.\tquery sequence\tpos in query: begin\tend\t(left)\trepeat\tclass/family\tpos in repeat: begin\tend\t(left)\tID' \$(basename '${input_fasta}').out >'${output_log}' &&
+            mv \$(basename '${input_fasta}').tbl '${output_table}' &&
+            #if $gff == '-gff':
+                mv \$(basename '${input_fasta}').out.gff '${output_gff}' &&
+            #end if
+            #if $advanced.show == 'yes' and $advanced.keep_alignments == '-ali':
+                mv \$(basename '${input_fasta}').align '${output_alignment}' &&
+            #end if
+            #if $advanced.show == 'yes' and $advanced.poly == '-poly':
+                sed -r 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g' \$(basename '${input_fasta}').polyout >'${output_polymorphic}' &&
+            #end if
+        #end if
+        mv \$(basename '${input_fasta}').cat '${output_repeat_catalog}'
+    ]]>
+    </command>
+
+    <inputs>
+        <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" />
+        <conditional name="repeat_source">
+            <param label="Repeat library source" name="source_type" type="select">
+                <option selected="true" value="repbase">RepBase</option>
+                <option value="library">Custom library of repeats</option>
+            </param>
+            <when value="repbase">
+                <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" />
+                <param name="species" type="text" value="homo sapiens" label="Repeat source species" help="Source species (or clade name) used to select repeats from RepBase" />
+            </when>
+            <when value="library">
+                <param name="repeat_lib" type="data" format="fasta" label="Custom library of repeats" />
+                <param name="cutoff" type="integer" argument="-cutoff" value="225" label="Cutoff score for masking repeats" />
+            </when>
+        </conditional>
+        <param name="gff" type="boolean" argument="-gff" truevalue="-gff" falsevalue="" label="Output annotation of repeats in GFF format" checked="false" />
+        <param name="ignore_n_stretches" type="boolean" argument="-excln" truevalue="-excln" falsevalue="" label="Ignore stretches of Ns when computing statistics" checked="true" help="Scaffolds are sometimes joined with stretches of 25 or more Ns. This option ignores them when calculating repeat statistics" />
+        <conditional name="advanced">
+            <param name="show" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Show advanced options" />
+            <when value="yes">
+                <param name="is_only" argument="-is_only" type="boolean" truevalue="-is_only" falsevalue="" checked="false" label="Only clip E coli insertion elements" />
+                <param name="is_clip" argument="-is_clip" type="boolean" truevalue="-is_clip" falsevalue="" checked="false" label="Clip IS elements before analysis" help="Normally RepeatMasker will report on IS element, with this option selected it will clip them before analysis" />
+                <param name="no_is" argument="-no_is" type="boolean" truevalue="-no_is" falsevalue="" checked="false" label="Skip bacterial insertion element check" />
+                <param name="rodspec" argument="-rodspec" type="boolean" truevalue="-rodspec" falsevalue="" checked="false" label="Only check for rodent specific repeats" help="If this option is selected, a check for rodent specific repeats is done instead of a full RepeatMasker run" />
+                <param name="primspec" argument="-primspec" type="boolean" truevalue="-primspec" falsevalue="" checked="false" label="Only check for primate specific repeats" help="If this option is selected, a check for primate specific repeats is done instead of a full RepeatMasker run" />
+                <param name="nolow" argument="-nolow" type="boolean" truevalue="-nolow" falsevalue="" checked="false" label="No low complexity masking" help="Skip masking of simple tandem repeats and low complexity regions." />
+                <param name="noint" argument="-noint" type="boolean" truevalue="-noint" falsevalue="" checked="false" label="No interspersed repeat masking" help="Only mask simple repeats, skip masking of interspersed repeats." />
+                <param name="norna" argument="-norna" type="boolean" truevalue="-norna" falsevalue="" checked="false" label="No repeat-like-RNA masking" help="Skip masking of small pol III transcribed RNA (these are masked by default because they resemble SINEs)" />
+                <param name="alu" argument="-alu" type="boolean" truevalue="-alu" falsevalue="" checked="false" label="Limit masking to (primate) Alu repeats" />
+                <param name="div" argument="-div" type="boolean" truevalue="-div" falsevalue="" checked="false" label="Limit masking to less diverged (younger) repeats" />
+                <param type="select" name="search_speed" label="Search speed vs sensitivity trade-off">
+                    <option value="">Default</option>
+                    <option value="-q">Quick (5-10% less sensitive, 3-4 times speedup)</option>
+                    <option value="-qq">Rush (10% less sensitive)</option>
+                    <option value="-s">Slow (0-5% more sensitive, 2.5 times slowdown)</option>
+                </param>
+                <param name="frag" type="integer" argument="-frag" value="40000" label="Maximum contiguous sequence searched" help="Maximum length of sequence that is searched without fragmenting" />
+                <param name="maxsize" type="integer" argument="-maxsize" value="4000000" label="Maximum length for IS or repeat clipped sequences" />
+                <param name="gc" type="integer" argument="-gc" value="-1" label="Select matrices for this GC%" help="Valid values are a percentage or -1 to choose the default" />
+                <param name="gccalc" type="boolean" argument="-gccalc" truevalue="-gccalc" falsevalue="" checked="false" label="Calculate GC % for all sequences" help="By default RepeatMasker skips calculating GC % for small sequences" />
+                <param name="nocut" type="boolean" argument="-nocut" truevalue="-nocut" falsevalue="" checked="false" label="Skips cutting of repeats" />
+                <param name="xout" type="boolean" argument="-x" truevalue="-x" falsevalue="" checked="false" label="Mask with X instead of N characters" />
+                <param name="keep_alignments" type="boolean" argument="-ali" truevalue="-ali" falsevalue="" checked="false" label="Output alignments file" />
+                <param name="invert_alignments" type="boolean" argument="-inv" truevalue="-inv" falsevalue="" checked="false" label="Invert alignments in alignment file" help="Show alignments in the orientation of the repeat sequence, not the query sequence" />
+                <param name="xsmall" type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" />
+                <param name="poly" type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" />
+            </when>
+            <when value="no">
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output_masked_genome" format="fasta" label="RepeatMasker masked sequence on ${on_string}">
+            <filter>not advanced['show'] or (advanced['show'] and not advanced['is_only'])</filter>
+        </data>
+        <data name="output_log" format="tabular" label="RepeatMasker output log on ${on_string}">
+            <filter>not advanced['show'] or (advanced['show'] and not advanced['is_only'])</filter>
+        </data>
+        <data name="output_table" format="txt" label="RepeatMasker repeat statistics on ${on_string}">
+            <filter>not advanced['show'] or (advanced['show'] and not advanced['is_only'])</filter>
+        </data>
+        <data name="output_repeat_catalog" format="txt" label="RepeatMasker repeat catalogue on ${on_string}" />
+        <data name="output_alignment" format="txt" label="RepeatMasker alignment on ${on_string}">
+            <filter>(not advanced['show'] or (advanced['show'] and not advanced['is_only'])) and advanced['show'] and advanced['keep_alignments']</filter>
+        </data>
+        <data name="output_polymorphic" format="tabular" label="RepeatMasker possible polymorphic repeats on ${on_string}">
+            <filter>(not advanced['show'] or (advanced['show'] and not advanced['is_only'])) and advanced['show'] and advanced['poly']</filter>
+        </data>
+        <data name="output_gff" format="gff" label="RepeatMasker repeat annotation on ${on_string}">
+            <filter>(not advanced['show'] or (advanced['show'] and not advanced['is_only'])) and gff is True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_fasta" value="small.fasta" ftype="fasta" />
+            <param name="source_type" value="library" />
+            <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />
+            <output name="output_masked_genome" file="small.fasta.masked" />
+            <output name="output_table" file="small.fasta.stats" lines_diff="2" />
+            <output name="output_repeat_catalog" file="small.fasta.cat" />
+            <output name="output_log" file="small.fasta.log" />
+        </test>
+        <test>
+            <param name="input_fasta" value="small.fasta" ftype="fasta" />
+            <param name="source_type" value="library" />
+            <param name="gff" value="-gff" />
+            <param name="show" value="yes" />
+            <param name="keep_alignments" value="-ali" />
+            <param name="poly" value="-poly" />
+            <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />
+            <output name="output_masked_genome" file="small.fasta.masked" />
+            <output name="output_table" file="small.fasta.stats" lines_diff="4" />
+            <output name="output_repeat_catalog" file="small.fasta.cat" />
+            <output name="output_log" file="small.fasta.log" />
+            <output name="output_alignment" file="small.fasta.align" />
+            <output name="output_polymorphic" file="small.fasta.poly" />
+            <output name="output_gff" file="small.fasta.gff" lines_diff="2" />
+        </test>
+    </tests>
+    <help><![CDATA[
+RepeatMasker is a program that screens DNA for interspersed repeats and low
+complexity DNA sequences. The database of repeats to screen for can be
+provided as a FASTA file or downloaded from RepBase_. If the RepBase option is
+chosen the RepBaseRepeatMaskerEdition file should be downloaded and
+unpacked, and the enclosed EMBL format file ('RMRBSeqs.embl') should
+be uploaded to Galaxy for use with this tool.
+
+Further documentation is available on the RepeatMasker homepage_.
+
+.. _RepBase: http://www.girinst.org/repbase/
+.. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html
+    ]]>
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @misc{RepeatMasker,
+            title = {RepeatMasker Open-4.0},
+            howpublished = {\url{http://www.repeatmasker.org}},
+            author = {Smit, AFA and Hubley, R and Green, P.},
+            year = {2013-2015}}
+        </citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/repeats.fasta Sun Apr 22 02:00:59 2018 -0400 @@ -0,0 +1,64 @@ +>Asian_seabass_ONSAT_SB_Concensus_Lenght_170_bp +CGAAAAATTTAATAATTTAGGGGTCTTGAGCATGGGCGTGGTAAAATGCCCTCGGTAGCG +CCACCTACATTTTTAAACGGAACAGCCCCTCAAGCCCGTTGCGCCTAAAAATCTGAAAAT +CTGCACACATATGTAACATCCCATGACGCACCAAAAAGTCTCTTGGAGCCA +>Asian_seabass_MOSAT_SB_T_34a_satellite_DNA +TTTGTGACATCACTACATAGTTTGTTGAAAACGTAC +>Asian_seabass_MOSAT_SB_T_34b_satellite_DNA +TTTGTGACATCACACATAGTTGTGGGTCAGTAC +>Sat_38 +AAAAAATGTCATAGTATAGTATGGCGTCAAAAAACATG +>Asian_seabass_Sat_217_Consensus_Length_217_bp +AGTAAACAAGCATTATGGTTGAAACCATAATTTCCTGTCGGGAGAGCCTTTCCCTCTTTT +GTGCACTGTATGCAATCCCAGAGTGTGAATAAGCGCTTTTCCAGCGTTTTGAGGCTTATT +CAGCTCAGAATGGCTTAATACTGCACTATCTGACCAGGACAAAGACAAACTAAGAACTCA +GCCACACGGACATGAAAGTTGTTTTACTTACAATATT +>Asian_seabass_Sat_217_Consensus_Length_427_bp +CCAGCGTTTTGAGGCTTATTCAGCTCAGAATGGCTTAATACTGCACTATCTGACCAGGAC +AAGACAAACTAAGAACTCAACACACTGACATGAAAGTTTCTATATTTCTATTAAAATAAA +CAAACATTATGGTTGAAACCATAATTTCATTTCGGGAGAGCCTTTCCCTCTTTGTGGCAC +TGTATGTAATCTGAAGTGTGAATAACGCTTTTCCCGCGTTTGAGGCTTATTCAGCTCAGA +ATGGCTTAATACTGCACTATCTGACCAGGACAAGACAAACTAAGAACTCAGCCACACGGA +CATGAAAGTTGTTTACTTTACAATATTAGTAAACAAGCATTATGGTTGAAACCATAATTT +CCTGTCGGGAAGAGCTTCCCTCTTTTTGTGCACTGTATGCAATCCCAGAGTGTGAATAAG +CGCTTTT +>Asian_seabass_Sat_LM_Consensus_Length_453_bp +CCCAACAAAATAGCACTTACTGCTTTCCAAATTCAAGTCTGTGCCTGTGTTAGGGTAATG +TTGGTGATAAATTGATGTACTTAGCTTATCCTGTGAAAATGGTGTAAAATGGCAGTCTCC +ATGTTTGTTGAAGAAACTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAG +AAATCATGAGTGGCTCTTATTGAAGTGATCAGAGTTGGACTCCATTAAGAAAATGGCTTT +CAATGTAGTCAGTTGCTTTGTCACTTAAAACTGCCCCAACAAGTAGCACTTACTGCAATT +TGGTAGTCAAAGTCAATGCTCAAGTTAAGGGTAATAGTAGTGACATAATAGAAAAATCTC +TAAATATATTTGCTGGCAGCTTTAATATACAGAATGAGTGCCATGATGAATTCTTGATAC +AGAACAGGGACTTCCAAAATCAGCCAACACTAA +>Asian_seabass_Sat_LM_Consensus_Length_218_bp +CCCAACAAAATAGCACTTACTGCTTTCCAAATTCAAGTCTGTGCCTGTGTTAGGGTAATG 
+TTGGTGATAAATTGATGTACTTAGCTTATCCTGTGAAAATGGTGTAAAATGGCAGTCTCC +TGTTTGTTGAAGAAACTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAGA +AATCATGAGTGGCTCTTATTGAAGTGATCAGAGTTGG +>Asian_seabass_Sat_LM Consensus_Length_150_bp +CTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAGAAATCATGAGTGGCTC +TTATTGAAGTGATCAGAGTTGGACTCCATTAAGAAAATGGCTTTCAATGTAGTCAGTTGC +TTTGTCACTTAAAACTGCCCCAACAAGTAGCA +>Asian_seabass_Sat_Unk_Consensus_Length_341_bp +GTTTGGGTAAAAATATTGTCTATTTACGAGCTATCCTCTATATATTTTTGATCTGATTAA +TAAATACCTCTAGCCCACAAACTGTGGTATTTTGCTATGTGGCAGTTCACCCGAAAGTCC +AGTAGCATTTATCGCATTTTCTAAAGATAGTCAGTGCCTGAAAGTTTGAGGCAGATAAAC +AAATTGTTCAAGTAAGAACTATATCTTTCTTATGATTTTACCGCAATCATACAGGTTGTT +TCTTGTGGTCTGCTGGGCATTGTATCCCTTTGTTGTATGGATTTTTCCTTTCTTTAATGA +TCTCCTCCCTGGAGTTTGTAATCCCTGTTTGTAGTGGAATT +>Asian_seabass_Sat_Unk_Consensus_Length_789_bp +CAAAAAAATGGAAAAAAAAAAAGTGGCTCATTTGAAGTGAATCAGAGTTGGGTAACATTG +TCCTTAATTACAGCTATGCTTATATACTTGATCTGATAATAATACTCTAGCCACAATCTG +TGGGTATTCTGCTATTTGCAGTCCACCCAAAGTCATAGCATTTAATCCATTCTAGGAAGA +TACGTCAGTGCTGAGTTGAGCCAATAACACCAAATTGTCATAAGAACTATTATTTTCTTT +ATGATTTTTACGCATCAGTACAGGTGTTTTCTTGTGGTTTCTGCTGCATGTATCCCTGTT +GTAAGTGGATTCTCCTTTGCTTATGATCTCCTCCGCTGCGTTGTATCCTGTTTGTAGTGG +ATTTCCTTGCACTGATTCTGCCCGCTCTTAATACTGAATGAACGCCCTCCCACCAGTAGT +ACTGCCACCTTTGTTTTTTCACAAAGTGTTCAATGGCCTTGATAATGACTTGGTAACTAC +ATCACCACTTTTTTGTACGTAATCAACAGACAATCACCCATTAAATTCATGCGGCCATTA +GGCAGCTTGATGAGCTGGACATTTGACCTCCCCAATGAATCTTTGCTGATGGGAAACTTA +TTGCCACCATAAAGTGAACCAGCATTGCTATGTCCATGCTGTAATTTGAAAACCCAATAC +AAAGAAGCCCTGCATATATACCTCCCAAATTGGCTTCTGAGACCCGGGCGTAATCACTGC +ATGGCCATAGATAAACATTGAAATCTTCTGTTGCAGTCAGTTTGCTTTTTGTCAACTTTA +AAAACTGCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta Sun Apr 22 02:00:59 2018 -0400 @@ -0,0 +1,238 @@ +>scaffold_1 +TGCTTTTCTGAGATTTATGTCAGCTCCCCATTGGGAGATACAGGAGAATCCAATATATTC +CTGTCTCGCTTCTTTTATATTTATCCTTTCTACACCTGACATCCTCAGTGATTGAAGTGA +CAAAAAGTGGAGCGCACGCAGTGGTCATTACCCTCCAATGGTACTTCTAATTAAGGAAAG +ATTTTTTGCATTCACTGAGCAAAACACTTATTTGCATGAATGGAAAATCATAAATGAGGG +GTCCATTAAACAAATTTCCTAATTGTTTGCTTTTTTTTCTTCCTACTTCACCATCCCTCT +AAAGCTATTACTCTCTATTCTACCCCATTATCGAGTGCACAAACACCAGCCCCAAATTGT +CTTATCTCTGCTTTGATAAATGATATTTTTTTCTCTTATAATTGTGTTTCTCTCTAGATG +TGCGGTGTCCATCAAGACACTGGATGTCACGTGGTCACTCCTGTGAAGAGCGAACTGTGT +GGAACCCGAAGTACTGTGTGGTTGCTGACTGTCAGATGCTGCTTCTGAATGAGGAGGAGG +TGGTGAGAGCAGACAGCACAATCTCTTGTCTGGCATGTTCCTCTGTGTCACCATTGTCTC +TCTCATCTGCTCGTGTGTGTGTGTGTGTGTGTCCTTGTCAATGTCATTACACAATTTATG +TTGGTGCACTGCATGTCAGAGTCTGAACTTTGTTGGACATCTACTTTCTGTCACCTGATA +CCTCATACTTTACACTCTAATCCCTTTTTCCTTTCTAACTGCTGCTGTGTTAAGCTGTCA +TATTAATATACTAAATAATGATAATATTAATTCTAATAATGATAGTGAATGGAGATTCAC +AATGAAGAAACACAAGGTCCAGACTGAACCCCTGTGCTGTGTGAAGAGTTCACTCTACAT +TTACATTCATCCCACATCAGAGACCAGCAATAGCCAGGTTAAGCAGAGCAAGGCTGAGTC +TGGCTCTTTGCCTCTGTTGTTTTGATTAGTGTCACACTGGATTATGCTAGTTTGTGTGAG +ATTAGAGGTCCTGTTGTTACGAGCAAATCAGCAGTCAGCAGGGTAATCTAAATTACATTG +TTCTGTCGGCAATATCCTCTCCTCGCTCAACAGCTTTGTGTGAGCTCATATTTAAAGCCC +CCACAGTAGTGCAGTGCATAAAATCATGCAGATACAGATCAGAAGCTTCAGGTAATGTTT +ACATCAAACATCAGGGGGAAAATGTGATCTCAGTGCCTCTGACCGTGGTGTGATTGTTGG +TGCCGGACAGGCTGGTTTGAGTATTTCTAAAACTGCTGATCTCCTGGGATTTTCACACAC +GGCATTCTCTAGAGTTTACTCAGAACGGTGTGGAAAGCAAAAAAACATCCCGCGAGCTGC +AATCCTGCGGATGGAAACACCTTGTTGATGGGATATGTCAGAGGAATGGCCAGGCTGGAT +CAATCTGACAGAAAGGCGGTAACTCAGATAAGCACTCTCTACAACTGTGGTGAGCAGAAA +AGCGTTCTCAGAATGCACAACACGTTCAACCTTGAGGAAGATGTTGCTACAGCAGCAGAT +GAAGACCACGTCTGGTTCCACTCCTGTCAGCCAAGAACATCTCAGACTGCACAGGACACC +AAAACAAAAACACATCCTGTTTTCTACTGGTGGTAGAGTCACAATTTGGCAATAAGATAA +ATCCATGGACCCAACTTGCCTTGTGTCAATAGACCAAGCTGCTGGTGGTCTTGGGGAATG +TTTTCTTTACACTTTCACACTATCGGCCTATATAAAATCCTATTATAAACTGCTGTACAG 
+TGCATTGGGTTTGATAAGCCCTCTAAGTCTGTATATCATGATCTCATTGATACCTGCGGC +AATACACCCCCACAAACGCAGCTCCCTGCATTTTAAGTATTATAGAGAGTAATTTCACTG +TCCATGTCTTTTTTGATCATAAAGCAGGTCTAGGTGCTGTATACAACACTGTATTGAAAT +GCTGAAGCAGGTCTAGGTGCTGTATACAACACTGTATTGAAATGCTGAAGCCAATATATC +AATCTATCTGTCTATATTTTATGAAATATCAGAGCATTTTAGTCCAAATTTGTCTGTTTT +GTTCATTGCAATGCTGACTGCTTAACATTTTCCTCAGTGGTGGCCATCACCACAGCATTT +ACTGGTGAGTTTTACCAAGCTACAGTGGGCCACAATGAGTTACCTGTTATTGGCCTGGCT +CTATGGCATGCAGAAAACAGTCAGCCAATCAGAGGACAGACTCAGAGACAGACACAAAGT +GCCCTGTTCTTGTTAGAGCAGAGATAAATGAGACAGAGGAGATGTAAAACTATACTGAGA +TGGTTTTTTGGTTCTTAAAACCACAAATATATTGTATTACGGATATCAAAACTTCAAATA +AAACCAGTAAAGTGTAAAACATGGGACCTCTAAATGAAGGGCTGTTTTGTTTCGTAGTTC +TAGATTATGTCACTATGTGGCCTCGGTCTCTGTCTGCGTAAAGCGGCTACATGTAGAATT +TGACCCACTTTGGTGCCCACATATGGTAACTAAAACACTACAGACAGTATGCACTCCAAC +CCTATATAATTTATGTCCTTCAGTGAAGCCTACAAAATGTGCAGCATGATTTGCTATGAT +TAATACCTAGAATTATATTAAATTTTAGAAGTTTAGAGAGATGGCTTATACCTTTTTCTG +AGCATGGAGTCATCTCATGTAATTTGTCACATACCTTTGGCCAGCTTAAACCTGGTCATT +CCAGTGAGAGCTGCTCACCCAAATGTTTCTGTATGCACCTCCTGCTGTGGGTGCCAATAG +CTCATGCAAGTTGGTGTCTTTCAACTAAGGCCACTATGTGAGGACCATCAGCAGCAAATT +AAATTAGAAGTGCCTTGGAGTTTCTCAGGCCCTCTTCTATGATCATTAAGGTCTCAGTGA +CAGAACAGTGCCCCTTTGAGGGACTATGGCATCTTGAGCACTTAAGTTGGCCGAATGTTT +GATCTCAAATATGAGTGCACTGTTTCAAAGTCACAAAAGCCTCTGGACAAGGCATAGTTA +CTGGGCTTGGTTAACCTTTTTTTCATACCGGCGTGACTGCTGGAACTTGGAACATGTAGC +CTAAGTTTCCCTTTGTCTGCAATTACTGAGGTATGCCATGTTGAAATAGAACAATAATTG +CAGGGAGGAATTATTCCAATCGACCCATCGGCAGGCTTTCCAATACAGTGCTTTTTGTTT +CACAGCAACAAAGAACAAGAGCTTTTAAATGCATCCATCTGGACCCTGACTATGAAAGGT +CATTGGTAACGCTGCTGGATTTCTGTTATGTCAGCAAACTGCAGGTCAAAGCGATGATAC +AGTCTTATAATGGCTTTCTCCAAGTGAAGGTGGGATAACATGGTTTAAACAAGCTCAGTT +AAATGGATCAGAGCATCAACTGCTGTGAACAAGCAATCACACGGTCAAGGTCAAACATTT +ACACTTCTGAGAGATCTGGAGAGTATGTCATGGCAGTATTGCTTTTCAGTTATTTCAGTA +ACTGATTACGCCTTGTACTTAGTCTCAGAATTAGTGATTTGATTCAAAGTGTTTTATATA +TGTATATTTCAAAGCAAAAATACAGCTTAGGTAAAGCTCCTTAGACTCATGCAGCCTAAT +TTACTGTAGACATTCCTCCATGTACAGTACTGTGCAAATGTTTTAGGCAGTTTTAGGCAC 
+TAAAGGTGAACTGAGGATGCAGTCCCACGATTAATTTTTATTCATCAGTTAACCTCATGT +GAAGTGTAGTAAACAGAAAAAACCTAAATCAGATCAGTATTTCTTATGACCCCCCTCCCC +TACCACCCATCTTTAAAACAGCTCCAGTTCTCCTCAATCCCCCTGTACTTAGGTAGTTTT +TTTCTAGCGTCTTGGAGAAGTTGCCTCAGTTCTTCTGTGGATTTTAGGCTGTCTCTGTGT +CTTCTGTCTCCTCATGTATTCTCAGACTGACTCCATGATGTTGAGATCAGGGCTCTGTGG +TGGGGGGGTCAGACCATCTGTTGCGGGACTCCTTGTTCTTCTTGTCTCTGAGGATAGTTC +TTTATGATGCTGACTGTGTGATTGGACTCATTGTCCTGCTGCAGAATTAATCTGGGACTG +ATCAGACATCTCCCTGATTGTACTGACAAAGGATAAGAGCAAAAACCTGTAGACTGCCTA +AAACTTCTGCACAGTACTGCACAATACTGTATGTATGTGTGATAATACTCAGAAGTGATA +TAGAGTGGTTCAGGAGAAGCCTTAACTTACTTGTTCTTCTTTTCCCTATGATGCAATTTC +TCTCCAAGCAAACAGTACATAATGAAATATGTCTAAGAAAACACTAGCTGTTTGTCTTTT +TGAAACACATTTAATAATTCAGATTTAGCCACAAATTAAAAATGAGCTTATACTTTGAAC +TTGCCCATTGGTTTTGATGTATGAAGCTGACAGATTTAGTCCACTGCATACTGCACTTTT +TTGGATAGTGAAATAAATTAATTTATTATGCAGCATTTAAGCATATTGCTGTTATCTAGA +ATTTTATATATTAATAGGTATATATTATATTAACTCTTCATTTGCTTAAATTTGGCCTGT +ATATTCCTCCATTTTATACAACCTTTAGAAAACACTGGAGTGAACAAAAATGTGAGGTTC +AAAAGTGAGAGGAAAGAAAAATCAGTGTGTGCAGCAAGACTGTAAGAGTCCCTGAACAAA +AGTGTTGATTTATTGTTGGCTGTGAAAGTTTTTGCTGCAGGGGTTTTTGTAAACAGTGAC +TCTTCTTTATGTCTTTGTTTCTATCTTTTTGTGTTTTCTTTTTTCATCTTATTCTATTTT +TCTCTCTCTCTCTTCGGTCAGTTTCTCTTCTTCTCCTCCTCATACTCATGCTTCTCTTTT +ACTCTAATGGAAAACATAGAATTGAGGTGCAGTATCACTGAGAACAGCCAATGCTGATAC +TGTCATTTGTTATAGGATGTTTACTTAAGAAATGATTGTGGCCTTATTTGCCAAAGACGA +TTAGGCCCTGAGCCATTTGTACAGCCAATGTGACACCTCCGTAACCATAAGAAAAATTAT +GCTTCCACAAACAGAAAGAGAGACAAATACAGACAGTGAAAGCAAAACTATGAGAGAAGG +GATAGCCATCTCAGCATTCAGCCAGTGTGTCACATGTCCAGAGGGCTGCAATATGAAGCC +AATAAACTGAGAGAAAGGTGTGATTCCAGAGGGCTTTCCGGACTTTAAACTTTTCTCATT +TTCACCTTCAGCTCGTTGTGAAGCCTTCAATTGTTCCTTGGTCACATGAAGCCCCCTGTG +TGTAAAATTGACATTCAGCTTATTACTCTCATCTACATCTCTAAAGTTGCACATGACTGC +CAGTAGAGGGTGCTCATGATTTGAGACAAGTAACTTTTCTGTGCTTGTGTCAACAGTTAA +AAATTCCCTATGGATACTTGAAAGAAATGCACCAGTGTTTAAATTTAGACTCAGTCCAAG +TAGGCTTTGCCATTTGCTTTACTGTTGTAATCAAATCAGTGCTCACAGTACATCAGTGGC +AAAAGCAATTAGCTTAATTGAGTTAGTAGGGAGTGAAAAGATAGATCACTATTTCCTGGC 
+AGCGATATATTTTTCTCAGTTAACCTTCTCACAGAAAATTAGTTAGATGAGTTTCTTATC +TCATTAGAACCATTGTTGTTTATTATTTAAACCTGACCTTTATCCAGAGAATGTTTTTCA +CAGTAAGCATGTGTTTCACTCCAGCTCATACAGACACACCTTGTGGCCAGTCAGGGAATG +TGCATTAAAACTAATGTTCTCTCTGGTAGAGCCATTCTGCCTATTCTGTTATTCACCAAA +ACTTTCCTGCCCAGATTTTTCCATCTAGTCAGGGGACTTTTTCCTCAGCTGAACTGTCAG +TAGCCAGCTGCTCTGACCTTTACTCACCTCAGAGATTACTCATTCAAGTTATTTTACCAA +TGATGGAATAAAGTGAATACACACATCGTGACTTCATCATACTTTCCCTCCTTTACCGTA +CTTTCCTAACATCAGTTAGCAAATTATACACTTAAATATGCTAAAATAGAGATTTGTTTC +AAATTGAAGAGCAACACAGGACACATTTGCTTACACAAGTGACTCTATCTTATTTTTTAA +GTGGCTGTCATCCTCAATATCATCAATATTTTAGGCATGTGAATGTAGGTAATGAGTATT +ACTAATTAATAATTGGATGAGTTTATGAATTCATCATTAAATAAGATTATTATGATAAAG +CTACACATTATACTGCAATTCCGGGCCAACACTATTGTTTCGCCCTTACAGTACAGGACT +GTACATATCACTGTCACTACAAATTGTTTCAGTGAGGCTCAGATTACAGCTCATCTTGCT +TAGTCATTACGGCTCAGACCACCAGACAATGTCTTTACGTCAGAGAAAGTCTGAGTGAGA +GCTGTCCTTAAAACCAAGTCCCATGAGGCAAGTAAAGAGGTGACTCGCTGAGAGGATCAG +GGAGAGGGACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAAGAC +AGAGAGGGTGCTGCATTGAGTCGAGAGGTGTGAGGATGTTCGATAAGCCATTATCACTCT +GCTCGGAGGACGTCAGGCCCTATCCTTTTTTCTTTGAGTCACCACTTTCTCACCACAGCC +ATCCTTCTTCCTTTGTCTGTTTAATCTTTTGCAGCCAAACATTTGGTATGCAAACTATAG +GAAGATATGGTCTGACACCGACTGATGTGATGCTAATGCCTTCTCTGTTTTATCCAATGA +CTCCTCATTTATCAGGACCAATCCAGCTATTGTGTGCTATTGGCCCAACACAGCATTAAA +CATGGTGACCAGAATAGCCACAGCAAAATAAATAACGGGCCTCTGTGACTATTGTATGTG +CATTTCTTACAGCTGTCATTTTTCTTTTCTGTTAGCTTTCATTCTACTGTGCGTGCTCTC +TATCTGCCTACGTTGTATCTGTGCCTTTTGTTAAATTCATTCAGCCTTTTATCTGTCTTG +AATTTCTTTCTCTGTGTCCTCCTTTCCCTCTCTGTCTCAGCAGATCCGTCCCTTGTTTTT +GCAGGAGAAGCGCTCTGCGTCATGCAAGGTCCATCTTCTGCGACGCACCATCAGCGTTCC +AGTGGAAACACAGTTTCCAGAACTCCATAGCCAGCTGTCCACTGAGAGCGGTGAGTAGCC +TGAGGACATGCCTCTGCTGTGATTAGCATGCATCATTATTTCCATACACACACTGGAAAT +CTTTTAATCGTTTTTCTTGACACAATTTAATTAATTTAAATAATTAATAACATTGAAATC +AACTGTTAAACACTTGTTCAACATGGTTGTGGACAAAATCATCATAATGACTGTGGAGAC +TAAGGAGAAACAATGCTTTGTTAAAATCTTTGCTCTGGTTTTGGCTTTTTCTATATTCTA +TTTGTGTGTGCAGTGCTGCTAAACATCATCTTAGTCAGAGTCAAACTTGAGCACTCATGC 
+AGATGCTTGTTCTATTCAGTGGTCAAATGGGCAGTGTAGCTTAGGAGAAGCACTTCTTTC +CGCTCCTCAGTGACATAACCAGTCACACACTCACACAGGCAAGTGAGCCTTTTAAAATTG +CTGATACTATTTTTTTCTACTTCATATCCACTAGATGGAGAGCTTGGTGCTTTGTAGCAC +TCCAGTGTACTTGGAATGTTTCATCATAACCACTAAGGCAGACATTGATAAAGGCTTCGG +TCTCTTTCAAACTGCAGGTGTCATGTTGCACTCTCCCTATTGCTTTTATGAAAATGTAGT +GCCGGTAATATAACACAGTTACTCTCTGCATATATCACTATGTACACTTACTTCTATGGA +AAAGATGGAGCGCCACAGTGAAAACTGTTTTGAGTCTGTGAGGGGAAAACACAGCATCAG +TCACAGTGAAACACTAGGTGGCACTCAGGTTTGACATTCAAGCATTTGTATCCCACAGTT +ACTGTTGCTGGGTTGTTGGCTGGCATGCAACTTAATATGATCTATCTTTAAATCAGTGTG +TGCAGTGGTTATTTAGTTTAAGTGCTTTTTAATGATGTCACAACATTATTTTGTCTACAC +CCTGCATACAGCACAGTATATTAAATTTAGGTTTTATTAAGTTAAGTAATGTTCTGAGGT +GGCATTGCCCTCAGGTATATATCCCTCAGGCAGTGTTACTGGACAGCATATAGATTGTAA +TGTTGTGTAAGCAGTGTTGTGTAAGCTTTTTTAACCAAAATGCTCTCATGTTTCTTTGTT +ACCACAGTGGTTTTAGTGATGTTTTGTGCTGTGAACAGAATCATGATTTCTGCAGACACT +CCACATCTGTTTAAACAGTGATTTTCACATCTGTCTGTAACTGAGTCAGTCAGTTTGTGG +TTGGATTGATGTGTAGTGGGATCTACAGATCATCAACCCTCGGGGAAGCCATTTAGTTCT +GTGTTAAATAAAAATACAACTTTTGAGCACTGTTTTTTCATATTTTTCTTCATCCCTTAG +TTGATATTAAAGGTGCTATATGTAGATTTTTGCTATCACTACATAGCCAATGTTAGGATT +AAGAACCCCAGCAACCCGAATGTTAACTCGTACTTTGCTTCTATTTCTATTACATATTTT +CTTCTATGGAAGTTAGGATGTTAACCAGCTAACCCCAGGTCATCTCATAATACCACTTGG +CAATAGTCAGTCACTGTTGCATTCAGTCTGCCCCTCCAGCATGAAAGGATGAAGAAGTAC +CGCTACCCAGAAGGCGTAGTCTAACCCCTTGTCTTGTAAACACAGCAATGGCTGAAGCTC +TTGGTAAGTAAACAGCTGTTAATGTCAGTGTTGGCTACATAGCTAAAACTTACACATAGC +ACCGTTAAAAAAATAAATGGTAAGTTTTTGTCCTTTTAGTTAAAGCATGTAAATGTCAGT +TAAAACAATTTAGAAAATTTCTCATTCAAATAGTCATTTTATTTTGTGTGCTATAAATAC +CTTCAGTTTTTTTTCTTGTACTTTATTTTAAACTCTTGTCTTTCTCTTATGCCCCTTATC +TACCAGGAGAGGTCACACTCTTGTTGATAGCATGATAAGGAAATGAGCAGAGATGTCCTT +ATTTCTCCTCAAAGTTATTCCCTGCATAGCACATATTTGACTTTAAAAATACAAAGACTA +CAGGTGCTAAATCACACACACAACACCCAAAAGAATAGAGGACTTTTAAATATGTGCATC +CCATTTTCTAATCTATATTCACCACTGATACAAGTGATGACACGGTGATGAGAAAATAAC +GTAACATACATTATTATACAAGATTGGTATGATTGCTGTAGGTTGGTTTTGTTTATCTGA +GAAGGGACAGAGGCTAAAAGAATAAACAAAGTCTGCCAGCTACTTCACTAATTACAGAGT 
+ACCCCTGCAACTCTGCCAATGACATTCTGAAACTTTTCCATGACTATTATGTAAGATAAT +TTTTGAATCACTGATTCAGATTTCCATAGCCAAGATACACTATTCTGCATGTTGGCTAGA +ATACATTACAATACATTAATATATTACTGTAGTGGTTATTATGTCAGCCATTTAAACATA +ATTTCATACTTTCAGCTGTTCGCAGGGCAAACTTTTTATTTTTGTCACCTGAATCCAGTT +GCATTTGGATTTGAATTCAGTGGTGACTGCCAATGAAAAATAACTCACAGGGGCACTTAG +AGATCTGAGGCGGCATTGACTGAACATCGAAGGGCTACTATTACAGAAATGCAAGTGGAA +AAAGAATCTGATATGATACTTGCCAGTATGAAAAGCATAATTAGGACTTTAAATGCATGT +TTTAAATGTTTTTGTGAACCATAATTTCAGTGTAAAAGTTCCAGTTATATTTTAAAAGGA +ACTCCAAATATCCAGTGGTAGCATCAAGGTAACCTGAGATAAGTGAAAAGTAATTCTAAA +TTAATCAGTATGTTGAAATATCAGACATTTGTGTGCATTATTGTTAGTGTGACCATGGTG +GGATACTTCAAACACACTTGTTCATTGGCAAAATAATTGTCACATGAATTAACACAAGTG +ACACTTTCAGTATCATACTGACAGAATGTTTGTTGTTAACTTTATCCCTGAAAACAAACA +CTAATACATGTTCTTGTCGCCCCTTCAGTGCCTAACCTTTAAAATTGTGCTTGTGAGTGT +GTGTATGTGAGCGCACCTGTATGGTTTTCAAATAATGTTCCACATCACAACACTACAAAC +ACTCACATGGTCTATGTTTTACTAAATTATTCATTGGCGTAGGCCCCACCTGCTCTTGTC +CTTGTGTGACACAGGCTCTAAATAAGCAGCATGATGAATAAAAATGACACTGAGATGAAT +AGGAATCCACAGCAAATCAGAGAGCACTCTCATCTCGTCTCATCCTCCGACTGAATAGCG +AGCGGCTGCCTCCTTTATTCTTTTGAACTCTTCGCGGTTTTGGCACAAACATGCCAGAGA +CAGAGAGCGCTGTTACGCTCCCACCAGAGCAACTACGCTTAATTTTACTGAGGTGAAAGT +TTTTTTTTTTTTTCTGGATAAAGCTTTGGAAAAGTTCTCAACTGTTGCTTCTTTAAAGAT +GCTGGGGCATATTCTGCCCTATTTTCCAGTTTTTTACATCTCCCATAAATATTTTCACAA +GCTGTAACATTTTAGACAGGATTAAATTGAACTAGATAACTGACCAAAGGCTGCTAAATT +ATTTTTATGTGCCTTGTAGCACAAATACTGTCTCACATATATTTTAAGTATCTTAATATT +CCCTCTGATGTTTAATTTAACAAAGCTGTCCTTCCCTCCATCTCTCTCCTCATAGTTTTT +CATTACAATTAGCTCTCATTAGATAGAATTGTATTTGTTGTCTTTGTGCCAGTCACTCCA +GTCTATTTTGCCAGCACACAACTAAATAATTGTTTTGCCTTGTCAGTGTCTTGTTTTACA +GTTTTAATGATGAGCCCAAAACATGTCAAATATGAGGAACTATAACTTATCAAGAAAGTG +GGGTAGTTGAGATAAAACTGTTTCCGAGGTGGAGGTTGGCTGAGCTCCATTTGGCCACAA +AATGTAGCTGAAAGGGCAGAGAAACCCACTTTAATGGAGTACAGGTTGCATATGAGCTGG +TAGGAAAGATATAGAATCATTATCAGCTGATAATCTAACAGTAGCAGTCAGTGTAGATGC +TGTGCTAACGCAAAGTTGTGAAACTACTGTCTGTAAGCAACTCATCCAATTGTTATTGCT +GCCAAGCTTAAGTATTTTAACAGCTTTTAGGTGTTGTTAGCACAGTACTCGTTTGGATCC 
+GGCCATGACAAGAAATCTTTATCGGCCACTAATTTAATTAATCATCTTTTTTCAAGCAAA +AATCACAAACTTTCTTTGGTTCCACTTTTTCTCATTGTAGATTAAATATCTTTTGGGTTT +TGGCACAGGCTGGACAAAAAAACCTCTGAGACGCTGTGATAAGAATTTATTTTCACATTT +TTTTTACTTTTCAGGGACTACACAATTATTTGGTGATGAAAATAATATTTTGTTGCAGCC +CAAATGTTAACTTGCCACTCAAGCTGTCAATTCAAGAAGGCAAAAGCAACAAATACTGCT +TTGCCGAGCTGAAATGAGAGTAGAAACCATGCCCGGGCCAGCTCAGCCTGGTCAGGTTTT +TGAGTCCATCTATTGTTAACATTCAGGTTGCAGCACAAGGGAAGTTCCTAAATTCTTCTG +GCATGTTAATGTTTTCCAAACTGATGTTACCAAGTCCTCGTTATGATGAGAAAAAAATAT +GCTGAGAGTGAAATTGATCAAAGTGGAAACATTAAGCACTGCCACAGTCCCCTCCACCCA +TCTTTCTCCCTCTCTCTCCACCGCAACGTGGAAACTGCTCCTCTCTCTCTCTCCCTCTCC +CTCTCCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCCTCCCTCCCA +TTCTGTGCTCCGGTATACTCGCTCTCTCACTCTCTCCCTCTCTCCCCACTCTCCAGAGAG +GGGTTCAGTCAGACAGATGTAACACAGCAGTAGAAGCCTGAGCTGAGCTGGCAGGCTGCG +GAGGCCAGACCAGAGCCAGCAGCTCTGACTGGAGAGAACAGCGAAGAGGTGACGGCAGCA +GCGGCAGCACAGGGGATAAAACAGTAGTTGTAGTAGCAGTTTCAGTAGTTGTACTTCAGT +TACAGAGCTGACTTTACCTCAGGATATGGGTGTGTTGATCTGGGCGCTCATCACCCTGTG +AATTTAACATGGACCCTGACACCAGCACCCATCCAGAGACACAACATGGTGAGTGAGATT +TGAAGGAGGAAAAGATTAGAAACAATGAGAGTGATACTGTGGGAAAAGTTGAGGAGCGTG +TTAGTGAACAAGGGAGTCAGGTCACATGGTTTTCCCGTTAGGGTTGCTATTATGACTAGG +CTCTCCATTTGGCTCTAAAGCTGTTGACTAGGGCCTCTGTGGTCAAATGAATAGGACTTC +AGTCTGTTCTGTTGAGTAGGGGGTGAAGGGGTGAGGGGTGTTACATGGCAATGTGATGAA +ATCAACTGCCTGCTTAAGACATTTTGCTCTAATGACCCTCTTGTCATTACTTTAACAGGG +TTGTAAAGTTTTTTTTGCTTTTTTGTAGCTAAGAAAGTTGAGAATTGTTTTGTCTGAACT +CTCTCTGGGATTTGTCTTGTCGGTTTTTGGCTGGTTTTTGGTGTGAGCTTCACAGAGCAG +GAAGCTCTCCTCCTGCTCTTTGAATGGAATAATTAACTGTGAGGGCCGTGGGTGTACCAT +TTCTGCTCAAAAGCAGCAAGATAATTTGATGGTTATGTGAATCTGTATATTTGTATGCTG +TGTAGTGGTGCTGCATGTGGGTGTTTGTGTTAGAGACACAGAGGGGCACCAAAGAATGAA +AGTGGGAGACAGAGGGAACGAGAAGGGGAGAGACCGAGAGAGAAGGACTTATGTACACAA +ATAAATCCAGGGGGATCTAGACTGCACTGTAGGCCAGTGCGAATGCTCACTCTTTCTCTT +CCCCCTCATTTTCCGCTGCCTCTTTTTTTTCCTCCACTGCTCCAAATGTTGGAGCCCAAA +CTAGATAGTTAAAAGGCAGAAAAAAATGGTGTGTTATTAACTGGGCCAGAAAAGACGATG +TTTTGTCTTCATGGCCGATGGGAAGGACTCTGCTGTGCTGCTGATGAAGAATGTGGTTAA 
+CAGAATGAGTCAACAGAGAACATATTTCCAAGAGACTGTAGTTTCTCTTCGCCAGCGTCT +AAGCCACATTGCTTTATGCACTGGGCTCTCTGCCATGACGGCGAAGGAGAGACAAAGAGA +GGGGAAAGAGAATCGCTAAAGATAAAACTCTGTTTTTATCCATCTCCTGAGACACAAAAC +TTCTGTTAGCAATCTAACCAAATAAGCGAGAACATTAAGCTTCTTCTCACCTTCCCAAAA +TAAAATGTCAACTTCATGCAGTAATACTCACTCTCTTCTCTCAACCAGTCAGCCAGCCTC +TTTCAGTGCATACAGCTTGAGGAGATCCCTTCTAAAGGTCCAATATAAATAGAAAAGTGG +GAGTAGAAAGGGCAATAATCTGATATCATCTGATTACATTCACACCTCAGGCTTGCACGC +TACAGGAAGAGTTCACGCTTCCCAAGGCTTGCAGGCATCACTTTTCACTCATTCTCAGAT +AGACACACAAACACATGCACAGAAATATCATTGCTGTTGTTTGCAACAAATCCTGGAGTA +GACTACTTTGCATTGAATTTCTATATGCATGTGTTTTCATATATTACACATTGCTTTTTG +TACTTTGCATATAAAGTAGATGCTGATCTGCTATCTGCATATATAGTAGCTGCAGATAGC +ACTGTAACTACATCTACATATTTCTTGTTTGTATCTACATCAAGCAGATAGTTGCTGATG +CTGGACAACCTCTTCCGATATTCATTATTTTGGAAAAAAAAATATCAATGGCATTCCTAC +CTTACAACTTAAATTAAAAGTGGTCACTTGAGTAACTGCAGAACATGTGTTCACCCAATA +GTTTTTTTTAGAGAGTGTCACTCAAAGTATTCCTCCTTACAAGCACCCATGCCTAAATCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.align Sun Apr 22 02:00:59 2018 -0400 @@ -0,0 +1,104 @@ +18 0.00 0.00 0.00 scaffold_1 613 632 (13519) (GT)n#Simple_repeat 1 20 (0) m_b1s252i0 1 + + scaffold_1 613 GTGTGTGTGTGTGTGTGTGT 632 + + (GT)n#Simple_ 1 GTGTGTGTGTGTGTGTGTGT 20 + +Matrix = Unknown +Transitions / transversions = 1.00 (0/0) +Gap_init rate = 0.00 (0 / 19), avg. gap size = 0.0 (0 / 0) + + +16 18.30 2.22 2.22 scaffold_1 780 824 (13327) (ATAATA)n#Simple_repeat 1 45 (0) m_b1s252i1 2 + + scaffold_1 780 ATATTAAT-ATACTAAATAATGATAATATTAATTCTAATAATGATA 824 + v - v - i v vv i + (ATAATA)n#Sim 1 ATAATAATAATAAT-AATAATAATAATAATAATAATAATAATAATA 45 + +Matrix = Unknown +Transitions / transversions = 0.40 (2/5) +Gap_init rate = 0.05 (2 / 44), avg. gap size = 1.00 (2 / 2) + + +12 23.88 4.55 0.00 scaffold_1 2231 2274 (11877) (CAGA)n#Simple_repeat 1 46 (0) m_b1s252i2 3 + + scaffold_1 2231 CAGA-AAACAGTCAGCCAATCAGA-GGACAGACTCAGAGACAGACA 2274 + - i v v iv -i vv v + (CAGA)n#Simpl 1 CAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACA 46 + +Matrix = Unknown +Transitions / transversions = 0.50 (3/6) +Gap_init rate = 0.05 (2 / 43), avg. gap size = 1.00 (2 / 2) + + +15 18.42 10.20 0.00 scaffold_1 4853 4901 (9250) (TC)n#Simple_repeat 1 54 (0) m_b1s252i3 4 + + scaffold_1 4853 TCTATTTTTCTCTCTCTCTCT-TCGGTCAGTTTCTCT-TCT-TCTC-CTC 4898 + v i i - vv vv i - - - + (TC)n#Simple_ 1 TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 50 + + scaffold_1 4899 -CTC 4901 + - + (TC)n#Simple_ 51 TCTC 54 + +Matrix = Unknown +Transitions / transversions = 0.60 (3/5) +Gap_init rate = 0.10 (5 / 48), avg. 
gap size = 1.00 (5 / 5) + + +13 19.07 1.82 7.69 scaffold_1 6230 6284 (7867) (TAATTAA)n#Simple_repeat 1 52 (0) m_b1s252i4 5 + + scaffold_1 6230 TAATGAGT-ATTACTAATTAATAATTGGATGAGTTTATGAATTCATCATT 6278 + v i - v -i - i v - v v + (TAATTAA)n#Si 1 TAATTAATAATTAATAATTAATAATT-AAT-AATTAAT-AATTAATAATT 47 + + scaffold_1 6279 AAATAA 6284 + - + (TAATTAA)n#Si 48 -AATAA 52 + +Matrix = Unknown +Transitions / transversions = 0.60 (3/5) +Gap_init rate = 0.09 (5 / 54), avg. gap size = 1.00 (5 / 5) + + +15 28.31 0.00 3.51 scaffold_1 6548 6606 (7545) (GACA)n#Simple_repeat 1 57 (0) m_b1s252i5 6 + + scaffold_1 6548 GACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAA 6597 + v i i viv i vi v -v i - + (GACA)n#Simpl 1 GACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGA-CAGAC-A 48 + + scaffold_1 6598 GACAGAGAG 6606 + v + (GACA)n#Simpl 49 GACAGACAG 57 + +Matrix = Unknown +Transitions / transversions = 0.86 (6/7) +Gap_init rate = 0.03 (2 / 58), avg. gap size = 1.00 (2 / 2) + + +67 2.94 1.43 0.00 scaffold_1 11981 12050 (2170) (CT)n#Simple_repeat 1 71 (0) c_b1s251i0 7 + + scaffold_1 11981 CTCTCTCTCTCTCCCTCTCCCTCTC-CTCTCTCTCTCTCTCTCTCTCTCT 12029 + i i - + (CT)n#Simple_ 1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT 50 + + scaffold_1 12030 CTCTCTCTCTCTCTCTCTCTC 12050 + + (CT)n#Simple_ 51 CTCTCTCTCTCTCTCTCTCTC 71 + +Matrix = Unknown +Transitions / transversions = 1.00 (2/0) +Gap_init rate = 0.01 (1 / 69), avg. gap size = 1.00 (1 / 1) + + +19 15.37 2.78 0.00 scaffold_1 12078 12113 (2107) (CT)n#Simple_repeat 1 37 (0) m_b1s252i6 8 + + scaffold_1 12078 CTCGCTCTCTCACTCTCTCCCTCTCTC-CCCACTCTC 12113 + v v i - i v + (CT)n#Simple_ 1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 37 + +Matrix = Unknown +Transitions / transversions = 0.67 (2/3) +Gap_init rate = 0.03 (1 / 35), avg. gap size = 1.00 (1 / 1) + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.cat Sun Apr 22 02:00:59 2018 -0400 @@ -0,0 +1,103 @@ +18 0.00 0.00 0.00 scaffold_1 613 632 (13519) (GT)n#Simple_repeat 1 20 (0) m_b1s252i0 + + scaffold_1 613 GTGTGTGTGTGTGTGTGTGT 632 + + (GT)n#Simple_ 1 GTGTGTGTGTGTGTGTGTGT 20 + +Matrix = Unknown +Transitions / transversions = 1.00 (0/0) +Gap_init rate = 0.00 (0 / 19), avg. gap size = 0.0 (0 / 0) + +16 18.30 2.22 2.22 scaffold_1 780 824 (13327) (ATAATA)n#Simple_repeat 1 45 (0) m_b1s252i1 + + scaffold_1 780 ATATTAAT-ATACTAAATAATGATAATATTAATTCTAATAATGATA 824 + v - v - i v vv i + (ATAATA)n#Sim 1 ATAATAATAATAAT-AATAATAATAATAATAATAATAATAATAATA 45 + +Matrix = Unknown +Transitions / transversions = 0.40 (2/5) +Gap_init rate = 0.05 (2 / 44), avg. gap size = 1.00 (2 / 2) + +12 23.88 4.55 0.00 scaffold_1 2231 2274 (11877) (CAGA)n#Simple_repeat 1 46 (0) m_b1s252i2 + + scaffold_1 2231 CAGA-AAACAGTCAGCCAATCAGA-GGACAGACTCAGAGACAGACA 2274 + - i v v iv -i vv v + (CAGA)n#Simpl 1 CAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACA 46 + +Matrix = Unknown +Transitions / transversions = 0.50 (3/6) +Gap_init rate = 0.05 (2 / 43), avg. gap size = 1.00 (2 / 2) + +15 18.42 10.20 0.00 scaffold_1 4853 4901 (9250) (TC)n#Simple_repeat 1 54 (0) m_b1s252i3 + + scaffold_1 4853 TCTATTTTTCTCTCTCTCTCT-TCGGTCAGTTTCTCT-TCT-TCTC-CTC 4898 + v i i - vv vv i - - - + (TC)n#Simple_ 1 TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 50 + + scaffold_1 4899 -CTC 4901 + - + (TC)n#Simple_ 51 TCTC 54 + +Matrix = Unknown +Transitions / transversions = 0.60 (3/5) +Gap_init rate = 0.10 (5 / 48), avg. 
gap size = 1.00 (5 / 5) + +13 19.07 1.82 7.69 scaffold_1 6230 6284 (7867) (TAATTAA)n#Simple_repeat 1 52 (0) m_b1s252i4 + + scaffold_1 6230 TAATGAGT-ATTACTAATTAATAATTGGATGAGTTTATGAATTCATCATT 6278 + v i - v -i - i v - v v + (TAATTAA)n#Si 1 TAATTAATAATTAATAATTAATAATT-AAT-AATTAAT-AATTAATAATT 47 + + scaffold_1 6279 AAATAA 6284 + - + (TAATTAA)n#Si 48 -AATAA 52 + +Matrix = Unknown +Transitions / transversions = 0.60 (3/5) +Gap_init rate = 0.09 (5 / 54), avg. gap size = 1.00 (5 / 5) + +15 28.31 0.00 3.51 scaffold_1 6548 6606 (7545) (GACA)n#Simple_repeat 1 57 (0) m_b1s252i5 + + scaffold_1 6548 GACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAA 6597 + v i i viv i vi v -v i - + (GACA)n#Simpl 1 GACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGA-CAGAC-A 48 + + scaffold_1 6598 GACAGAGAG 6606 + v + (GACA)n#Simpl 49 GACAGACAG 57 + +Matrix = Unknown +Transitions / transversions = 0.86 (6/7) +Gap_init rate = 0.03 (2 / 58), avg. gap size = 1.00 (2 / 2) + +67 2.94 1.43 0.00 scaffold_1 11981 12050 (2170) (CT)n#Simple_repeat 1 71 (0) c_b1s251i0 + + scaffold_1 11981 CTCTCTCTCTCTCCCTCTCCCTCTC-CTCTCTCTCTCTCTCTCTCTCTCT 12029 + i i - + (CT)n#Simple_ 1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT 50 + + scaffold_1 12030 CTCTCTCTCTCTCTCTCTCTC 12050 + + (CT)n#Simple_ 51 CTCTCTCTCTCTCTCTCTCTC 71 + +Matrix = Unknown +Transitions / transversions = 1.00 (2/0) +Gap_init rate = 0.01 (1 / 69), avg. gap size = 1.00 (1 / 1) + +19 15.37 2.78 0.00 scaffold_1 12078 12113 (2107) (CT)n#Simple_repeat 1 37 (0) m_b1s252i6 + + scaffold_1 12078 CTCGCTCTCTCACTCTCTCCCTCTCTC-CCCACTCTC 12113 + v v i - i v + (CT)n#Simple_ 1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 37 + +Matrix = Unknown +Transitions / transversions = 0.67 (2/3) +Gap_init rate = 0.03 (1 / 35), avg. 
gap size = 1.00 (1 / 1) + +## Total Sequences: 1 +## Total Length: 14220 +## Total NonMask ( excluding >20bp runs of N/X bases ): 14220 +## Total NonSub ( excluding all non ACGT bases ):14220 +RepeatMasker version open-4.0.7 , default mode +run with rmblastn version 2.2.27+ +RepeatMasker Combined Database: Dfam_Consensus-20170127
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.gff Sun Apr 22 02:00:59 2018 -0400 @@ -0,0 +1,11 @@ +##gff-version 2 +##date 2018-04-21 +##sequence-region dataset_12.dat +scaffold_1 RepeatMasker similarity 613 632 0.0 + . Target "Motif:(GT)n" 1 20 +scaffold_1 RepeatMasker similarity 780 824 18.3 + . Target "Motif:(ATAATA)n" 1 45 +scaffold_1 RepeatMasker similarity 2231 2274 23.9 + . Target "Motif:(CAGA)n" 1 46 +scaffold_1 RepeatMasker similarity 4853 4901 18.4 + . Target "Motif:(TC)n" 1 54 +scaffold_1 RepeatMasker similarity 6230 6284 19.1 + . Target "Motif:(TAATTAA)n" 1 52 +scaffold_1 RepeatMasker similarity 6548 6606 28.3 + . Target "Motif:(GACA)n" 1 57 +scaffold_1 RepeatMasker similarity 11981 12050 2.9 + . Target "Motif:(CT)n" 1 71 +scaffold_1 RepeatMasker similarity 12078 12113 15.4 + . Target "Motif:(CT)n" 1 37
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.log Sun Apr 22 02:00:59 2018 -0400 @@ -0,0 +1,10 @@ +SW score % div. % del. % ins. query sequence pos in query: begin end (left) repeat class/family pos in repeat: begin end (left) ID + +18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1 +16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2 +12 23.9 4.5 0.0 scaffold_1 2231 2274 (11946) (CAGA)n Simple_repeat 1 46 (0) 3 +15 18.4 10.2 0.0 scaffold_1 4853 4901 (9319) (TC)n Simple_repeat 1 54 (0) 4 +13 19.1 1.8 7.7 scaffold_1 6230 6284 (7936) (TAATTAA)n Simple_repeat 1 52 (0) 5 +15 28.3 0.0 3.5 scaffold_1 6548 6606 (7614) (GACA)n Simple_repeat 1 57 (0) 6 +67 2.9 1.4 0.0 scaffold_1 11981 12050 (2170) (CT)n Simple_repeat 1 71 (0) 7 +19 15.4 2.8 0.0 scaffold_1 12078 12113 (2107) (CT)n Simple_repeat 1 37 (0) 8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.masked Sun Apr 22 02:00:59 2018 -0400 @@ -0,0 +1,286 @@ +>scaffold_1 +TGCTTTTCTGAGATTTATGTCAGCTCCCCATTGGGAGATACAGGAGAATC +CAATATATTCCTGTCTCGCTTCTTTTATATTTATCCTTTCTACACCTGAC +ATCCTCAGTGATTGAAGTGACAAAAAGTGGAGCGCACGCAGTGGTCATTA +CCCTCCAATGGTACTTCTAATTAAGGAAAGATTTTTTGCATTCACTGAGC +AAAACACTTATTTGCATGAATGGAAAATCATAAATGAGGGGTCCATTAAA +CAAATTTCCTAATTGTTTGCTTTTTTTTCTTCCTACTTCACCATCCCTCT +AAAGCTATTACTCTCTATTCTACCCCATTATCGAGTGCACAAACACCAGC +CCCAAATTGTCTTATCTCTGCTTTGATAAATGATATTTTTTTCTCTTATA +ATTGTGTTTCTCTCTAGATGTGCGGTGTCCATCAAGACACTGGATGTCAC +GTGGTCACTCCTGTGAAGAGCGAACTGTGTGGAACCCGAAGTACTGTGTG +GTTGCTGACTGTCAGATGCTGCTTCTGAATGAGGAGGAGGTGGTGAGAGC +AGACAGCACAATCTCTTGTCTGGCATGTTCCTCTGTGTCACCATTGTCTC +TCTCATCTGCTCNNNNNNNNNNNNNNNNNNNNCCTTGTCAATGTCATTAC +ACAATTTATGTTGGTGCACTGCATGTCAGAGTCTGAACTTTGTTGGACAT +CTACTTTCTGTCACCTGATACCTCATACTTTACACTCTAATCCCTTTTTC +CTTTCTAACTGCTGCTGTGTTAAGCTGTCNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNGTGAATGGAGATTCACAATGAAGAAA +CACAAGGTCCAGACTGAACCCCTGTGCTGTGTGAAGAGTTCACTCTACAT +TTACATTCATCCCACATCAGAGACCAGCAATAGCCAGGTTAAGCAGAGCA +AGGCTGAGTCTGGCTCTTTGCCTCTGTTGTTTTGATTAGTGTCACACTGG +ATTATGCTAGTTTGTGTGAGATTAGAGGTCCTGTTGTTACGAGCAAATCA +GCAGTCAGCAGGGTAATCTAAATTACATTGTTCTGTCGGCAATATCCTCT +CCTCGCTCAACAGCTTTGTGTGAGCTCATATTTAAAGCCCCCACAGTAGT +GCAGTGCATAAAATCATGCAGATACAGATCAGAAGCTTCAGGTAATGTTT +ACATCAAACATCAGGGGGAAAATGTGATCTCAGTGCCTCTGACCGTGGTG +TGATTGTTGGTGCCGGACAGGCTGGTTTGAGTATTTCTAAAACTGCTGAT +CTCCTGGGATTTTCACACACGGCATTCTCTAGAGTTTACTCAGAACGGTG +TGGAAAGCAAAAAAACATCCCGCGAGCTGCAATCCTGCGGATGGAAACAC +CTTGTTGATGGGATATGTCAGAGGAATGGCCAGGCTGGATCAATCTGACA +GAAAGGCGGTAACTCAGATAAGCACTCTCTACAACTGTGGTGAGCAGAAA +AGCGTTCTCAGAATGCACAACACGTTCAACCTTGAGGAAGATGTTGCTAC +AGCAGCAGATGAAGACCACGTCTGGTTCCACTCCTGTCAGCCAAGAACAT +CTCAGACTGCACAGGACACCAAAACAAAAACACATCCTGTTTTCTACTGG +TGGTAGAGTCACAATTTGGCAATAAGATAAATCCATGGACCCAACTTGCC +TTGTGTCAATAGACCAAGCTGCTGGTGGTCTTGGGGAATGTTTTCTTTAC 
+ACTTTCACACTATCGGCCTATATAAAATCCTATTATAAACTGCTGTACAG +TGCATTGGGTTTGATAAGCCCTCTAAGTCTGTATATCATGATCTCATTGA +TACCTGCGGCAATACACCCCCACAAACGCAGCTCCCTGCATTTTAAGTAT +TATAGAGAGTAATTTCACTGTCCATGTCTTTTTTGATCATAAAGCAGGTC +TAGGTGCTGTATACAACACTGTATTGAAATGCTGAAGCAGGTCTAGGTGC +TGTATACAACACTGTATTGAAATGCTGAAGCCAATATATCAATCTATCTG +TCTATATTTTATGAAATATCAGAGCATTTTAGTCCAAATTTGTCTGTTTT +GTTCATTGCAATGCTGACTGCTTAACATTTTCCTCAGTGGTGGCCATCAC +CACAGCATTTACTGGTGAGTTTTACCAAGCTACAGTGGGCCACAATGAGT +TACCTGTTATTGGCCTGGCTCTATGGCATGNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNCAAAGTGCCCTGTTCTTGTTAGAGCA +GAGATAAATGAGACAGAGGAGATGTAAAACTATACTGAGATGGTTTTTTG +GTTCTTAAAACCACAAATATATTGTATTACGGATATCAAAACTTCAAATA +AAACCAGTAAAGTGTAAAACATGGGACCTCTAAATGAAGGGCTGTTTTGT +TTCGTAGTTCTAGATTATGTCACTATGTGGCCTCGGTCTCTGTCTGCGTA +AAGCGGCTACATGTAGAATTTGACCCACTTTGGTGCCCACATATGGTAAC +TAAAACACTACAGACAGTATGCACTCCAACCCTATATAATTTATGTCCTT +CAGTGAAGCCTACAAAATGTGCAGCATGATTTGCTATGATTAATACCTAG +AATTATATTAAATTTTAGAAGTTTAGAGAGATGGCTTATACCTTTTTCTG +AGCATGGAGTCATCTCATGTAATTTGTCACATACCTTTGGCCAGCTTAAA +CCTGGTCATTCCAGTGAGAGCTGCTCACCCAAATGTTTCTGTATGCACCT +CCTGCTGTGGGTGCCAATAGCTCATGCAAGTTGGTGTCTTTCAACTAAGG +CCACTATGTGAGGACCATCAGCAGCAAATTAAATTAGAAGTGCCTTGGAG +TTTCTCAGGCCCTCTTCTATGATCATTAAGGTCTCAGTGACAGAACAGTG +CCCCTTTGAGGGACTATGGCATCTTGAGCACTTAAGTTGGCCGAATGTTT +GATCTCAAATATGAGTGCACTGTTTCAAAGTCACAAAAGCCTCTGGACAA +GGCATAGTTACTGGGCTTGGTTAACCTTTTTTTCATACCGGCGTGACTGC +TGGAACTTGGAACATGTAGCCTAAGTTTCCCTTTGTCTGCAATTACTGAG +GTATGCCATGTTGAAATAGAACAATAATTGCAGGGAGGAATTATTCCAAT +CGACCCATCGGCAGGCTTTCCAATACAGTGCTTTTTGTTTCACAGCAACA +AAGAACAAGAGCTTTTAAATGCATCCATCTGGACCCTGACTATGAAAGGT +CATTGGTAACGCTGCTGGATTTCTGTTATGTCAGCAAACTGCAGGTCAAA +GCGATGATACAGTCTTATAATGGCTTTCTCCAAGTGAAGGTGGGATAACA +TGGTTTAAACAAGCTCAGTTAAATGGATCAGAGCATCAACTGCTGTGAAC +AAGCAATCACACGGTCAAGGTCAAACATTTACACTTCTGAGAGATCTGGA +GAGTATGTCATGGCAGTATTGCTTTTCAGTTATTTCAGTAACTGATTACG +CCTTGTACTTAGTCTCAGAATTAGTGATTTGATTCAAAGTGTTTTATATA +TGTATATTTCAAAGCAAAAATACAGCTTAGGTAAAGCTCCTTAGACTCAT 
+GCAGCCTAATTTACTGTAGACATTCCTCCATGTACAGTACTGTGCAAATG +TTTTAGGCAGTTTTAGGCACTAAAGGTGAACTGAGGATGCAGTCCCACGA +TTAATTTTTATTCATCAGTTAACCTCATGTGAAGTGTAGTAAACAGAAAA +AACCTAAATCAGATCAGTATTTCTTATGACCCCCCTCCCCTACCACCCAT +CTTTAAAACAGCTCCAGTTCTCCTCAATCCCCCTGTACTTAGGTAGTTTT +TTTCTAGCGTCTTGGAGAAGTTGCCTCAGTTCTTCTGTGGATTTTAGGCT +GTCTCTGTGTCTTCTGTCTCCTCATGTATTCTCAGACTGACTCCATGATG +TTGAGATCAGGGCTCTGTGGTGGGGGGGTCAGACCATCTGTTGCGGGACT +CCTTGTTCTTCTTGTCTCTGAGGATAGTTCTTTATGATGCTGACTGTGTG +ATTGGACTCATTGTCCTGCTGCAGAATTAATCTGGGACTGATCAGACATC +TCCCTGATTGTACTGACAAAGGATAAGAGCAAAAACCTGTAGACTGCCTA +AAACTTCTGCACAGTACTGCACAATACTGTATGTATGTGTGATAATACTC +AGAAGTGATATAGAGTGGTTCAGGAGAAGCCTTAACTTACTTGTTCTTCT +TTTCCCTATGATGCAATTTCTCTCCAAGCAAACAGTACATAATGAAATAT +GTCTAAGAAAACACTAGCTGTTTGTCTTTTTGAAACACATTTAATAATTC +AGATTTAGCCACAAATTAAAAATGAGCTTATACTTTGAACTTGCCCATTG +GTTTTGATGTATGAAGCTGACAGATTTAGTCCACTGCATACTGCACTTTT +TTGGATAGTGAAATAAATTAATTTATTATGCAGCATTTAAGCATATTGCT +GTTATCTAGAATTTTATATATTAATAGGTATATATTATATTAACTCTTCA +TTTGCTTAAATTTGGCCTGTATATTCCTCCATTTTATACAACCTTTAGAA +AACACTGGAGTGAACAAAAATGTGAGGTTCAAAAGTGAGAGGAAAGAAAA +ATCAGTGTGTGCAGCAAGACTGTAAGAGTCCCTGAACAAAAGTGTTGATT +TATTGTTGGCTGTGAAAGTTTTTGCTGCAGGGGTTTTTGTAAACAGTGAC +TCTTCTTTATGTCTTTGTTTCTATCTTTTTGTGTTTTCTTTTTTCATCTT +ATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NATACTCATGCTTCTCTTTTACTCTAATGGAAAACATAGAATTGAGGTGC +AGTATCACTGAGAACAGCCAATGCTGATACTGTCATTTGTTATAGGATGT +TTACTTAAGAAATGATTGTGGCCTTATTTGCCAAAGACGATTAGGCCCTG +AGCCATTTGTACAGCCAATGTGACACCTCCGTAACCATAAGAAAAATTAT +GCTTCCACAAACAGAAAGAGAGACAAATACAGACAGTGAAAGCAAAACTA +TGAGAGAAGGGATAGCCATCTCAGCATTCAGCCAGTGTGTCACATGTCCA +GAGGGCTGCAATATGAAGCCAATAAACTGAGAGAAAGGTGTGATTCCAGA +GGGCTTTCCGGACTTTAAACTTTTCTCATTTTCACCTTCAGCTCGTTGTG +AAGCCTTCAATTGTTCCTTGGTCACATGAAGCCCCCTGTGTGTAAAATTG +ACATTCAGCTTATTACTCTCATCTACATCTCTAAAGTTGCACATGACTGC +CAGTAGAGGGTGCTCATGATTTGAGACAAGTAACTTTTCTGTGCTTGTGT +CAACAGTTAAAAATTCCCTATGGATACTTGAAAGAAATGCACCAGTGTTT +AAATTTAGACTCAGTCCAAGTAGGCTTTGCCATTTGCTTTACTGTTGTAA 
+TCAAATCAGTGCTCACAGTACATCAGTGGCAAAAGCAATTAGCTTAATTG +AGTTAGTAGGGAGTGAAAAGATAGATCACTATTTCCTGGCAGCGATATAT +TTTTCTCAGTTAACCTTCTCACAGAAAATTAGTTAGATGAGTTTCTTATC +TCATTAGAACCATTGTTGTTTATTATTTAAACCTGACCTTTATCCAGAGA +ATGTTTTTCACAGTAAGCATGTGTTTCACTCCAGCTCATACAGACACACC +TTGTGGCCAGTCAGGGAATGTGCATTAAAACTAATGTTCTCTCTGGTAGA +GCCATTCTGCCTATTCTGTTATTCACCAAAACTTTCCTGCCCAGATTTTT +CCATCTAGTCAGGGGACTTTTTCCTCAGCTGAACTGTCAGTAGCCAGCTG +CTCTGACCTTTACTCACCTCAGAGATTACTCATTCAAGTTATTTTACCAA +TGATGGAATAAAGTGAATACACACATCGTGACTTCATCATACTTTCCCTC +CTTTACCGTACTTTCCTAACATCAGTTAGCAAATTATACACTTAAATATG +CTAAAATAGAGATTTGTTTCAAATTGAAGAGCAACACAGGACACATTTGC +TTACACAAGTGACTCTATCTTATTTTTTAAGTGGCTGTCATCCTCAATAT +CATCAATATTTTAGGCATGTGAATGTAGGNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGATTATTATGATAAAG +CTACACATTATACTGCAATTCCGGGCCAACACTATTGTTTCGCCCTTACA +GTACAGGACTGTACATATCACTGTCACTACAAATTGTTTCAGTGAGGCTC +AGATTACAGCTCATCTTGCTTAGTCATTACGGCTCAGACCACCAGACAAT +GTCTTTACGTCAGAGAAAGTCTGAGTGAGAGCTGTCCTTAAAACCAAGTC +CCATGAGGCAAGTAAAGAGGTGACTCGCTGAGAGGATCAGGGAGAGGNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNGGTGCTGCATTGAGTCGAGAGGTGTGAGGATGTTCGATAAGCCA +TTATCACTCTGCTCGGAGGACGTCAGGCCCTATCCTTTTTTCTTTGAGTC +ACCACTTTCTCACCACAGCCATCCTTCTTCCTTTGTCTGTTTAATCTTTT +GCAGCCAAACATTTGGTATGCAAACTATAGGAAGATATGGTCTGACACCG +ACTGATGTGATGCTAATGCCTTCTCTGTTTTATCCAATGACTCCTCATTT +ATCAGGACCAATCCAGCTATTGTGTGCTATTGGCCCAACACAGCATTAAA +CATGGTGACCAGAATAGCCACAGCAAAATAAATAACGGGCCTCTGTGACT +ATTGTATGTGCATTTCTTACAGCTGTCATTTTTCTTTTCTGTTAGCTTTC +ATTCTACTGTGCGTGCTCTCTATCTGCCTACGTTGTATCTGTGCCTTTTG +TTAAATTCATTCAGCCTTTTATCTGTCTTGAATTTCTTTCTCTGTGTCCT +CCTTTCCCTCTCTGTCTCAGCAGATCCGTCCCTTGTTTTTGCAGGAGAAG +CGCTCTGCGTCATGCAAGGTCCATCTTCTGCGACGCACCATCAGCGTTCC +AGTGGAAACACAGTTTCCAGAACTCCATAGCCAGCTGTCCACTGAGAGCG +GTGAGTAGCCTGAGGACATGCCTCTGCTGTGATTAGCATGCATCATTATT +TCCATACACACACTGGAAATCTTTTAATCGTTTTTCTTGACACAATTTAA +TTAATTTAAATAATTAATAACATTGAAATCAACTGTTAAACACTTGTTCA +ACATGGTTGTGGACAAAATCATCATAATGACTGTGGAGACTAAGGAGAAA 
+CAATGCTTTGTTAAAATCTTTGCTCTGGTTTTGGCTTTTTCTATATTCTA +TTTGTGTGTGCAGTGCTGCTAAACATCATCTTAGTCAGAGTCAAACTTGA +GCACTCATGCAGATGCTTGTTCTATTCAGTGGTCAAATGGGCAGTGTAGC +TTAGGAGAAGCACTTCTTTCCGCTCCTCAGTGACATAACCAGTCACACAC +TCACACAGGCAAGTGAGCCTTTTAAAATTGCTGATACTATTTTTTTCTAC +TTCATATCCACTAGATGGAGAGCTTGGTGCTTTGTAGCACTCCAGTGTAC +TTGGAATGTTTCATCATAACCACTAAGGCAGACATTGATAAAGGCTTCGG +TCTCTTTCAAACTGCAGGTGTCATGTTGCACTCTCCCTATTGCTTTTATG +AAAATGTAGTGCCGGTAATATAACACAGTTACTCTCTGCATATATCACTA +TGTACACTTACTTCTATGGAAAAGATGGAGCGCCACAGTGAAAACTGTTT +TGAGTCTGTGAGGGGAAAACACAGCATCAGTCACAGTGAAACACTAGGTG +GCACTCAGGTTTGACATTCAAGCATTTGTATCCCACAGTTACTGTTGCTG +GGTTGTTGGCTGGCATGCAACTTAATATGATCTATCTTTAAATCAGTGTG +TGCAGTGGTTATTTAGTTTAAGTGCTTTTTAATGATGTCACAACATTATT +TTGTCTACACCCTGCATACAGCACAGTATATTAAATTTAGGTTTTATTAA +GTTAAGTAATGTTCTGAGGTGGCATTGCCCTCAGGTATATATCCCTCAGG +CAGTGTTACTGGACAGCATATAGATTGTAATGTTGTGTAAGCAGTGTTGT +GTAAGCTTTTTTAACCAAAATGCTCTCATGTTTCTTTGTTACCACAGTGG +TTTTAGTGATGTTTTGTGCTGTGAACAGAATCATGATTTCTGCAGACACT +CCACATCTGTTTAAACAGTGATTTTCACATCTGTCTGTAACTGAGTCAGT +CAGTTTGTGGTTGGATTGATGTGTAGTGGGATCTACAGATCATCAACCCT +CGGGGAAGCCATTTAGTTCTGTGTTAAATAAAAATACAACTTTTGAGCAC +TGTTTTTTCATATTTTTCTTCATCCCTTAGTTGATATTAAAGGTGCTATA +TGTAGATTTTTGCTATCACTACATAGCCAATGTTAGGATTAAGAACCCCA +GCAACCCGAATGTTAACTCGTACTTTGCTTCTATTTCTATTACATATTTT +CTTCTATGGAAGTTAGGATGTTAACCAGCTAACCCCAGGTCATCTCATAA +TACCACTTGGCAATAGTCAGTCACTGTTGCATTCAGTCTGCCCCTCCAGC +ATGAAAGGATGAAGAAGTACCGCTACCCAGAAGGCGTAGTCTAACCCCTT +GTCTTGTAAACACAGCAATGGCTGAAGCTCTTGGTAAGTAAACAGCTGTT +AATGTCAGTGTTGGCTACATAGCTAAAACTTACACATAGCACCGTTAAAA +AAATAAATGGTAAGTTTTTGTCCTTTTAGTTAAAGCATGTAAATGTCAGT +TAAAACAATTTAGAAAATTTCTCATTCAAATAGTCATTTTATTTTGTGTG +CTATAAATACCTTCAGTTTTTTTTCTTGTACTTTATTTTAAACTCTTGTC +TTTCTCTTATGCCCCTTATCTACCAGGAGAGGTCACACTCTTGTTGATAG +CATGATAAGGAAATGAGCAGAGATGTCCTTATTTCTCCTCAAAGTTATTC +CCTGCATAGCACATATTTGACTTTAAAAATACAAAGACTACAGGTGCTAA +ATCACACACACAACACCCAAAAGAATAGAGGACTTTTAAATATGTGCATC +CCATTTTCTAATCTATATTCACCACTGATACAAGTGATGACACGGTGATG 
+AGAAAATAACGTAACATACATTATTATACAAGATTGGTATGATTGCTGTA +GGTTGGTTTTGTTTATCTGAGAAGGGACAGAGGCTAAAAGAATAAACAAA +GTCTGCCAGCTACTTCACTAATTACAGAGTACCCCTGCAACTCTGCCAAT +GACATTCTGAAACTTTTCCATGACTATTATGTAAGATAATTTTTGAATCA +CTGATTCAGATTTCCATAGCCAAGATACACTATTCTGCATGTTGGCTAGA +ATACATTACAATACATTAATATATTACTGTAGTGGTTATTATGTCAGCCA +TTTAAACATAATTTCATACTTTCAGCTGTTCGCAGGGCAAACTTTTTATT +TTTGTCACCTGAATCCAGTTGCATTTGGATTTGAATTCAGTGGTGACTGC +CAATGAAAAATAACTCACAGGGGCACTTAGAGATCTGAGGCGGCATTGAC +TGAACATCGAAGGGCTACTATTACAGAAATGCAAGTGGAAAAAGAATCTG +ATATGATACTTGCCAGTATGAAAAGCATAATTAGGACTTTAAATGCATGT +TTTAAATGTTTTTGTGAACCATAATTTCAGTGTAAAAGTTCCAGTTATAT +TTTAAAAGGAACTCCAAATATCCAGTGGTAGCATCAAGGTAACCTGAGAT +AAGTGAAAAGTAATTCTAAATTAATCAGTATGTTGAAATATCAGACATTT +GTGTGCATTATTGTTAGTGTGACCATGGTGGGATACTTCAAACACACTTG +TTCATTGGCAAAATAATTGTCACATGAATTAACACAAGTGACACTTTCAG +TATCATACTGACAGAATGTTTGTTGTTAACTTTATCCCTGAAAACAAACA +CTAATACATGTTCTTGTCGCCCCTTCAGTGCCTAACCTTTAAAATTGTGC +TTGTGAGTGTGTGTATGTGAGCGCACCTGTATGGTTTTCAAATAATGTTC +CACATCACAACACTACAAACACTCACATGGTCTATGTTTTACTAAATTAT +TCATTGGCGTAGGCCCCACCTGCTCTTGTCCTTGTGTGACACAGGCTCTA +AATAAGCAGCATGATGAATAAAAATGACACTGAGATGAATAGGAATCCAC +AGCAAATCAGAGAGCACTCTCATCTCGTCTCATCCTCCGACTGAATAGCG +AGCGGCTGCCTCCTTTATTCTTTTGAACTCTTCGCGGTTTTGGCACAAAC +ATGCCAGAGACAGAGAGCGCTGTTACGCTCCCACCAGAGCAACTACGCTT +AATTTTACTGAGGTGAAAGTTTTTTTTTTTTTTCTGGATAAAGCTTTGGA +AAAGTTCTCAACTGTTGCTTCTTTAAAGATGCTGGGGCATATTCTGCCCT +ATTTTCCAGTTTTTTACATCTCCCATAAATATTTTCACAAGCTGTAACAT +TTTAGACAGGATTAAATTGAACTAGATAACTGACCAAAGGCTGCTAAATT +ATTTTTATGTGCCTTGTAGCACAAATACTGTCTCACATATATTTTAAGTA +TCTTAATATTCCCTCTGATGTTTAATTTAACAAAGCTGTCCTTCCCTCCA +TCTCTCTCCTCATAGTTTTTCATTACAATTAGCTCTCATTAGATAGAATT +GTATTTGTTGTCTTTGTGCCAGTCACTCCAGTCTATTTTGCCAGCACACA +ACTAAATAATTGTTTTGCCTTGTCAGTGTCTTGTTTTACAGTTTTAATGA +TGAGCCCAAAACATGTCAAATATGAGGAACTATAACTTATCAAGAAAGTG +GGGTAGTTGAGATAAAACTGTTTCCGAGGTGGAGGTTGGCTGAGCTCCAT +TTGGCCACAAAATGTAGCTGAAAGGGCAGAGAAACCCACTTTAATGGAGT +ACAGGTTGCATATGAGCTGGTAGGAAAGATATAGAATCATTATCAGCTGA 
+TAATCTAACAGTAGCAGTCAGTGTAGATGCTGTGCTAACGCAAAGTTGTG +AAACTACTGTCTGTAAGCAACTCATCCAATTGTTATTGCTGCCAAGCTTA +AGTATTTTAACAGCTTTTAGGTGTTGTTAGCACAGTACTCGTTTGGATCC +GGCCATGACAAGAAATCTTTATCGGCCACTAATTTAATTAATCATCTTTT +TTCAAGCAAAAATCACAAACTTTCTTTGGTTCCACTTTTTCTCATTGTAG +ATTAAATATCTTTTGGGTTTTGGCACAGGCTGGACAAAAAAACCTCTGAG +ACGCTGTGATAAGAATTTATTTTCACATTTTTTTTACTTTTCAGGGACTA +CACAATTATTTGGTGATGAAAATAATATTTTGTTGCAGCCCAAATGTTAA +CTTGCCACTCAAGCTGTCAATTCAAGAAGGCAAAAGCAACAAATACTGCT +TTGCCGAGCTGAAATGAGAGTAGAAACCATGCCCGGGCCAGCTCAGCCTG +GTCAGGTTTTTGAGTCCATCTATTGTTAACATTCAGGTTGCAGCACAAGG +GAAGTTCCTAAATTCTTCTGGCATGTTAATGTTTTCCAAACTGATGTTAC +CAAGTCCTCGTTATGATGAGAAAAAAATATGCTGAGAGTGAAATTGATCA +AAGTGGAAACATTAAGCACTGCCACAGTCCCCTCCACCCATCTTTCTCCC +TCTCTCTCCACCGCAACGTGGAAACTGCTCNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +CTCCCTCCCATTCTGTGCTCCGGTATANNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNCAGAGAGGGGTTCAGTCAGACAGATGTAACACAGCAG +TAGAAGCCTGAGCTGAGCTGGCAGGCTGCGGAGGCCAGACCAGAGCCAGC +AGCTCTGACTGGAGAGAACAGCGAAGAGGTGACGGCAGCAGCGGCAGCAC +AGGGGATAAAACAGTAGTTGTAGTAGCAGTTTCAGTAGTTGTACTTCAGT +TACAGAGCTGACTTTACCTCAGGATATGGGTGTGTTGATCTGGGCGCTCA +TCACCCTGTGAATTTAACATGGACCCTGACACCAGCACCCATCCAGAGAC +ACAACATGGTGAGTGAGATTTGAAGGAGGAAAAGATTAGAAACAATGAGA +GTGATACTGTGGGAAAAGTTGAGGAGCGTGTTAGTGAACAAGGGAGTCAG +GTCACATGGTTTTCCCGTTAGGGTTGCTATTATGACTAGGCTCTCCATTT +GGCTCTAAAGCTGTTGACTAGGGCCTCTGTGGTCAAATGAATAGGACTTC +AGTCTGTTCTGTTGAGTAGGGGGTGAAGGGGTGAGGGGTGTTACATGGCA +ATGTGATGAAATCAACTGCCTGCTTAAGACATTTTGCTCTAATGACCCTC +TTGTCATTACTTTAACAGGGTTGTAAAGTTTTTTTTGCTTTTTTGTAGCT +AAGAAAGTTGAGAATTGTTTTGTCTGAACTCTCTCTGGGATTTGTCTTGT +CGGTTTTTGGCTGGTTTTTGGTGTGAGCTTCACAGAGCAGGAAGCTCTCC +TCCTGCTCTTTGAATGGAATAATTAACTGTGAGGGCCGTGGGTGTACCAT +TTCTGCTCAAAAGCAGCAAGATAATTTGATGGTTATGTGAATCTGTATAT +TTGTATGCTGTGTAGTGGTGCTGCATGTGGGTGTTTGTGTTAGAGACACA +GAGGGGCACCAAAGAATGAAAGTGGGAGACAGAGGGAACGAGAAGGGGAG +AGACCGAGAGAGAAGGACTTATGTACACAAATAAATCCAGGGGGATCTAG +ACTGCACTGTAGGCCAGTGCGAATGCTCACTCTTTCTCTTCCCCCTCATT 
+TTCCGCTGCCTCTTTTTTTTCCTCCACTGCTCCAAATGTTGGAGCCCAAA +CTAGATAGTTAAAAGGCAGAAAAAAATGGTGTGTTATTAACTGGGCCAGA +AAAGACGATGTTTTGTCTTCATGGCCGATGGGAAGGACTCTGCTGTGCTG +CTGATGAAGAATGTGGTTAACAGAATGAGTCAACAGAGAACATATTTCCA +AGAGACTGTAGTTTCTCTTCGCCAGCGTCTAAGCCACATTGCTTTATGCA +CTGGGCTCTCTGCCATGACGGCGAAGGAGAGACAAAGAGAGGGGAAAGAG +AATCGCTAAAGATAAAACTCTGTTTTTATCCATCTCCTGAGACACAAAAC +TTCTGTTAGCAATCTAACCAAATAAGCGAGAACATTAAGCTTCTTCTCAC +CTTCCCAAAATAAAATGTCAACTTCATGCAGTAATACTCACTCTCTTCTC +TCAACCAGTCAGCCAGCCTCTTTCAGTGCATACAGCTTGAGGAGATCCCT +TCTAAAGGTCCAATATAAATAGAAAAGTGGGAGTAGAAAGGGCAATAATC +TGATATCATCTGATTACATTCACACCTCAGGCTTGCACGCTACAGGAAGA +GTTCACGCTTCCCAAGGCTTGCAGGCATCACTTTTCACTCATTCTCAGAT +AGACACACAAACACATGCACAGAAATATCATTGCTGTTGTTTGCAACAAA +TCCTGGAGTAGACTACTTTGCATTGAATTTCTATATGCATGTGTTTTCAT +ATATTACACATTGCTTTTTGTACTTTGCATATAAAGTAGATGCTGATCTG +CTATCTGCATATATAGTAGCTGCAGATAGCACTGTAACTACATCTACATA +TTTCTTGTTTGTATCTACATCAAGCAGATAGTTGCTGATGCTGGACAACC +TCTTCCGATATTCATTATTTTGGAAAAAAAAATATCAATGGCATTCCTAC +CTTACAACTTAAATTAAAAGTGGTCACTTGAGTAACTGCAGAACATGTGT +TCACCCAATAGTTTTTTTTAGAGAGTGTCACTCAAAGTATTCCTCCTTAC +AAGCACCCATGCCTAAATCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.poly Sun Apr 22 02:00:59 2018 -0400 @@ -0,0 +1,2 @@ +18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) +67 2.9 1.4 0.0 scaffold_1 11981 12050 (2170) (CT)n Simple_repeat 1 71 (0)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.stats Sun Apr 22 02:00:59 2018 -0400 @@ -0,0 +1,51 @@ +================================================== +file name: dataset_12.dat +sequences: 1 +total length: 14220 bp (14220 bp excl N/X-runs) +GC level: 39.94 % +bases masked: 378 bp ( 2.66 %) +================================================== + number of length percentage + elements* occupied of sequence +-------------------------------------------------- +SINEs: 0 0 bp 0.00 % + ALUs 0 0 bp 0.00 % + MIRs 0 0 bp 0.00 % + +LINEs: 0 0 bp 0.00 % + LINE1 0 0 bp 0.00 % + LINE2 0 0 bp 0.00 % + L3/CR1 0 0 bp 0.00 % + +LTR elements: 0 0 bp 0.00 % + ERVL 0 0 bp 0.00 % + ERVL-MaLRs 0 0 bp 0.00 % + ERV_classI 0 0 bp 0.00 % + ERV_classII 0 0 bp 0.00 % + +DNA elements: 0 0 bp 0.00 % + hAT-Charlie 0 0 bp 0.00 % + TcMar-Tigger 0 0 bp 0.00 % + +Unclassified: 0 0 bp 0.00 % + +Total interspersed repeats: 0 bp 0.00 % + + +Small RNA: 0 0 bp 0.00 % + +Satellites: 0 0 bp 0.00 % +Simple repeats: 8 378 bp 2.66 % +Low complexity: 0 0 bp 0.00 % +================================================== + +* most repeats fragmented by insertions or deletions + have been counted as one element + + +The query species was assumed to be homo +RepeatMasker Combined Database: Dfam_Consensus-20170127 + +run with rmblastn version 2.2.27+ +The query was compared to unclassified sequences in ".../dataset_2.dat" +