Mercurial > repos > devteam > sam_to_bam
changeset 5:d6181de75bdd draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/sam_to_bam commit a972815cbd43dea553b3c0e4ad548f35d0023228-dirty
author | iuc |
---|---|
date | Sun, 28 Jul 2019 14:45:59 -0400 |
parents | 4afb4a5f0924 |
children | a832dc88e7dc |
files | macros.xml test-data/cached_locally/chr_m.fasta test-data/cached_locally/chr_m.fasta.fai test-data/cached_locally/fasta_indexes.loc |
diffstat | 4 files changed, 122 insertions(+), 356 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Tue May 09 11:18:00 2017 -0400 +++ b/macros.xml Sun Jul 28 14:45:59 2019 -0400 @@ -1,11 +1,131 @@ <macros> <xml name="requirements"> <requirements> - <requirement type="package" version="1.3.1">samtools</requirement> + <requirement type="package" version="@TOOL_VERSION@">samtools</requirement> <yield/> </requirements> </xml> - <token name="@TOOL_VERSION@">1.3.1</token> + <token name="@TOOL_VERSION@">1.9</token> + <token name="@FLAGS@">#set $flags = sum(map(int, str($filter).split(',')))</token> + <token name="@PREPARE_IDX@"><![CDATA[ + ##prepare input and indices + ln -s '$input' infile && + #if $input.is_of_type('bam'): + #if str( $input.metadata.bam_index ) != "None": + ln -s '${input.metadata.bam_index}' infile.bai && + #else: + samtools index infile infile.bai && + #end if + #elif $input.is_of_type('cram'): + #if str( $input.metadata.cram_index ) != "None": + ln -s '${input.metadata.cram_index}' infile.crai && + #else: + samtools index infile infile.crai && + #end if + #end if + ]]></token> + <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[ + ##prepare input and indices + #for $i, $bam in enumerate( $input_bams ): + ln -s '$bam' '${i}' && + #if $bam.is_of_type('bam'): + #if str( $bam.metadata.bam_index ) != "None": + ln -s '${bam.metadata.bam_index}' '${i}.bai' && + #else: + samtools index '${i}' '${i}.bai' && + #end if + #elif $bam.is_of_type('cram'): + #if str( $bam.metadata.cram_index ) != "None": + ln -s '${bam.metadata.cram_index}' '${i}.crai' && + #else: + samtools index '${i}' '${i}.crai' && + #end if + #end if + #end for + ]]></token> + <token name="@PREPARE_FASTA_IDX@"><![CDATA[ + ##checks for reference data ($addref_cond.addref_select=="history" or =="cached") + ##and sets the -t/-T parameters accordingly: + ##- in case of history a symbolic link is used because samtools (view) will generate + ## the index which might not be possible in the directory containing the fasta file + ##- in case of cached the absolute path is used which allows to read the cram file + ## without specifying the reference + #if $addref_cond.addref_select == "history": + ln -s '${addref_cond.ref}' reference.fa && + samtools faidx reference.fa && + #set reffa="reference.fa" + #set reffai="reference.fa.fai" + #elif $addref_cond.addref_select == "cached": + #set reffa=str($addref_cond.ref.fields.path) + #set reffai=str($addref_cond.ref.fields.path)+".fai" + #else + #set reffa=None + #set reffai=None + #end if + ]]></token> + <token name="@ADDTHREADS@"><![CDATA[ + ##compute the number of ADDITIONAL threads to be used by samtools (-@) + addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) && + ]]></token> + <token name="@ADDMEMORY@"><![CDATA[ + ##compute the number of memory available to samtools sort (-m) + ##use only 75% of available: https://github.com/samtools/samtools/issues/831 + addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && + ((addmemory=addmemory*75/100)) && + ]]></token> + <xml name="seed_input"> + <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." /> + </xml> + <xml name="flag_options"> + <option value="1">read is paired</option> + <option value="2">read is mapped in a proper pair</option> + <option value="4">read is unmapped</option> + <option value="8">mate is unmapped</option> + <option value="16">read reverse strand</option> + <option value="32">mate reverse strand</option> + <option value="64">read is the first in a pair</option> + <option value="128">read is the second in a pair</option> + <option value="256">alignment or read is not primary</option> + <option value="512">read fails platform/vendor quality checks</option> + <option value="1024">read is a PCR or optical duplicate</option> + <option value="2048">supplementary alignment</option> + </xml> + + <!-- region specification macros and tokens for tools that allow the specification + of region by bed file / space separated list of regions --> + <token name="@REGIONS_FILE@"><![CDATA[ + #if $cond_region.select_region == 'tab': + -t '$cond_region.targetregions' + #end if + ]]></token> + <token name="@REGIONS_MANUAL@"><![CDATA[ + #if $cond_region.select_region == 'text': + #for $i, $x in enumerate($cond_region.regions_repeat): + '${x.region}' + #end for + #end if + ]]></token> + <xml name="regions_macro"> + <conditional name="cond_region"> + <param name="select_region" type="select" label="Filter by regions" help="restricts output to only those alignments which overlap the specified region(s)"> + <option value="no" selected="True">No</option> + <option value="text">Manualy specify regions</option> + <option value="tab">Regions from tabular file</option> + </param> + <when value="no"/> + <when value="text"> + <repeat name="regions_repeat" min="1" default="1" title="Regions"> + <param name="region" type="text" label="region" help="format chr:from-to"> + <validator type="regex" message="Required format: CHR[:FROM[-TO]]; where CHR: string containing any character except quotes, whitespace and colon; FROM and TO: any integer">^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$</validator> + </param> + </repeat> + </when> + <when value="tab"> + <param name="targetregions" argument="-t/--target-regions" type="data" format="tabular" label="Target regions file" help="Do stats in these regions only. Tab-delimited file chr,from,to (1-based, inclusive)" /> + </when> + </conditional> + </xml> + <xml name="citations"> <citations> <citation type="bibtex"> @@ -49,21 +169,4 @@ <exit_code range="1:" level="fatal" description="Error" /> </stdio> </xml> - <token name="@no-chrom-options@"> ------ - -.. class:: warningmark - -**No options available? How to re-detect metadata** - -If you see a "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop down, you need to re-detect metadata for the dataset you are trying to process. To do this follow these steps: - -1. Click on the **pencil** icon adjacent to the dataset in the history -2. A new menu will appear in the center pane of the interface -3. Click **Datatype** tab -4. Set **New Type** to **BAM** -5. Click **Save** - -The medatada will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down. - </token> </macros>
--- a/test-data/cached_locally/chr_m.fasta Tue May 09 11:18:00 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,335 +0,0 @@ ->chrM -GTTAATGTAGCTTAATAATATAAAGCAAGGCACTGAAAATGCCTAGATGA -GTATTCTTACTCCATAAACACATAGGCTTGGTCCTAGCCTTTTTATTAGT -TATTAATAGAATTACACATGCAAGTATCCGCACCCCAGTGAGAATGCCCT -CTAAATCACGTCTCTACGATTAAAAGGAGCAGGTATCAAGCACACTAGAA -AGTAGCTCATAACACCTTGCTCAGCCACACCCCCACGGGACACAGCAGTG -ATAAAAATTAAGCTATGAACGAAAGTTCGACTAAGTCATATTAAATAAGG -GTTGGTAAATTTCGTGCCAGCCACCGCGGTCATACGATTAACCCAAATTA -ATAAATCTCCGGCGTAAAGCGTGTCAAAGACTAATACCAAAATAAAGTTA -AAACCCAGTTAAGCCGTAAAAAGCTACAACCAAAGTAAAATAGACTACGA -AAGTGACTTTAATACCTCTGACTACACGATAGCTAAGACCCAAACTGGGA -TTAGATACCCCACTATGCTTAGCCCTAAACTAAAATAGCTTACCACAACA -AAGCTATTCGCCAGAGTACTACTAGCAACAGCCTAAAACTCAAAGGACTT -GGCGGTGCTTTACATCCCTCTAGAGGAGCCTGTTCCATAATCGATAAACC -CCGATAAACCCCACCATCCCTTGCTAATTCAGCCTATATACCGCCATCTT -CAGCAAACCCTAAACAAGGTACCGAAGTAAGCACAAATATCCAACATAAA -AACGTTAGGTCAAGGTGTAGCCCATGGGATGGAGAGAAATGGGCTACATT -TTCTACCCTAAGAACAAGAACTTTAACCCGGACGAAAGTCTCCATGAAAC -TGGAGACTAAAGGAGGATTTAGCAGTAAATTAAGAATAGAGAGCTTAATT -GAATCAGGCCATGAAGCGCGCACACACCGCCCGTCACCCTCCTTAAATAT -CACAAATCATAACATAACATAAAACCGTGACCCAAACATATGAAAGGAGA -CAAGTCGTAACAAGGTAAGTATACCGGAAGGTGTACTTGGATAACCAAAG -TGTAGCTTAAACAAAGCATCCAGCTTACACCTAGAAGATTTCACTCAAAA -TGAACACTTTGAACTAAAGCTAGCCCAAACAATACCTAATTCAATTACCC -TTAGTCACTTAACTAAAACATTCACCAAACCATTAAAGTATAGGAGATAG -AAATTTTAACTTGGCGCTATAGAGAAAGTACCGTAAGGGAACGATGAAAG -ATGCATTAAAAGTACTAAACAGCAAAGCTTACCCCTTTTACCTTTTGCAT -AATGATTTAACTAGAATAAACTTAGCAAAGAGAACTTAAGCTAAGCACCC -CGAAACCAGACGAGCTACCTATGAACAGTTACAAATGAACCAACTCATCT -ATGTCGCAAAATAGTGAGAAGATTCGTAGGTAGAGGTGAAAAGCCCAACG -AGCCTGGTGATAGCTGGTTGTCCAGAAACAGAATTTCAGTTCAAATTTAA -ATTTACCTAAAAACTACTCAATTCTAATGTAAATTTAAATTATAGTCTAA -AAAGGTACAGCTTTTTAGATACAGGTTACAACCTTCATTAGAGAGTAAGA -ACAAGATAAACCCATAGTTGGCTTAAAAGCAGCCATCAATTAAGAAAGCG -TTCAAGCTCAACGACACATCTATCTTAATCCCAACAATCAACCCAAACTA -ACTCCTAATCTCATACTGGACTATTCTATCAACACATAGAAGCAATAATG -TTAATATGAGTAACAAGAATTATTTCTCCTTGCATAAGCTTATATCAGAA -CGAATACTCACTGATAGTTAACAACAAGATAGGGATAATCCAAAAACTAA -TCATCTATTTAAACCATTGTTAACCCAACACAGGCATGCATCTATAAGGA -AAGATTAAAAGAAGTAAAAGGAACTCGGCAAACACAAACCCCGCCTGTTT -ACCAAAAACATCACCTCTAGCATTTCCAGTATTAGAGGCACTGCCTGCCC -AGTGACATCTGTTtaaacggccgcggtatcctaaccgtgcaaaggtagca -taatcacttgttccctaaatagggacttgtatgaatggccacacgagggt -tttactgtctcttacttccaatcagtgaaattgaccttcccgtgaagagg -cgggaatgactaaataagacgagaagaccctatggagcttTAATTAACTG -ATTCACAAAAAACAACACACAAACCTTAACCTTCAGGGACAACAAAACTT -TTGATTGAATCAGCAATTTCGGTTGGGGTGACCTCGGAGAACAAAACAAC -CTCCGAGTGATTTAAATCCAGACTAACCAGTCAAAATATATAATCACTTA -TTGATCCAAACCATTGATCAACGGAACAAGTTACCCTAGGGATAACAGCG -CAATCCTATTCCAGAGTCCATATCGACAATTAGGGTTTACGACCTCGATG -TTGGATCAAGACATCCTAATGGTGCAACCGCTATTAAGGGTTCGTTTGTT -CAACGATTAAAGTCTTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGT -CGGTTTCTATCTATTCTATACTTTTCCCAGTACGAAAGGACAAGAAAAGT -AGGGCCCACTTTACAAGAAGCGCCCTCAAACTAATAGATGACATAATCTA -AATCTAACTAATTTATAACTTCTACCGCCCTAGAACAGGGCTCgttaggg -tggcagagcccggaaattgcataaaacttaaacctttacactcagaggtt -caactcctctccctaacaacaTGTTCATAATTAACGTCCTCCTCCTAATT -GTCCCAATCTTGCTCGCCGTAGCATTCCTCACACTAGTTGAACGAAAAGT -CTTAGGCTATATGCAACTTCGCAAAGGACCCAACATCGTAGGCCCCTATG -GCCTACTACAACCTATTGCCGATGCCCTCAAACTATTTATCAAAGAGCCA -CTACAACCACTAACATCATCGACATCCATATTCATCATCGCACCAATCCT -AGCCCTAACCCTGGCCTTAACCATATGAATCCCTCTGCCCATACCATACC -CACTAATCAACATAAACCTAGGAATTCTATTCATACTAGCCATGTCCAGC -CTAGCTGTCTACTCAATCCTTTGATCAGGATGGGCCTCAAACTCAAAATA -CGCCCTAATTGGAGCTCTACGAGCAGTAGCACAAACCATCTCATACGAAG -TAACTCTAGCAATCATCCTACTCTCAGTCCTCCTAATAAGCGGATCATTC -ACATTATCAACACTTATTATTACCCAAGAATACCTCTGATTAATCTTCCC -ATCATGACCCTTAGCCATAATGTGATTCATCTCAACATTAGCCGAAACCA -ACCGAGCTCCATTTGACCTAACAGAAGGAGAATCAGAACTCGTCTCTGGA -TTCAACGTTGAATACGCAGCCGGCCCATTTGCTCTATTCTTCCTAGCAGA -ATACGCAAACATCATCATGATAAACATCTTCACAACAACCCTATTTCTAG -GAGCATTTCACAACCCCTACCTGCCAGAACTCTACTCAATTAATTTCACC -ATTAAAGCTCTCCTTCTAACATGTTCCTTCCTATGAATCCGAGCATCCTA -CCCACGATTCCGATATGACCAACTTATACACCTCCTATGAAAGAACTTCC -TACCACTCACACTAGCCCTCTGCATATGACACGTCTCACTTCCAATCATA -CTATCCAGCATCCCACCACAAACATAGGAAATATGTCTGACAAAAGAGTT -ACTTTGATAGAGTAAAACATAGAGGCTCAAACCCTCTTATTTctagaact -acaggaattgaacctgctcctgagaattcaaaatcctccgtgctaccgaa -ttacaccatgtcctaCAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCAT -ACCCCGAAAATGTTGGATTACACCCTTCCCGTACTAATAAATCCCCTTAT -CTTCACAACTATTCTAATAACAGTTCTTCTAGGAACTATAATCGTTATAA -TAAGCTCACACTGACTAATAATCTGAATCGGATTTGAAATAAATCTACTA -GCCATTATCCCTATCCTAATAAAAAAGTACAATCCCCGAACCATAGAAGC -CTCCACCAAATATTTTCTAACCCAAGCCACCGCATCAATACTCCTCATAA -TAGCGATCATCATTAACCTCATACACTCAGGCCAATGAACAATCACAAAA -GTCTTCAACCCCACAGCGTCCATCATTATAACTTCAGCTCTCGCCATAAA -ACTTGGACTCACACCATTCCACTTCTGAGTACCCGAAGTCACACAGGGCA -TCTCATTAACATCAGGTCTCATCCTACTTACATGACAAAAACTAGCCCCA -ATATCAATCCTATATCAAATCTCACCCTCAATTAACCTAAATATCTTATT -AACTATAGCCGTACTGTCAATCCTAGTAGGAGGCTGAGGCGGTCTCAACC -AAACCCAACTACGAAAAATCATAGCATACTCGTCAATCGCGCATATAGGA -TGAATAACAGCTGTCCTAGTATATAACCCAACACTAACAATACTAAACAT -ATTAATTTACATTATAATAACACTCACAATATTCATACTATTTATCCACA -GCTCCTCTACTACAACACTATCACTCTCCCACACATGAAACAAAATACCT -CTAACCACTACACTAATCTTAATTACCTTACTATCCATAGGAGGCCTCCC -CCCACTATCAGGATTCATACCCAAATGAATAATCATTCAAGAGCTCACCA -AAAATAGCAGCATCATCCTCCCCACACTAATAGCCATTATAGCACTACTC -AACCTCTACTTCTACATACGACTAACCTATTCCACCTCACTGACCATATT -CCCATCCACAAACAACATAAAAATAAAATGACAATTCGAAACCAAACGAA -TTACTCTCTTACCCCCGTTAATTGTTATATCCTCCCTACTCCTCCCCCTA -ACCCCCATACTATCAATTTTGGACTAGGAATTTAGGTTAACATCCCAGAC -CAAGAGCCTTCAAAGCTCTAAGCAAGTGAATCCACTTAATTCCTGCATAC -TAAGGACTGCGAGACTCTATCTCACATCAATTGAACGCAAATCAAACTCT -TTTATTAAGCTAAGCCCTTACTAGATTGGTGGGCTACCATCCCACGAAAT -TTTAGTTAACAGCTAAATACCCTAATCAACTGGCTTCAATCTACTTCTCC -CGCCGCCTAGAAAAAAAGGCGGGAGAAGCCCCGGCAGAAATTGAAGCTGC -TCCTTTGAATTTGCAATTCAATGTGAAAATTCACCACGGGACTTGATAAG -AAGAGGATTCCAACCCCTGTCTTTAGATTTACAGTCTAATGCTTACTCAG -CCATCTTACCTATGTTCATCAACCGCTGACTATTTTCAACTAACCACAAA -GACATCGGCACTCTGTACCTCCTATTCGGCGCTTGAGCTGGAATAGTAGG -AACTGCCCTAAGCCTCCTAATCCGTGCTGAATTAGGCCAACCTGGGACCC -TACTAGGAGATGATCAGATCTACAATGTCATTGTAACCGCCCATGCATTC -GTAATAATTTTCTTTATGGTCATACCCATTATAATCGGAGGATTCGGAAA -CTGATTAGTCCCCCTGATAATTGGAGCACCTGATATAGCTTTCCCCCGAA -TAAACAACATAAGCTTCTGATTACTTCCCCCATCATTCCTACTTCTTCTC -GCTTCCTCAATAATTGAAGCAGGTGCCGGAACAGGCTGAACCGTATATCC -TCCTCTAGCTGGAAATCTGGCGCATGCAGGAGCCTCTGTTGACTTAACCA -TTTTCTCTCTCCACCTAGCTGGGGTGTCCTCGATTTTAGGTGCCATCAAC -TTTATTACCACAATCATTAACATAAAACCACCAGCCCTATCCCAATATCA -AACCCCCCTATTCGTTTGATCTGTCCTTATTACGGCAGTACTCCTTCTCC -TAGCCCTCCCGGTCCTAGCAGCAGGCATTACCATGCTTCTCACAGACCGT -AACCTGAACACTACTTTCTTCGACCCCGCAGGAGGAGGGGATCCAATCCT -TTATCAACACCTATTCTGATTCTTCGGACACCCCGAAGTCTATATTCTTA -TCCTACCAGGCTTCGGTATAATCTCACACATCGTCACATACTACTCAGGT -AAAAAGGAACCTTTTGGCTACATGGGTATAGTGTGAGCTATAATATCCAT -TGGCTTTCTAGGCTTCATCGTATGGGCTCACCACATGTTTACAGTAGGGA -TAGACGTTGACACACGAGCATACTTCACATCAGCTACCATAATCATCGCT -ATCCCTACTGGTGTAAAAGTATTCAGCTGACTAGCCACCCTGCACGGAGG -AAATATCAAATGATCTCCAGCTATACTCTGAGCTCTAGGCTTCATCTTCT -TATTCACAGTAGGAGGTCTAACAGGAATCGTCCTAGCTAACTCATCCCTA -GATATTGTTCTCCACGATACTTATTATGTAGTAGCACATTTCCATTATGT -CCTGTCTATAGGAGCAGTCTTCGCCATTATGGGGGGATTTGTACACTGAT -TCCCTCTATTCTCAGGATACACACTCAACCAAACCTGAGCAAAAATCCAC -TTTACAATTATATTCGTAGGGGTAAATATAACCTTCTTCCCACAACATTT -CCTTGGCCTCTCAGGAATGCCACGACGCTATTCTGATTATCCAGACGCAT -ATACAACATGAAATACCATCTCATCCATAGGATCTTTTATCTCACTTACA -GCAGTGATACTAATAATTTTCATAATTTGAGAAGCGTTCGCATCCAAACG -AGAAGTGTCTACAGTAGAATTAACCTCAACTAATCTGGAATGACTACACG -GATGCCCCCCACCATACCACACATTTGAAGAACCCACCTACGTAAACCTA -AAAtaagaaaggaaggaatcgaaccccctctaactggtttcaagccaata -tcataaccactatgtctttctcCATCAATTGAGGTATTAGTAAAAATTAC -ATGACTTTGTCAAAGTTAAATTATAGGTTAAACCCCTATATACCTCTATG -GCCTACCCCTTCCAACTAGGATTCCAAGACGCAACATCCCCTATTATAGA -AGAACTCCTACACTTCCACGACCACACACTAATAATCGTATTCCTAATTA -GCTCTCTAGTATTATATATTATCTCATCAATACTAACAACTAAATTAACC -CATACCAGCACCATAGATGCTCAAGAAGTAGAGACAATTTGAACGATTTT -ACCAGCCATCATCCTTATTCTAATCGCCCTCCCATCCCTACGAATTCTAT -ATATAATAGATGAAATCAATAATCCGTCCCTCACAGTCAAAACAATAGGC -CACCAATGATACTGAAGCTACGAGTATACCGATTACGAAGACTTGACCTT -TGACTCCTACATGATCCCCACATCAGACCTAAAACCAGGAGAATTACGTC -TTCTAGAAGTCGACAATCGAGTGGTTCTCCCCATAGAAATAACCATCCGA -ATGCTAATTTCATCCGAAGACGTCCTACACTCATGAGCTGTGCCCTCCCT -AGGCCTAAAAACAGACGCTATCCCTGGGCGCCTAAATCAGACAACTCTCG -TGGCCTCTCGACCAGGACTTTACTACGGTCAATGCTCAGAGATCTGCGGA -TCAAACCACAGCTTTATACCAATTGTCCTTGAACTAGTTCCACTGAAACA -CTTCGAAGAATGATCTGCATCAATATTATAAAGTCACTAAGAAGCTATTA -TAGCATTAACCTTTTAAGTTAAAGATTGAGGGTTCAACCCCCTCCCTAGT -GATATGCCACAGTTGGATACATCAACATGATTTATTAATATCGTCTCAAT -AATCCTAACTCTATTTATTGTATTTCAACTAAAAATCTCAAAGCACTCCT -ATCCGACACACCCAGAAGTAAAGACAACCAAAATAACAAAACACTCTGCC -CCTTGAGAATCAAAATGAACGAAAATCTATTCGCCTCTTTCGCTACCCCA -ACAATAGTAGGCCTCCCTATTGTAATTCTGATCATCATATTTCCCAGCAT -CCTATTCCCCTCACCCAACCGACTAATCAACAATCGCCTAATCTCAATTC -AACAATGGCTAGTCCAACTTACATCAAAACAAATAATAGCTATCCATAAC -AGCAAAGGACAAACCTGAACTCTTATACTCATATCACTGATCCTATTCAT -TGGCTCAACAAACTTATTAGGCCTACTACCTCACTCATTTACACCAACAA -CACAACTATCAATAAACCTAGGCATAGCTATTCCCCTATGGGCAGGGACA -GTATTCATAGGCTTTCGTCACAAAACAAAAGCAGCCCTAGCCCACTTTCT -ACCTCAAGGGACGCCCATTTTCCTCATCCCCATACTAGTAATTATCGAGA -CTATCAGCCTATTTATTCAACCTGTAGCCCTAGCCGTGCGGCTAACCGCT -AACATTACCGCCGGACACCTCCTAATACACCTCATCGGAGGGGCAACACT -AGCCCTCATAAGCATCAGCCCCTCAACAGCCCTTATTACGTTTATCATCC -TAATTCTACTAACTATCCTCGAATTCGCAGTAGCTATAATCCAAGCCTAC -GTATTCACTCTCCTGGTAAGCCTTTACTTACACGACAACACCTAATGACC -CACCAAACCCACGCTTACCACATAGTAAACCCCAGCCCATGACCACTTAC -AGGAGCCCTATCAGCCCTCCTGATAACATCAGGACTAGCCATGTGATTTC -ACTTTAACTCAACCTTACTTCTAGCTATAGGGCTATTAACTAACATCCTT -ACCATATATCAATGATGACGAGACATCATCCGAGAAAGCACATTCCAAGG -CCATCACACATCAATCGTTCAAAAGGGACTCCGATATGGCATAATCCTTT -TTATTATCTCAGAAGTCTTCTTCTTCTCTGGCTTCTTCTGAGCCTTTTAC -CACTCAAGCCTAGCCCCCACACCCGAACTAGGCGGCTGCTGACCACCCAC -AGGTATCCACCCCTTAAACCCCCTAGAAGTCCCCTTACTCAACACCTCAG -TGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCACCATAGCCTAATA -GAAGGAAACCGTAAAAATATGCTCCAAGGCCTATTCATCACAATTTCACT -AGGCGTATACTTCACCCTTCTCCAAGCCTCAGAATACTATGAAGCCTCAT -TTACTATTTCAGATGGAGTATACGGATCAACATTTTTCGTAGCAACAGGG -TTCCACGGACTACACGTAATTATCGGATCTACCTTCCTCATTGTATGTTT -CCTACGCCAACTAAAATTCCACTTTACATCCAGCCACCACTTCGGATTCG -AAGCAGCCGCTTGATACTGACACTTCGTCGACGTAGTCTGACTATTCTTG -TACGTCTCTATTTATTGATGAGGATCCTATTCTTTTAGTATTGACCAGTA -CAATTGACTTCCAATCAATCAGCTTCGGTATAACCCGAAAAAGAATAATA -AACCTCATACTGACACTCCTCACTAACACATTACTAGCCTCGCTACTCGT -ACTCATCGCATTCTGACTACCACAACTAAACATCTATGCAGAAAAAACCA -GCCCATATGAATGCGGATTTGACCCTATAGGGTCAGCACGCCTCCCCTTC -TCAATAAAATTTTTCTTAGTGGCCATTACATTTCTGCTATTCGACTTAGA -AATTGCCCTCCTATTACCCCTTCCATGAGCATCCCAAACAACTAACCTAA -ACACTATACTTATCATAGCACTAGTCCTAATCTCTCTTCTAGCCATCAGC -CTAGCCTACGAATGAACCCAAAAAGGACTAGAATGAACTGAGTATGGTAA -TTAGTTTAAACCAAAACAAATGATTTCGACTCATTAAACTATGATTAACT -TCATAATTACCAACATGTCACTAGTCCATATTAATATCTTCCTAGCATTC -ACAGTATCCCTCGTAGGCCTACTAATGTACCGATCCCACCTAATATCCTC -ACTCCTATGCCTAGAAGGAATAATACTATCACTATTCGTCATAGCAACCA -TAATAGTCCTAAACACCCACTTCACACTAGCTAGTATAATACCTATCATC -TTACTAGTATTTGCTGCCTGCGAACGAGCTCTAGGATTATCCCTACTAGT -CATAGTCTCCAATACTTATGGAGTAGACCACGTACAAAACCTTAACCTCC -TCCAATGCTAAAAATTATCATTCCCACAATCATACTTATGCCCCTTACAT -GACTATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATAGTCTA -TTAATCAGCCTTATCAGCCTATCCCTCCTAAACCAACCTAGCAACAATAG -CCTAAACTTCTCACTAATATTCTTCTCCGATCCCCTATCAGCCCCACTTC -TGGTGTTGACAACATGACTACTGCCACTAATACTCATAGCCAGCCAACAC -CATCTATCTAAGGAACCACTAATCCGAAAAAAACTCTACATCACCATGCT -AACCATACTTCAAACTTTCCTAATCATGACTTTTACCGCCACAGAACTAA -TCTCCTTCTACATCCTATTTGAAGCCACATTAGTTCCAACACTAATTATC -ATCACCCGCTGAGGCAACCAAACAGAACGCCTGAACGCAGGCCTCTACTT -CCTATTCTACACACTAATAGGTTCCCTCCCACTCTTAGTTGCACTAATCT -CTATCCAAAACCTAACAGGCTCACTAAACTTCCTATTAATTCAATACTGA -AACCAAGCACTACCCGACTCTTGATCCAATATTTTCCTATGACTAGCATG -TATAATAGCATTCATAGTCAAAATACCGGTATATGGTCTTCACCTCTGAC -TCCCAAAAGCCCATGTAGAAGCCCCAATTGCCGGATCCATAGTGCTAGCA -GCCATTCTACTAAAACTAGGAGGCTACGGAATACTACGAATTACAACAAT -ACTAAACCCCCAAACTAGCTTTATAGCCTACCCCTTCCTCATACTATCCC -TGTGAGGAATAATCATAACTAGTTCCATCTGCTTGCGACAAACCGATCTA -AAATCACTTATTGCATACTCCTCTGTCAGCCACATAGCCCTAGTAATCGT -AGCCGTCCTCATCCAAACACCATGAAGTTATATAGGAGCTACAGCCCTAA -TAATCGCTCACGGCCTTACATCATCAATACTATTCTGCCTGGCAAACTCA -AATTACGAACGTACCCATAGCCGAACTATAATCCTAGCCCGCGGGCTTCA -AACACTTCTTCCCCTTATAGCAGCCTGATGACTATTAGCCAGCCTAACCA -ACCTGGCCCTCCCTCCCAGCATTAACCTAATTGGAGAGCTATTCGTAGTA -ATATCATCATTCTCATGATCAAATATTACCATTATCCTAATAGGAGCCAA -TATCACCATCACCGCCCTCTACTCCCTATACATACTAATCACAACACAAC -GAGGGAAATACACACACCATATCAACAGCATTAAACCTTCATTTACACGA -GAAAACGCACTCATGGCCCTCCACATGACTCCCCTACTACTCCTATCACT -TAACCCTAAAATTATCCTAGGCTTTACGTACTGTAAATATAGTTTAACAA -AAACACTAGATTGTGGATCTAGAAACAGAAACTTAATATTTCTTATTTAC -CGAGAAAGTATGCAAGAACTGCTAATTCATGCCCCCATGTCCAACAAACA -TGGCTCTCTCAAACTTTTAAAGGATAGGAGCTATCCGTTGGTCTTAGGAA -CCAAAAAATTGGTGCAACTCCAAATAAAAGTAATCAACATGTTCTCCTCC -CTCATACTAGTTTCACTATTAGTACTAACCCTCCCAATCATATTATCAAT -CTTCAATACCTACAAAAACAGCACGTTCCCGCATCATGTAAAAAACACTA -TCTCATATGCCTTCATTACTAGCCTAATTCCCACTATAATATTTATTCAC -TCTGGACAAGAAACAATTATCTCAAACTGACACTGAATAACCATACAAAC -CCTCAAACTATCCCTAAGCTTCAAACTAGATTACTTCTCAATAATTTTCG -TACCAGTAGCCCTATTCGTAACATGATCTATTATGGAATTCTCCCTATGA -TACATGCACTCAGATCCTTACATTACTCGATTTTTTAAATACTTACTTAC -ATTCCTCATCACTATAATAATTCTAGTCACAGCTAACAACCTTTTCCAAC -TGTTCATCGGATGGGAGGGAGTAGGCATCATGTCATTCTTACTAATCGGA -TGATGATACGGCCGAACAGATGCCAACACCGCGGCCCTTCAAGCAATCCT -TTATAACCGCATCGGGGATATCGGCTTCATCATGGCCATAGCCTGATTCC -TATTCAACACCAACACATGAGACCTCCAACAAATCTTCATACTCGACCCC -AACCTTACCAACCTCCCGCTCCTAGGCCTCCTCCTAGCCGCAACTGGCAA -ATCCGCTCAATTTGGACTCCACCCATGACTTCCTTCAGCCATAGAGGGCC -CTACACCAGTCTCAGCCCTACTCCACTCCAGCACAATAGTTGTAGCAGGC -GTCTTCCTGCTAATCCGCTTCCATCCACTAATAGAAAACAACAAAACAAT -CCAGTCACTTACCCTATGCCTAGGAGCCATCACCACACTATTCACAGCAA -TCTGCGCACTCACTCAAAACGATATCAAAAAAATCATTGCTTTCTCCACC -TCCAGCCAACTAGGCCTGATAATCGTAACCATCGGTATCAATCAACCCTA -CCTAGCATTCCTCCACATTTGCACTCACGCATTCTTCAAAGCTATACTAT -TTATATGTTCCGGATCCATTATCCACAGCCTAAATGACGAGCAAGATATC -CGAAAAATAGGCGGACTATTTAATGCAATACCCTTCACCACCACATCTCT -AATTATTGGCAGCCTTGCACTCACCGGAATTCCTTTCCTCACAGGCTTCT -ACTCCAAAGACCTCATCATCGAAACCGCCAACACATCGTACACCAACGCC -TGAGCCCTACTAATAACTCTCATTGCCACATCCCTCACAGCTGTCTACAG -TACCCGAATCATCTTCTTTGCACTCCTAGGGCAACCCCGCTTCCTCCCTC -TGACCTCAATCAACGAAAATAACCCCTTTCTAATTAACTCCATCAAACGC -CTCTTAATTGGCAGCATTTTTGCCGGATTCTTCATCTCCAACAATATCTA -CCCCACAACCGTCCCAGAAATAACCATACCTACTTACATAAAACTCACCG -CCCTCGCAGTAACCATCCTAGGATTTACACTAGCCCTAGAACTAAGCTTG -ATAACCCATAACTTAAAACTAGAACACTCCACCAACGTATTCAAATTCTC -CAACCTCCTAGGATACTACCCAACAATTATACACCGACTCCCACCGCTCG -CTAACCTATCAATAAGCCAAAAATCAGCATCACTTCTACTAGACTCAATC -TGACTAGAAAACATCCTGCCAAAATCTATCTCCCAGTTCCAAATAAAAAC -CTCGATCCTAATTTCCACCCAAAAAGGACAAATCAAATTATATTTCCTCT -CATTCCTCATCACCCTTACCCTAAGCATACTACTTTTTAATCTCCACGAG -TAACCTCTAAAATTACCAAGACCCCAACAAGCAACGATCAACCAGTCACA -ATCACAACCCAAGCCCCATAACTATACAATGCAGCAGCCCCTATAATTTC -CTCACTAAACGCCCCAGAATCTCCAGTATCATAAATAGCTCAAGCCCCCA -CACCACTAAACTTAAACACTACCCCCACTTCCTCACTCTTCAGAACATAT -AAAACCAACATAACCTCCATCAACAACCCTAAAAGAAATACCCCCATAAC -AGTCGTATTAGACACCCATACCTCAGGATACTGCTCAGTAGCCATAGCCG -TTGTATAACCAAAAACAACCAACATTCCTCCCAAATAAATCAAAAACACC -ATCAACCCCAAAAAGGACCCTCCAAAATTCATAATAATACCACAACCTAC -CCCTCCACTTACAATCAGCACTAAACCCCCATAAATAGGTGAAGGTTTTG -AAGAAAACCCCACAAAACTAACAACAAAAATAACACTCAAAATAAACACA -ATATATGTCATCATTATTCCCACGTGGAATCTAACCACGACCAATGACAT -GAAAAATCATCGTTGTATTTCAACTATAAGAACACCAATGACAAACATCC -GGAAATCTCACCCACTAATTAAAATCATCAATCACTCTTTTATTGACCTA -CCAGCCCCCTCAAACATTTCATCATGATGAAACTTCGGCTCCCTCCTAGG -AATCTGCCTAATCCTCCAAATCTTAACAGGCCTATTCCTAGCCATACACT -ACACATCAGACACGACAACTGCCTTCTCATCCGTCACTCACATCTGCCGA -GACGTTAACTACGGATGAATTATTCGCTACCTCCATGCCAACGGAGCATC -AATATTTTTTATCTGCCTCTTCATTCACGTAGGACGCGGCCTCTACTACG -GCTCTTACACATTCCTAGAGACATGAAACATTGGAATCATCCTACTTTTC -ACAGTTATAGCTACAGCATTCATGGGCTATGTCCTACCATGAGGCCAAAT -ATCCTTTTGAGGAGCAACAGTCATCACGAACCTCCTATCAGCAATTCCCT -ACATCGGTACTACCCTCGTCGAGTGAATCTGAGGTGGATTCTCAGTAGAC -AAAGCCACCCTTACCCGATTTTTTGCTTTCCACTTCATCCTACCCTTCAT -CATCACAGCCCTGGTAGTCGTACATTTACTATTTCTTCACGAAACAGGAT -CTAATAACCCCTCAGGAATCCCATCCGATATGGACAAAATCCCATTCCAC -CCATATTATACAATTAAAGACATCCTAGGACTCCTCCTCCTGATCTTGCT -CCTACTAACTCTAGTATTATTCTCCCCCGACCTCCTAGGAGACCCAGACA -ACTACACCCCAGCTAACCCTCTCAGCACTCCCCCTCATATTAAACCAGAA -TGGTACTTCCTGTTTGCCTACGCCATCCTACGCTCCATTCCCAACAAACT -AGGCGGCGTATTAGCCCTAATCCTCTCCATCCTGATCCTAGCACTCATCC -CCACCCTCCACATATCAAAACAACGAAGCATAATATTCCGGCCTCTCAGC -CAATGCGTATTCTGACTCTTAGTGGCAGACTTACTGACACTAACATGAAT -CGGCGGACAGCCAGTGGAACACCCATACGTAATTATCGGCCAACTGGCCT -CAATCCTCTACTTCTCCCTAATTCTCATTTTTATACCACTCGCAAGCACC -ATCGAAAACAATCTTCTAAAATGAAGAGTCCCTGTAGTATATCGCACATT -ACCCTGGTCTTGTAAACCAGAAAAGGGGGAAAACGTTTCCTCCCAAGGAC -TATCAAGGAAGAAGCTCTAGCTCCACCATCAACACCCAAAGCTGAAATTC -TACTTAAACTATTCCTTGATTTCTTCCCCTAAACGACAACAATTTACCCT -CATGTGCTATGTCAGTATCAGATTATACCCCCACATAACACCATACCCAC -CTGACATGCAATATCTTATGAATGGCCTATGTACGTCGTGCATTAAATTG -TCTGCCCCATGAATAATAAGCATGTACATAATATCATTTATCTTACATAA -GTACATTATATTATTGATCGTGCATACCCCATCCAAGTCAAATCATTTCC -AGTCAACACGCATATCACAGCCCATGTTCCACGAGCTTAATCACCAAGCC -GCGGGAAATCAGCAACCCTCCCAACTACGTGTCCCAATCCTCGCTCCGGG -CCCATCCAAACGTGGGGGTTTCTACAATGAAACTATACCTGGCATCTGGT -TCTTTCTTCAGGGCCATTCCCACCCAACCTCGCCCATTCTTTCCCCTTAA -ATAAGACATCTCGATGGACTAATGACTAATCAGCCCATGCTCACACATAA -CTGTGATTTCATGCATTTGGTATCTTTTTATATTTGGGGATGCTATGACT -CAGCTATGGCCGTCAAAGGCCTCGACGCAGTCAATTAAATTGAAGCTGGA -CTTAAATTGAACGTTATTCCTCCGCATCAGCAACCATAAGGTGTTATTCA -GTCCATGGTAGCGGGACATAGGAAACAAgtgcacctgtgcacctgtgcac -ctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacct -gtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgt -gcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgc -acctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcac -ctgtgcacctACCCGCGCAGTAAGCAAGTAATATAGCTTTCTTAATCAAA -CCCCCCCTACCCCCCATTAAACTCCACATATGTACATTCAACACAATCTT -GCCAAACCCCAAAAACAAGACTAAACAATGCACAATACTTCATGAAGCTT -AACCCTCGCATGCCAACCATAATAACTCAACACACCTAACAATCTTAACA -GAACTTTCCCCCCGCCATTAATACCAACATGCTACTTTAATCAATAAAAT -TTCCATAGACAGGCATCCCCCTAGATCTAATTTTCTAAATCTGTCAACCC -TTCTTCCCCC