Mercurial > repos > devteam > sam_to_bam
changeset 2:5a5f92086188 draft
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author | devteam |
---|---|
date | Tue, 13 Oct 2015 12:53:53 -0400 |
parents | 7095d651c95f |
children | 6bb1b0a4652f |
files | macros.xml sam_to_bam.py sam_to_bam.xml test-data/cached_locally/all_fasta.loc test-data/cached_locally/chr_m.fasta test-data/cached_locally/chr_m.fasta.fai test-data/cached_locally/fasta_indexes.loc test-data/chr_m.fasta.fai test-data/sam_to_bam_noheader_in2.sam test-data/sam_to_bam_out3.bam tool-data/fasta_indexes.loc.sample tool-data/sam_fa_indices.loc.sample tool-data/tool_data_table_conf.xml.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test tool_dependencies.xml |
diffstat | 16 files changed, 549 insertions(+), 325 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Oct 13 12:53:53 2015 -0400 @@ -0,0 +1,70 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="1.2">samtools</requirement> + <yield/> + </requirements> + </xml> + <xml name="citations"> + <citations> + <citation type="bibtex"> + @misc{SAM_def, + title={Definition of SAM/BAM format}, + url = {https://samtools.github.io/hts-specs/SAMv1.pdf},} + </citation> + <citation type="doi">10.1093/bioinformatics/btp352</citation> + <citation type="doi">10.1093/bioinformatics/btr076</citation> + <citation type="doi">10.1093/bioinformatics/btr509</citation> + <citation type="bibtex"> + @misc{Danecek_et_al, + Author={Danecek, P., Schiffels, S., Durbin, R.}, + title={Multiallelic calling model in bcftools (-m)}, + url = {http://samtools.github.io/bcftools/call-m.pdf},} + </citation> + <citation type="bibtex"> + @misc{Durbin_VCQC, + Author={Durbin, R.}, + title={Segregation based metric for variant call QC}, + url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},} + </citation> + <citation type="bibtex"> + @misc{Li_SamMath, + Author={Li, H.}, + title={Mathematical Notes on SAMtools Algorithms}, + url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},} + </citation> + <citation type="bibtex"> + @misc{SamTools_github, + title={SAMTools GitHub page}, + url = {https://github.com/samtools/samtools},} + </citation> + </citations> + </xml> + <xml name="version_command"> + <version_command>samtools --version | head -n 1 | awk '{ print $2 }'</version_command> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1:" level="fatal" description="Error" /> + </stdio> + </xml> + <token name="@no-chrom-options@"> +----- + +.. class:: warningmark + +**No options available? How to re-detect metadata** + +If you see a "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop down, you need to re-detect metadata for the dataset you are trying to process. To do this follow these steps: + +1. Click on the **pencil** icon adjacent to the dataset in the history +2. A new menu will appear in the center pane of the interface +3. Click **Datatype** tab +4. Set **New Type** to **BAM** +5. Click **Save** + +The medatada will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down. + + </token> + +</macros>
--- a/sam_to_bam.py Thu Mar 27 14:04:25 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,197 +0,0 @@ -#!/usr/bin/env python -""" -Converts SAM data to sorted BAM data. -usage: sam_to_bam.py [options] - --input1: SAM file to be converted - --dbkey: dbkey value - --ref_file: Reference file if choosing from history - --output1: output dataset in bam format - --index_dir: GALAXY_DATA_INDEX_DIR -""" - -import optparse, os, sys, subprocess, tempfile, shutil, gzip -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -from bx.cookbook import doc_optparse -from galaxy import util - -def stop_err( msg ): - sys.stderr.write( '%s\n' % msg ) - sys.exit() - -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - -def __main__(): - #Parse Command Line - parser = optparse.OptionParser() - parser.add_option( '', '--input1', dest='input1', help='The input SAM dataset' ) - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) - parser.add_option( '', '--output1', dest='output1', help='The output BAM dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) - ( options, args ) = parser.parse_args() - - # output version # of tool - try: - tmp = tempfile.NamedTemporaryFile().name - tmp_stdout = open( tmp, 'wb' ) - proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout ) - tmp_stdout.close() - returncode = proc.wait() - stdout = None - for line in open( tmp_stdout.name, 'rb' ): - if line.lower().find( 'version' ) >= 0: - stdout = line.strip() - break - if stdout: - sys.stdout.write( 'Samtools %s\n' % stdout ) - else: - raise Exception - except: - sys.stdout.write( 'Could not determine Samtools version\n' ) - - cached_seqs_pointer_file = '%s/sam_fa_indices.loc' % options.index_dir - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - tmp_dir = tempfile.mkdtemp( dir='.' ) - if not options.ref_file or options.ref_file == 'None': - # We're using locally cached reference sequences( e.g., /galaxy/data/equCab2/sam_index/equCab2.fa ). - # The indexes for /galaxy/data/equCab2/sam_index/equCab2.fa will be contained in - # a file named /galaxy/data/equCab2/sam_index/equCab2.fa.fai - fai_index_file_base = seq_path - fai_index_file_path = '%s.fai' % seq_path - if not os.path.exists( fai_index_file_path ): - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - stop_err( 'No sequences are available for build (%s), request them by reporting this error.' % options.dbkey ) - else: - try: - # Create indexes for history reference ( e.g., ~/database/files/000/dataset_1.dat ) using samtools faidx, which will: - # - index reference sequence in the FASTA format or extract subsequence from indexed reference sequence - # - if no region is specified, faidx will index the file and create <ref.fasta>.fai on the disk - # - if regions are specified, the subsequences will be retrieved and printed to stdout in the FASTA format - # - the input file can be compressed in the RAZF format. - # IMPORTANT NOTE: a real weakness here is that we are creating indexes for the history dataset - # every time we run this tool. It would be nice if we could somehow keep track of user's specific - # index files so they could be re-used. - fai_index_file_base = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - # At this point, fai_index_file_path will look something like /tmp/dataset_13.dat - os.symlink( options.ref_file, fai_index_file_base ) - fai_index_file_path = '%s.fai' % fai_index_file_base - command = 'samtools faidx %s' % fai_index_file_base - tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - if os.path.getsize( fai_index_file_path ) == 0: - raise Exception, 'Index file empty, there may be an error with your reference file or settings.' - except Exception, e: - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - stop_err( 'Error creating indexes from reference (%s), %s' % ( options.ref_file, str( e ) ) ) - try: - # Extract all alignments from the input SAM file to BAM format ( since no region is specified, all the alignments will be extracted ). - tmp_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir ) - tmp_aligns_file_name = tmp_aligns_file.name - tmp_aligns_file.close() - command = 'samtools view -bt %s -o %s %s' % ( fai_index_file_path, tmp_aligns_file_name, options.input1 ) - tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - except Exception, e: - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - stop_err( 'Error extracting alignments from (%s), %s' % ( options.input1, str( e ) ) ) - try: - # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command - # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted - # into memory ( controlled by option -m ). - tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir ) - tmp_sorted_aligns_file_name = tmp_sorted_aligns_file.name - tmp_sorted_aligns_file.close() - command = 'samtools sort %s %s' % ( tmp_aligns_file_name, tmp_sorted_aligns_file_name ) - tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - except Exception, e: - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - stop_err( 'Error sorting alignments from (%s), %s' % ( tmp_aligns_file_name, str( e ) ) ) - # Move tmp_aligns_file_name to our output dataset location - sorted_bam_file = '%s.bam' % tmp_sorted_aligns_file_name - shutil.move( sorted_bam_file, options.output1 ) - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - # check that there are results in the output file - if os.path.getsize( options.output1 ) > 0: - sys.stdout.write( 'SAM file converted to BAM' ) - else: - stop_err( 'Error creating sorted version of BAM file.' ) - -if __name__=="__main__": __main__()
--- a/sam_to_bam.xml Thu Mar 27 14:04:25 2014 -0400 +++ b/sam_to_bam.xml Tue Oct 13 12:53:53 2015 -0400 @@ -1,97 +1,89 @@ -<tool id="sam_to_bam" name="SAM-to-BAM" version="1.1.3"> - <description>converts SAM format to BAM format</description> - <requirements> - <requirement type="package" version="0.1.19">samtools</requirement> - </requirements> - <command interpreter="python"> - sam_to_bam.py - --input1=$source.input1 - #if $source.index_source == "history": - --dbkey=${ref_file.metadata.dbkey} - --ref_file=$source.ref_file - #else - --dbkey=${input1.metadata.dbkey} - #end if - --output1=$output1 - --index_dir=${GALAXY_DATA_INDEX_DIR} - </command> - <inputs> - <conditional name="source"> - <param name="index_source" type="select" label="Choose the source for the reference list"> - <option value="cached">Locally cached</option> - <option value="history">History</option> - </param> - <when value="cached"> - <param name="input1" type="data" format="sam" metadata_name="dbkey" label="SAM File to Convert"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" /> - </param> - </when> - <when value="history"> - <param name="input1" type="data" format="sam" label="Convert SAM file" /> - <param name="ref_file" type="data" format="fasta" metadata_name="dbkey" label="Using reference file" /> - </when> - </conditional> - </inputs> - <outputs> - <data format="bam" name="output1" label="${tool.name} on ${on_string}: converted BAM"> - <actions> - <conditional name="source.index_source"> - <when value="cached"> - <action type="metadata" name="dbkey"> - <option type="from_param" name="source.input1" param_attribute="dbkey" /> - </action> - </when> - <when value="history"> - <action type="metadata" name="dbkey"> - <option type="from_param" name="source.ref_file" param_attribute="dbkey" /> - </action> - </when> +<tool id="sam_to_bam" name="SAM-to-BAM" version="2.1"> + <description>convert SAM to BAM</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"></expand> + <expand macro="stdio"></expand> + <expand macro="version_command"></expand> + <command> + <![CDATA[ + #if $source.index_source == "history": + ln -s $source.ref_file input.fa && + samtools faidx input.fa && + #else + ln -s ${source.index.fields.path} input.fa && + ln -s ${source.index.fields.path}.fai input.fa.fai && + #end if + samtools view -b -@ \${GALAXY_SLOTS:-1} -t input.fa.fai "$input1" | samtools sort -O bam -@ \${GALAXY_SLOTS:-1} -o "$output1" -T temp + ]]> + </command> + <inputs> + <conditional name="source"> + <param label="Choose the source for the reference genome" name="index_source" type="select"> + <option value="cached">Use a built-in genome</option> + <option value="history">Use a genome from the history</option> + </param> + <when value="cached"> + <param format="sam" label="SAM file to convert" metadata_name="dbkey" name="input1" type="data"> + <validator type="unspecified_build" /> + <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="fasta_indexes" type="dataset_metadata_in_data_table" /> + </param> + <param label="Using reference genome" name="index" type="select"> + <options from_data_table="fasta_indexes"> + <filter column="1" key="dbkey" ref="input1" type="data_meta" /> + <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" /> + </options> + </param> + </when> + <when value="history"> + <param format="sam" label="SAM file to convert" name="input1" type="data" /> + <param format="fasta" label="Using reference file" metadata_name="dbkey" name="ref_file" type="data" /> + </when> </conditional> - </actions> - </data> - </outputs> - <tests> - <test> - <!-- - Sam-to-Bam command: - cp test-data/chr_m.fasta . - samtools faidx chr_m.fasta - samtools view -hbt chr_m.fasta.fai -o unsorted.bam test-data/sam_to_bam_in1.sam - samtools sort unsorted.bam sam_to_bam_out1 - chr_m.fasta is the reference file (chrM from equCab2) - --> - <param name="index_source" value="history" /> - <param name="input1" value="sam_to_bam_in1.sam" ftype="sam" /> - <param name="ref_file" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" /> - <output name="output1" file="sam_to_bam_out1.bam" ftype="bam" /> - </test> - <test> - <!-- - Sam-to-Bam command: - samtools view -hbt chr_m.fasta.fai -o unsorted.bam test-data/sam_to_bam_in1.sam - samtools sort unsorted.bam sam_to_bam_out2 - chr_m.fasta is the reference file and the index chr_m.fasta.fai - these should be in the same directory, and chrM is from equCab2 - --> - <param name="index_source" value="cached" /> - <param name="input1" value="sam_to_bam_in1.sam" ftype="sam" dbkey="chrM" /> - <output name="output1" file="sam_to_bam_out2.bam" ftype="bam" /> - </test> - </tests> - <help> - + </inputs> + <outputs> + <data format="bam" label="${tool.name} on ${on_string}: converted BAM" name="output1"> + <actions> + <conditional name="source.index_source"> + <when value="cached"> + <action name="dbkey" type="metadata"> + <option name="source.input1" param_attribute="dbkey" type="from_param" /> + </action> + </when> + <when value="history"> + <action name="dbkey" type="metadata"> + <option name="source.ref_file" param_attribute="dbkey" type="from_param" /> + </action> + </when> + </conditional> + </actions> + </data> + </outputs> + <tests> + <test> + <param name="index_source" value="history" /> + <param name="input1" ftype="sam" value="sam_to_bam_in1.sam" /> + <param dbkey="equCab2" ftype="fasta" name="ref_file" value="chr_m.fasta" /> + <output file="sam_to_bam_out1.bam" ftype="bam" name="output1" /> + </test> + <test> + <param name="index_source" value="history" /> + <param name="input1" ftype="sam" value="sam_to_bam_noheader_in2.sam" /> + <param dbkey="equCab2" ftype="fasta" name="ref_file" value="chr_m.fasta" /> + <output file="sam_to_bam_out3.bam" ftype="bam" name="output1" /> + </test> + </tests> + <help> +<![CDATA[ **What it does** -This tool uses the SAMTools_ toolkit to produce an indexed BAM file based on a sorted input SAM file. +Converts SAM dataset into its binary, BAM, representation using ``samtools sort`` and ``view`` commands:: -.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml - ------- + samtools sort -O bam -o sorted_input.bam [INPUT SAM] + samtools view -b -h -o -T [REFERENCE GENOME] [OUTPUT BAM] sorted_input.bam -**Citation** - -For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ - +]]> </help> + <expand macro="citations"></expand> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/all_fasta.loc Tue Oct 13 12:53:53 2015 -0400 @@ -0,0 +1,1 @@ +chrM chr_m Horse (Equus caballus): equCab2 chrM ${__HERE__}/chrM.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/chr_m.fasta Tue Oct 13 12:53:53 2015 -0400 @@ -0,0 +1,335 @@ +>chrM +GTTAATGTAGCTTAATAATATAAAGCAAGGCACTGAAAATGCCTAGATGA +GTATTCTTACTCCATAAACACATAGGCTTGGTCCTAGCCTTTTTATTAGT +TATTAATAGAATTACACATGCAAGTATCCGCACCCCAGTGAGAATGCCCT +CTAAATCACGTCTCTACGATTAAAAGGAGCAGGTATCAAGCACACTAGAA +AGTAGCTCATAACACCTTGCTCAGCCACACCCCCACGGGACACAGCAGTG +ATAAAAATTAAGCTATGAACGAAAGTTCGACTAAGTCATATTAAATAAGG +GTTGGTAAATTTCGTGCCAGCCACCGCGGTCATACGATTAACCCAAATTA +ATAAATCTCCGGCGTAAAGCGTGTCAAAGACTAATACCAAAATAAAGTTA +AAACCCAGTTAAGCCGTAAAAAGCTACAACCAAAGTAAAATAGACTACGA +AAGTGACTTTAATACCTCTGACTACACGATAGCTAAGACCCAAACTGGGA +TTAGATACCCCACTATGCTTAGCCCTAAACTAAAATAGCTTACCACAACA +AAGCTATTCGCCAGAGTACTACTAGCAACAGCCTAAAACTCAAAGGACTT +GGCGGTGCTTTACATCCCTCTAGAGGAGCCTGTTCCATAATCGATAAACC +CCGATAAACCCCACCATCCCTTGCTAATTCAGCCTATATACCGCCATCTT +CAGCAAACCCTAAACAAGGTACCGAAGTAAGCACAAATATCCAACATAAA +AACGTTAGGTCAAGGTGTAGCCCATGGGATGGAGAGAAATGGGCTACATT +TTCTACCCTAAGAACAAGAACTTTAACCCGGACGAAAGTCTCCATGAAAC +TGGAGACTAAAGGAGGATTTAGCAGTAAATTAAGAATAGAGAGCTTAATT +GAATCAGGCCATGAAGCGCGCACACACCGCCCGTCACCCTCCTTAAATAT +CACAAATCATAACATAACATAAAACCGTGACCCAAACATATGAAAGGAGA +CAAGTCGTAACAAGGTAAGTATACCGGAAGGTGTACTTGGATAACCAAAG +TGTAGCTTAAACAAAGCATCCAGCTTACACCTAGAAGATTTCACTCAAAA +TGAACACTTTGAACTAAAGCTAGCCCAAACAATACCTAATTCAATTACCC +TTAGTCACTTAACTAAAACATTCACCAAACCATTAAAGTATAGGAGATAG +AAATTTTAACTTGGCGCTATAGAGAAAGTACCGTAAGGGAACGATGAAAG +ATGCATTAAAAGTACTAAACAGCAAAGCTTACCCCTTTTACCTTTTGCAT +AATGATTTAACTAGAATAAACTTAGCAAAGAGAACTTAAGCTAAGCACCC +CGAAACCAGACGAGCTACCTATGAACAGTTACAAATGAACCAACTCATCT +ATGTCGCAAAATAGTGAGAAGATTCGTAGGTAGAGGTGAAAAGCCCAACG +AGCCTGGTGATAGCTGGTTGTCCAGAAACAGAATTTCAGTTCAAATTTAA +ATTTACCTAAAAACTACTCAATTCTAATGTAAATTTAAATTATAGTCTAA +AAAGGTACAGCTTTTTAGATACAGGTTACAACCTTCATTAGAGAGTAAGA +ACAAGATAAACCCATAGTTGGCTTAAAAGCAGCCATCAATTAAGAAAGCG +TTCAAGCTCAACGACACATCTATCTTAATCCCAACAATCAACCCAAACTA +ACTCCTAATCTCATACTGGACTATTCTATCAACACATAGAAGCAATAATG +TTAATATGAGTAACAAGAATTATTTCTCCTTGCATAAGCTTATATCAGAA +CGAATACTCACTGATAGTTAACAACAAGATAGGGATAATCCAAAAACTAA +TCATCTATTTAAACCATTGTTAACCCAACACAGGCATGCATCTATAAGGA +AAGATTAAAAGAAGTAAAAGGAACTCGGCAAACACAAACCCCGCCTGTTT +ACCAAAAACATCACCTCTAGCATTTCCAGTATTAGAGGCACTGCCTGCCC +AGTGACATCTGTTtaaacggccgcggtatcctaaccgtgcaaaggtagca +taatcacttgttccctaaatagggacttgtatgaatggccacacgagggt +tttactgtctcttacttccaatcagtgaaattgaccttcccgtgaagagg +cgggaatgactaaataagacgagaagaccctatggagcttTAATTAACTG +ATTCACAAAAAACAACACACAAACCTTAACCTTCAGGGACAACAAAACTT +TTGATTGAATCAGCAATTTCGGTTGGGGTGACCTCGGAGAACAAAACAAC +CTCCGAGTGATTTAAATCCAGACTAACCAGTCAAAATATATAATCACTTA +TTGATCCAAACCATTGATCAACGGAACAAGTTACCCTAGGGATAACAGCG +CAATCCTATTCCAGAGTCCATATCGACAATTAGGGTTTACGACCTCGATG +TTGGATCAAGACATCCTAATGGTGCAACCGCTATTAAGGGTTCGTTTGTT +CAACGATTAAAGTCTTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGT +CGGTTTCTATCTATTCTATACTTTTCCCAGTACGAAAGGACAAGAAAAGT +AGGGCCCACTTTACAAGAAGCGCCCTCAAACTAATAGATGACATAATCTA +AATCTAACTAATTTATAACTTCTACCGCCCTAGAACAGGGCTCgttaggg +tggcagagcccggaaattgcataaaacttaaacctttacactcagaggtt +caactcctctccctaacaacaTGTTCATAATTAACGTCCTCCTCCTAATT +GTCCCAATCTTGCTCGCCGTAGCATTCCTCACACTAGTTGAACGAAAAGT +CTTAGGCTATATGCAACTTCGCAAAGGACCCAACATCGTAGGCCCCTATG +GCCTACTACAACCTATTGCCGATGCCCTCAAACTATTTATCAAAGAGCCA +CTACAACCACTAACATCATCGACATCCATATTCATCATCGCACCAATCCT +AGCCCTAACCCTGGCCTTAACCATATGAATCCCTCTGCCCATACCATACC +CACTAATCAACATAAACCTAGGAATTCTATTCATACTAGCCATGTCCAGC +CTAGCTGTCTACTCAATCCTTTGATCAGGATGGGCCTCAAACTCAAAATA +CGCCCTAATTGGAGCTCTACGAGCAGTAGCACAAACCATCTCATACGAAG +TAACTCTAGCAATCATCCTACTCTCAGTCCTCCTAATAAGCGGATCATTC +ACATTATCAACACTTATTATTACCCAAGAATACCTCTGATTAATCTTCCC +ATCATGACCCTTAGCCATAATGTGATTCATCTCAACATTAGCCGAAACCA +ACCGAGCTCCATTTGACCTAACAGAAGGAGAATCAGAACTCGTCTCTGGA +TTCAACGTTGAATACGCAGCCGGCCCATTTGCTCTATTCTTCCTAGCAGA +ATACGCAAACATCATCATGATAAACATCTTCACAACAACCCTATTTCTAG +GAGCATTTCACAACCCCTACCTGCCAGAACTCTACTCAATTAATTTCACC +ATTAAAGCTCTCCTTCTAACATGTTCCTTCCTATGAATCCGAGCATCCTA +CCCACGATTCCGATATGACCAACTTATACACCTCCTATGAAAGAACTTCC +TACCACTCACACTAGCCCTCTGCATATGACACGTCTCACTTCCAATCATA +CTATCCAGCATCCCACCACAAACATAGGAAATATGTCTGACAAAAGAGTT +ACTTTGATAGAGTAAAACATAGAGGCTCAAACCCTCTTATTTctagaact +acaggaattgaacctgctcctgagaattcaaaatcctccgtgctaccgaa +ttacaccatgtcctaCAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCAT +ACCCCGAAAATGTTGGATTACACCCTTCCCGTACTAATAAATCCCCTTAT +CTTCACAACTATTCTAATAACAGTTCTTCTAGGAACTATAATCGTTATAA +TAAGCTCACACTGACTAATAATCTGAATCGGATTTGAAATAAATCTACTA +GCCATTATCCCTATCCTAATAAAAAAGTACAATCCCCGAACCATAGAAGC +CTCCACCAAATATTTTCTAACCCAAGCCACCGCATCAATACTCCTCATAA +TAGCGATCATCATTAACCTCATACACTCAGGCCAATGAACAATCACAAAA +GTCTTCAACCCCACAGCGTCCATCATTATAACTTCAGCTCTCGCCATAAA +ACTTGGACTCACACCATTCCACTTCTGAGTACCCGAAGTCACACAGGGCA +TCTCATTAACATCAGGTCTCATCCTACTTACATGACAAAAACTAGCCCCA +ATATCAATCCTATATCAAATCTCACCCTCAATTAACCTAAATATCTTATT +AACTATAGCCGTACTGTCAATCCTAGTAGGAGGCTGAGGCGGTCTCAACC +AAACCCAACTACGAAAAATCATAGCATACTCGTCAATCGCGCATATAGGA +TGAATAACAGCTGTCCTAGTATATAACCCAACACTAACAATACTAAACAT +ATTAATTTACATTATAATAACACTCACAATATTCATACTATTTATCCACA +GCTCCTCTACTACAACACTATCACTCTCCCACACATGAAACAAAATACCT +CTAACCACTACACTAATCTTAATTACCTTACTATCCATAGGAGGCCTCCC +CCCACTATCAGGATTCATACCCAAATGAATAATCATTCAAGAGCTCACCA +AAAATAGCAGCATCATCCTCCCCACACTAATAGCCATTATAGCACTACTC +AACCTCTACTTCTACATACGACTAACCTATTCCACCTCACTGACCATATT +CCCATCCACAAACAACATAAAAATAAAATGACAATTCGAAACCAAACGAA +TTACTCTCTTACCCCCGTTAATTGTTATATCCTCCCTACTCCTCCCCCTA +ACCCCCATACTATCAATTTTGGACTAGGAATTTAGGTTAACATCCCAGAC +CAAGAGCCTTCAAAGCTCTAAGCAAGTGAATCCACTTAATTCCTGCATAC +TAAGGACTGCGAGACTCTATCTCACATCAATTGAACGCAAATCAAACTCT +TTTATTAAGCTAAGCCCTTACTAGATTGGTGGGCTACCATCCCACGAAAT +TTTAGTTAACAGCTAAATACCCTAATCAACTGGCTTCAATCTACTTCTCC +CGCCGCCTAGAAAAAAAGGCGGGAGAAGCCCCGGCAGAAATTGAAGCTGC +TCCTTTGAATTTGCAATTCAATGTGAAAATTCACCACGGGACTTGATAAG +AAGAGGATTCCAACCCCTGTCTTTAGATTTACAGTCTAATGCTTACTCAG +CCATCTTACCTATGTTCATCAACCGCTGACTATTTTCAACTAACCACAAA +GACATCGGCACTCTGTACCTCCTATTCGGCGCTTGAGCTGGAATAGTAGG +AACTGCCCTAAGCCTCCTAATCCGTGCTGAATTAGGCCAACCTGGGACCC +TACTAGGAGATGATCAGATCTACAATGTCATTGTAACCGCCCATGCATTC +GTAATAATTTTCTTTATGGTCATACCCATTATAATCGGAGGATTCGGAAA +CTGATTAGTCCCCCTGATAATTGGAGCACCTGATATAGCTTTCCCCCGAA +TAAACAACATAAGCTTCTGATTACTTCCCCCATCATTCCTACTTCTTCTC +GCTTCCTCAATAATTGAAGCAGGTGCCGGAACAGGCTGAACCGTATATCC +TCCTCTAGCTGGAAATCTGGCGCATGCAGGAGCCTCTGTTGACTTAACCA +TTTTCTCTCTCCACCTAGCTGGGGTGTCCTCGATTTTAGGTGCCATCAAC +TTTATTACCACAATCATTAACATAAAACCACCAGCCCTATCCCAATATCA +AACCCCCCTATTCGTTTGATCTGTCCTTATTACGGCAGTACTCCTTCTCC +TAGCCCTCCCGGTCCTAGCAGCAGGCATTACCATGCTTCTCACAGACCGT +AACCTGAACACTACTTTCTTCGACCCCGCAGGAGGAGGGGATCCAATCCT +TTATCAACACCTATTCTGATTCTTCGGACACCCCGAAGTCTATATTCTTA +TCCTACCAGGCTTCGGTATAATCTCACACATCGTCACATACTACTCAGGT +AAAAAGGAACCTTTTGGCTACATGGGTATAGTGTGAGCTATAATATCCAT +TGGCTTTCTAGGCTTCATCGTATGGGCTCACCACATGTTTACAGTAGGGA +TAGACGTTGACACACGAGCATACTTCACATCAGCTACCATAATCATCGCT +ATCCCTACTGGTGTAAAAGTATTCAGCTGACTAGCCACCCTGCACGGAGG +AAATATCAAATGATCTCCAGCTATACTCTGAGCTCTAGGCTTCATCTTCT +TATTCACAGTAGGAGGTCTAACAGGAATCGTCCTAGCTAACTCATCCCTA +GATATTGTTCTCCACGATACTTATTATGTAGTAGCACATTTCCATTATGT +CCTGTCTATAGGAGCAGTCTTCGCCATTATGGGGGGATTTGTACACTGAT +TCCCTCTATTCTCAGGATACACACTCAACCAAACCTGAGCAAAAATCCAC +TTTACAATTATATTCGTAGGGGTAAATATAACCTTCTTCCCACAACATTT +CCTTGGCCTCTCAGGAATGCCACGACGCTATTCTGATTATCCAGACGCAT +ATACAACATGAAATACCATCTCATCCATAGGATCTTTTATCTCACTTACA +GCAGTGATACTAATAATTTTCATAATTTGAGAAGCGTTCGCATCCAAACG +AGAAGTGTCTACAGTAGAATTAACCTCAACTAATCTGGAATGACTACACG +GATGCCCCCCACCATACCACACATTTGAAGAACCCACCTACGTAAACCTA +AAAtaagaaaggaaggaatcgaaccccctctaactggtttcaagccaata +tcataaccactatgtctttctcCATCAATTGAGGTATTAGTAAAAATTAC +ATGACTTTGTCAAAGTTAAATTATAGGTTAAACCCCTATATACCTCTATG +GCCTACCCCTTCCAACTAGGATTCCAAGACGCAACATCCCCTATTATAGA +AGAACTCCTACACTTCCACGACCACACACTAATAATCGTATTCCTAATTA +GCTCTCTAGTATTATATATTATCTCATCAATACTAACAACTAAATTAACC +CATACCAGCACCATAGATGCTCAAGAAGTAGAGACAATTTGAACGATTTT +ACCAGCCATCATCCTTATTCTAATCGCCCTCCCATCCCTACGAATTCTAT +ATATAATAGATGAAATCAATAATCCGTCCCTCACAGTCAAAACAATAGGC +CACCAATGATACTGAAGCTACGAGTATACCGATTACGAAGACTTGACCTT +TGACTCCTACATGATCCCCACATCAGACCTAAAACCAGGAGAATTACGTC +TTCTAGAAGTCGACAATCGAGTGGTTCTCCCCATAGAAATAACCATCCGA +ATGCTAATTTCATCCGAAGACGTCCTACACTCATGAGCTGTGCCCTCCCT +AGGCCTAAAAACAGACGCTATCCCTGGGCGCCTAAATCAGACAACTCTCG +TGGCCTCTCGACCAGGACTTTACTACGGTCAATGCTCAGAGATCTGCGGA +TCAAACCACAGCTTTATACCAATTGTCCTTGAACTAGTTCCACTGAAACA +CTTCGAAGAATGATCTGCATCAATATTATAAAGTCACTAAGAAGCTATTA +TAGCATTAACCTTTTAAGTTAAAGATTGAGGGTTCAACCCCCTCCCTAGT +GATATGCCACAGTTGGATACATCAACATGATTTATTAATATCGTCTCAAT +AATCCTAACTCTATTTATTGTATTTCAACTAAAAATCTCAAAGCACTCCT +ATCCGACACACCCAGAAGTAAAGACAACCAAAATAACAAAACACTCTGCC +CCTTGAGAATCAAAATGAACGAAAATCTATTCGCCTCTTTCGCTACCCCA +ACAATAGTAGGCCTCCCTATTGTAATTCTGATCATCATATTTCCCAGCAT +CCTATTCCCCTCACCCAACCGACTAATCAACAATCGCCTAATCTCAATTC +AACAATGGCTAGTCCAACTTACATCAAAACAAATAATAGCTATCCATAAC +AGCAAAGGACAAACCTGAACTCTTATACTCATATCACTGATCCTATTCAT +TGGCTCAACAAACTTATTAGGCCTACTACCTCACTCATTTACACCAACAA +CACAACTATCAATAAACCTAGGCATAGCTATTCCCCTATGGGCAGGGACA +GTATTCATAGGCTTTCGTCACAAAACAAAAGCAGCCCTAGCCCACTTTCT +ACCTCAAGGGACGCCCATTTTCCTCATCCCCATACTAGTAATTATCGAGA +CTATCAGCCTATTTATTCAACCTGTAGCCCTAGCCGTGCGGCTAACCGCT +AACATTACCGCCGGACACCTCCTAATACACCTCATCGGAGGGGCAACACT +AGCCCTCATAAGCATCAGCCCCTCAACAGCCCTTATTACGTTTATCATCC +TAATTCTACTAACTATCCTCGAATTCGCAGTAGCTATAATCCAAGCCTAC +GTATTCACTCTCCTGGTAAGCCTTTACTTACACGACAACACCTAATGACC +CACCAAACCCACGCTTACCACATAGTAAACCCCAGCCCATGACCACTTAC +AGGAGCCCTATCAGCCCTCCTGATAACATCAGGACTAGCCATGTGATTTC +ACTTTAACTCAACCTTACTTCTAGCTATAGGGCTATTAACTAACATCCTT +ACCATATATCAATGATGACGAGACATCATCCGAGAAAGCACATTCCAAGG +CCATCACACATCAATCGTTCAAAAGGGACTCCGATATGGCATAATCCTTT +TTATTATCTCAGAAGTCTTCTTCTTCTCTGGCTTCTTCTGAGCCTTTTAC +CACTCAAGCCTAGCCCCCACACCCGAACTAGGCGGCTGCTGACCACCCAC +AGGTATCCACCCCTTAAACCCCCTAGAAGTCCCCTTACTCAACACCTCAG +TGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCACCATAGCCTAATA +GAAGGAAACCGTAAAAATATGCTCCAAGGCCTATTCATCACAATTTCACT +AGGCGTATACTTCACCCTTCTCCAAGCCTCAGAATACTATGAAGCCTCAT +TTACTATTTCAGATGGAGTATACGGATCAACATTTTTCGTAGCAACAGGG +TTCCACGGACTACACGTAATTATCGGATCTACCTTCCTCATTGTATGTTT +CCTACGCCAACTAAAATTCCACTTTACATCCAGCCACCACTTCGGATTCG +AAGCAGCCGCTTGATACTGACACTTCGTCGACGTAGTCTGACTATTCTTG +TACGTCTCTATTTATTGATGAGGATCCTATTCTTTTAGTATTGACCAGTA +CAATTGACTTCCAATCAATCAGCTTCGGTATAACCCGAAAAAGAATAATA +AACCTCATACTGACACTCCTCACTAACACATTACTAGCCTCGCTACTCGT +ACTCATCGCATTCTGACTACCACAACTAAACATCTATGCAGAAAAAACCA +GCCCATATGAATGCGGATTTGACCCTATAGGGTCAGCACGCCTCCCCTTC +TCAATAAAATTTTTCTTAGTGGCCATTACATTTCTGCTATTCGACTTAGA +AATTGCCCTCCTATTACCCCTTCCATGAGCATCCCAAACAACTAACCTAA +ACACTATACTTATCATAGCACTAGTCCTAATCTCTCTTCTAGCCATCAGC +CTAGCCTACGAATGAACCCAAAAAGGACTAGAATGAACTGAGTATGGTAA +TTAGTTTAAACCAAAACAAATGATTTCGACTCATTAAACTATGATTAACT +TCATAATTACCAACATGTCACTAGTCCATATTAATATCTTCCTAGCATTC +ACAGTATCCCTCGTAGGCCTACTAATGTACCGATCCCACCTAATATCCTC +ACTCCTATGCCTAGAAGGAATAATACTATCACTATTCGTCATAGCAACCA +TAATAGTCCTAAACACCCACTTCACACTAGCTAGTATAATACCTATCATC +TTACTAGTATTTGCTGCCTGCGAACGAGCTCTAGGATTATCCCTACTAGT +CATAGTCTCCAATACTTATGGAGTAGACCACGTACAAAACCTTAACCTCC +TCCAATGCTAAAAATTATCATTCCCACAATCATACTTATGCCCCTTACAT +GACTATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATAGTCTA +TTAATCAGCCTTATCAGCCTATCCCTCCTAAACCAACCTAGCAACAATAG +CCTAAACTTCTCACTAATATTCTTCTCCGATCCCCTATCAGCCCCACTTC +TGGTGTTGACAACATGACTACTGCCACTAATACTCATAGCCAGCCAACAC +CATCTATCTAAGGAACCACTAATCCGAAAAAAACTCTACATCACCATGCT +AACCATACTTCAAACTTTCCTAATCATGACTTTTACCGCCACAGAACTAA +TCTCCTTCTACATCCTATTTGAAGCCACATTAGTTCCAACACTAATTATC +ATCACCCGCTGAGGCAACCAAACAGAACGCCTGAACGCAGGCCTCTACTT +CCTATTCTACACACTAATAGGTTCCCTCCCACTCTTAGTTGCACTAATCT +CTATCCAAAACCTAACAGGCTCACTAAACTTCCTATTAATTCAATACTGA +AACCAAGCACTACCCGACTCTTGATCCAATATTTTCCTATGACTAGCATG +TATAATAGCATTCATAGTCAAAATACCGGTATATGGTCTTCACCTCTGAC +TCCCAAAAGCCCATGTAGAAGCCCCAATTGCCGGATCCATAGTGCTAGCA +GCCATTCTACTAAAACTAGGAGGCTACGGAATACTACGAATTACAACAAT +ACTAAACCCCCAAACTAGCTTTATAGCCTACCCCTTCCTCATACTATCCC +TGTGAGGAATAATCATAACTAGTTCCATCTGCTTGCGACAAACCGATCTA +AAATCACTTATTGCATACTCCTCTGTCAGCCACATAGCCCTAGTAATCGT +AGCCGTCCTCATCCAAACACCATGAAGTTATATAGGAGCTACAGCCCTAA +TAATCGCTCACGGCCTTACATCATCAATACTATTCTGCCTGGCAAACTCA +AATTACGAACGTACCCATAGCCGAACTATAATCCTAGCCCGCGGGCTTCA +AACACTTCTTCCCCTTATAGCAGCCTGATGACTATTAGCCAGCCTAACCA +ACCTGGCCCTCCCTCCCAGCATTAACCTAATTGGAGAGCTATTCGTAGTA +ATATCATCATTCTCATGATCAAATATTACCATTATCCTAATAGGAGCCAA +TATCACCATCACCGCCCTCTACTCCCTATACATACTAATCACAACACAAC +GAGGGAAATACACACACCATATCAACAGCATTAAACCTTCATTTACACGA +GAAAACGCACTCATGGCCCTCCACATGACTCCCCTACTACTCCTATCACT +TAACCCTAAAATTATCCTAGGCTTTACGTACTGTAAATATAGTTTAACAA +AAACACTAGATTGTGGATCTAGAAACAGAAACTTAATATTTCTTATTTAC +CGAGAAAGTATGCAAGAACTGCTAATTCATGCCCCCATGTCCAACAAACA +TGGCTCTCTCAAACTTTTAAAGGATAGGAGCTATCCGTTGGTCTTAGGAA +CCAAAAAATTGGTGCAACTCCAAATAAAAGTAATCAACATGTTCTCCTCC +CTCATACTAGTTTCACTATTAGTACTAACCCTCCCAATCATATTATCAAT +CTTCAATACCTACAAAAACAGCACGTTCCCGCATCATGTAAAAAACACTA +TCTCATATGCCTTCATTACTAGCCTAATTCCCACTATAATATTTATTCAC +TCTGGACAAGAAACAATTATCTCAAACTGACACTGAATAACCATACAAAC +CCTCAAACTATCCCTAAGCTTCAAACTAGATTACTTCTCAATAATTTTCG +TACCAGTAGCCCTATTCGTAACATGATCTATTATGGAATTCTCCCTATGA +TACATGCACTCAGATCCTTACATTACTCGATTTTTTAAATACTTACTTAC +ATTCCTCATCACTATAATAATTCTAGTCACAGCTAACAACCTTTTCCAAC +TGTTCATCGGATGGGAGGGAGTAGGCATCATGTCATTCTTACTAATCGGA +TGATGATACGGCCGAACAGATGCCAACACCGCGGCCCTTCAAGCAATCCT +TTATAACCGCATCGGGGATATCGGCTTCATCATGGCCATAGCCTGATTCC +TATTCAACACCAACACATGAGACCTCCAACAAATCTTCATACTCGACCCC +AACCTTACCAACCTCCCGCTCCTAGGCCTCCTCCTAGCCGCAACTGGCAA +ATCCGCTCAATTTGGACTCCACCCATGACTTCCTTCAGCCATAGAGGGCC +CTACACCAGTCTCAGCCCTACTCCACTCCAGCACAATAGTTGTAGCAGGC +GTCTTCCTGCTAATCCGCTTCCATCCACTAATAGAAAACAACAAAACAAT +CCAGTCACTTACCCTATGCCTAGGAGCCATCACCACACTATTCACAGCAA +TCTGCGCACTCACTCAAAACGATATCAAAAAAATCATTGCTTTCTCCACC +TCCAGCCAACTAGGCCTGATAATCGTAACCATCGGTATCAATCAACCCTA +CCTAGCATTCCTCCACATTTGCACTCACGCATTCTTCAAAGCTATACTAT +TTATATGTTCCGGATCCATTATCCACAGCCTAAATGACGAGCAAGATATC +CGAAAAATAGGCGGACTATTTAATGCAATACCCTTCACCACCACATCTCT +AATTATTGGCAGCCTTGCACTCACCGGAATTCCTTTCCTCACAGGCTTCT +ACTCCAAAGACCTCATCATCGAAACCGCCAACACATCGTACACCAACGCC +TGAGCCCTACTAATAACTCTCATTGCCACATCCCTCACAGCTGTCTACAG +TACCCGAATCATCTTCTTTGCACTCCTAGGGCAACCCCGCTTCCTCCCTC +TGACCTCAATCAACGAAAATAACCCCTTTCTAATTAACTCCATCAAACGC +CTCTTAATTGGCAGCATTTTTGCCGGATTCTTCATCTCCAACAATATCTA +CCCCACAACCGTCCCAGAAATAACCATACCTACTTACATAAAACTCACCG +CCCTCGCAGTAACCATCCTAGGATTTACACTAGCCCTAGAACTAAGCTTG +ATAACCCATAACTTAAAACTAGAACACTCCACCAACGTATTCAAATTCTC +CAACCTCCTAGGATACTACCCAACAATTATACACCGACTCCCACCGCTCG +CTAACCTATCAATAAGCCAAAAATCAGCATCACTTCTACTAGACTCAATC +TGACTAGAAAACATCCTGCCAAAATCTATCTCCCAGTTCCAAATAAAAAC +CTCGATCCTAATTTCCACCCAAAAAGGACAAATCAAATTATATTTCCTCT +CATTCCTCATCACCCTTACCCTAAGCATACTACTTTTTAATCTCCACGAG +TAACCTCTAAAATTACCAAGACCCCAACAAGCAACGATCAACCAGTCACA +ATCACAACCCAAGCCCCATAACTATACAATGCAGCAGCCCCTATAATTTC +CTCACTAAACGCCCCAGAATCTCCAGTATCATAAATAGCTCAAGCCCCCA +CACCACTAAACTTAAACACTACCCCCACTTCCTCACTCTTCAGAACATAT +AAAACCAACATAACCTCCATCAACAACCCTAAAAGAAATACCCCCATAAC +AGTCGTATTAGACACCCATACCTCAGGATACTGCTCAGTAGCCATAGCCG +TTGTATAACCAAAAACAACCAACATTCCTCCCAAATAAATCAAAAACACC +ATCAACCCCAAAAAGGACCCTCCAAAATTCATAATAATACCACAACCTAC +CCCTCCACTTACAATCAGCACTAAACCCCCATAAATAGGTGAAGGTTTTG +AAGAAAACCCCACAAAACTAACAACAAAAATAACACTCAAAATAAACACA +ATATATGTCATCATTATTCCCACGTGGAATCTAACCACGACCAATGACAT +GAAAAATCATCGTTGTATTTCAACTATAAGAACACCAATGACAAACATCC +GGAAATCTCACCCACTAATTAAAATCATCAATCACTCTTTTATTGACCTA +CCAGCCCCCTCAAACATTTCATCATGATGAAACTTCGGCTCCCTCCTAGG +AATCTGCCTAATCCTCCAAATCTTAACAGGCCTATTCCTAGCCATACACT +ACACATCAGACACGACAACTGCCTTCTCATCCGTCACTCACATCTGCCGA +GACGTTAACTACGGATGAATTATTCGCTACCTCCATGCCAACGGAGCATC +AATATTTTTTATCTGCCTCTTCATTCACGTAGGACGCGGCCTCTACTACG +GCTCTTACACATTCCTAGAGACATGAAACATTGGAATCATCCTACTTTTC +ACAGTTATAGCTACAGCATTCATGGGCTATGTCCTACCATGAGGCCAAAT +ATCCTTTTGAGGAGCAACAGTCATCACGAACCTCCTATCAGCAATTCCCT +ACATCGGTACTACCCTCGTCGAGTGAATCTGAGGTGGATTCTCAGTAGAC +AAAGCCACCCTTACCCGATTTTTTGCTTTCCACTTCATCCTACCCTTCAT +CATCACAGCCCTGGTAGTCGTACATTTACTATTTCTTCACGAAACAGGAT +CTAATAACCCCTCAGGAATCCCATCCGATATGGACAAAATCCCATTCCAC +CCATATTATACAATTAAAGACATCCTAGGACTCCTCCTCCTGATCTTGCT +CCTACTAACTCTAGTATTATTCTCCCCCGACCTCCTAGGAGACCCAGACA +ACTACACCCCAGCTAACCCTCTCAGCACTCCCCCTCATATTAAACCAGAA +TGGTACTTCCTGTTTGCCTACGCCATCCTACGCTCCATTCCCAACAAACT +AGGCGGCGTATTAGCCCTAATCCTCTCCATCCTGATCCTAGCACTCATCC +CCACCCTCCACATATCAAAACAACGAAGCATAATATTCCGGCCTCTCAGC +CAATGCGTATTCTGACTCTTAGTGGCAGACTTACTGACACTAACATGAAT +CGGCGGACAGCCAGTGGAACACCCATACGTAATTATCGGCCAACTGGCCT +CAATCCTCTACTTCTCCCTAATTCTCATTTTTATACCACTCGCAAGCACC +ATCGAAAACAATCTTCTAAAATGAAGAGTCCCTGTAGTATATCGCACATT +ACCCTGGTCTTGTAAACCAGAAAAGGGGGAAAACGTTTCCTCCCAAGGAC +TATCAAGGAAGAAGCTCTAGCTCCACCATCAACACCCAAAGCTGAAATTC +TACTTAAACTATTCCTTGATTTCTTCCCCTAAACGACAACAATTTACCCT +CATGTGCTATGTCAGTATCAGATTATACCCCCACATAACACCATACCCAC +CTGACATGCAATATCTTATGAATGGCCTATGTACGTCGTGCATTAAATTG +TCTGCCCCATGAATAATAAGCATGTACATAATATCATTTATCTTACATAA +GTACATTATATTATTGATCGTGCATACCCCATCCAAGTCAAATCATTTCC +AGTCAACACGCATATCACAGCCCATGTTCCACGAGCTTAATCACCAAGCC +GCGGGAAATCAGCAACCCTCCCAACTACGTGTCCCAATCCTCGCTCCGGG +CCCATCCAAACGTGGGGGTTTCTACAATGAAACTATACCTGGCATCTGGT +TCTTTCTTCAGGGCCATTCCCACCCAACCTCGCCCATTCTTTCCCCTTAA +ATAAGACATCTCGATGGACTAATGACTAATCAGCCCATGCTCACACATAA +CTGTGATTTCATGCATTTGGTATCTTTTTATATTTGGGGATGCTATGACT +CAGCTATGGCCGTCAAAGGCCTCGACGCAGTCAATTAAATTGAAGCTGGA +CTTAAATTGAACGTTATTCCTCCGCATCAGCAACCATAAGGTGTTATTCA +GTCCATGGTAGCGGGACATAGGAAACAAgtgcacctgtgcacctgtgcac +ctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacct +gtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgt +gcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgc +acctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcac +ctgtgcacctACCCGCGCAGTAAGCAAGTAATATAGCTTTCTTAATCAAA +CCCCCCCTACCCCCCATTAAACTCCACATATGTACATTCAACACAATCTT +GCCAAACCCCAAAAACAAGACTAAACAATGCACAATACTTCATGAAGCTT +AACCCTCGCATGCCAACCATAATAACTCAACACACCTAACAATCTTAACA +GAACTTTCCCCCCGCCATTAATACCAACATGCTACTTTAATCAATAAAAT +TTCCATAGACAGGCATCCCCCTAGATCTAATTTTCTAAATCTGTCAACCC +TTCTTCCCCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/chr_m.fasta.fai Tue Oct 13 12:53:53 2015 -0400 @@ -0,0 +1,1 @@ +chrM 16660 6 50 51
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/fasta_indexes.loc Tue Oct 13 12:53:53 2015 -0400 @@ -0,0 +1,1 @@ +chrM chr_m Horse (Equus caballus): equCab2 chrM ${__HERE__}/chrM.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/chr_m.fasta.fai Tue Oct 13 12:53:53 2015 -0400 @@ -0,0 +1,1 @@ +chrM 16660 6 50 51
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sam_to_bam_noheader_in2.sam Tue Oct 13 12:53:53 2015 -0400 @@ -0,0 +1,10 @@ +HWI-EAS91_1_30788AAXX:1:1:1513:715 16 chrM 9563 25 36M * 0 0 CTGACTACCACAACTAAACATCTATGCNNAAAAAAC I+-II?IDIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1698:516 16 chrM 2735 25 36M * 0 0 TTTACACTCAGAGGTTCAACTCCTCTCNNTAACAAC I9IIIII5IIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1491:637 16 chrM 10864 25 36M * 0 0 TGTAGAAGCCCCAATTGCCGGATCCATNNTGCTAGC DBAIIIIIIIIIIIFIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1711:249 16 chrM 10617 25 36M * 0 0 ACCAAACAGAACGCCTGAACGCAGGCCNNTACTTCC IIIIIIIIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1634:211 0 chrM 9350 25 36M * 0 0 GAAGCAGNNGCTTGATACTGACACTTCGTCGACGTA IIIIIII""IIIIIIIIIIIIIIIIIIIIII9IIDF NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1218:141 16 chrM 14062 25 36M * 0 0 ACAAAACTAACAACAAAAATAACACTCNNAATAAAC I+IIII1IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1398:854 16 chrM 3921 25 36M * 0 0 CACCCTTCCCGTACTAATAAATCCCCTNNTCTTCAC IIIII=AIIIIIIIIIIIIIIBIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1310:991 16 chrM 10002 25 36M * 0 0 CTCCTATGCCTAGAAGGAATAATACTANNACTATTC I:2IEI:IIDIIIIII4IIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1716:413 0 chrM 6040 25 36M * 0 0 GATCCAANNCTTTATCAACACCTATTCTGATTCTTC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1630:59 16 chrM 12387 25 36M * 0 0 TCATACTCGACCCCAACCTTACCAACCNNCCGCTCC FIIHII;IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Tue Oct 13 12:53:53 2015 -0400 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- a/tool-data/sam_fa_indices.loc.sample Thu Mar 27 14:04:25 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Samtools indexed sequences data files. You will need -#to create these data files and then create a sam_fa_indices.loc file -#similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The sam_fa_indices.loc -#file has this format (white space characters are TAB characters): -# -#index <seq> <location> -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/sam/, -#then the sam_fa_indices.loc entry would look like this: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -# -#and your /depot/data2/galaxy/sam/ directory -#would contain hg18.fa and hg18.fa.fai files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai -# -#Your sam_fa_indices.loc file should include an entry per line for -#each index set you have stored. The file in the path does actually -#exist, but it should never be directly used. Instead, the name serves -#as a prefix for the index file. For example: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -#index hg19 /depot/data2/galaxy/sam/hg19.fa
--- a/tool-data/tool_data_table_conf.xml.sample Thu Mar 27 14:04:25 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> -<tables> - <!-- Location of SAMTools indexes and other files --> - <table name="sam_fa_indexes" comment_char="#"> - <columns>line_type, value, path</columns> - <file path="tool-data/sam_fa_indices.loc" /> - </table> -</tables> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Oct 13 12:53:53 2015 -0400 @@ -0,0 +1,7 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/fasta_indexes.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Tue Oct 13 12:53:53 2015 -0400 @@ -0,0 +1,10 @@ +<tables> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/cached_locally/fasta_indexes.loc" /> + </table> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/cached_locally/all_fasta.loc" /> + </table> +</tables> \ No newline at end of file
--- a/tool_dependencies.xml Thu Mar 27 14:04:25 2014 -0400 +++ b/tool_dependencies.xml Tue Oct 13 12:53:53 2015 -0400 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="samtools" version="0.1.19"> - <repository changeset_revision="233326db3402" name="package_samtools_0_1_19" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <package name="samtools" version="1.2"> + <repository changeset_revision="192f00129358" name="package_samtools_1_2" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>