# HG changeset patch # User dddd # Date 1362093048 18000 # Node ID f1de190a2aef70ae7a49cbb9ca5df55a2a14a2b7 Uploaded diff -r 000000000000 -r f1de190a2aef sra_tools-ffdd41766195/datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sra_tools-ffdd41766195/datatypes_conf.xml Thu Feb 28 18:10:48 2013 -0500 @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff -r 000000000000 -r f1de190a2aef sra_tools-ffdd41766195/fastq_dump.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sra_tools-ffdd41766195/fastq_dump.xml Thu Feb 28 18:10:48 2013 -0500 @@ -0,0 +1,31 @@ + + format reads from NCBI SRA. + ./fastq-dump --log-level fatal --report never --accession '${input.name}' --stdout $split $aligned '$input' > $output + fastq-dump --version + + + + + + + + + + + + + + + + + + + + + + fastq-dump + + + This tool extracts fastqsanger reads from SRA archives using fastq-dump. The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. + + diff -r 000000000000 -r f1de190a2aef sra_tools-ffdd41766195/sam_dump.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sra_tools-ffdd41766195/sam_dump.xml Thu Feb 28 18:10:48 2013 -0500 @@ -0,0 +1,33 @@ + + format reads from NCBI SRA. + sam-dump $header $aligned $primary '$input' > $output + sam-dump --version + + + + + + + + + + + + + + + + + + + + + + + sam-dump + + + This tool extracts SAM format reads from SRA archives using sam-dump. The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. +Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. 
+
+
diff -r 000000000000 -r f1de190a2aef sra_tools-ffdd41766195/sra.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-ffdd41766195/sra.py Thu Feb 28 18:10:48 2013 -0500
@@ -0,0 +1,56 @@
+"""
+Sra class
+"""
+
+import galaxy.datatypes.binary
+from galaxy.datatypes.binary import Binary
+import data, logging, binascii
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes import metadata
+from galaxy.datatypes.sniff import *
+from galaxy import eggs
+import pkg_resources
+pkg_resources.require( "bx-python" )
+import os, subprocess, tempfile
+import struct
+
+class Sra( Binary ):
+    """ Sequence Read Archive (SRA) """
+    file_ext = "sra"
+
+    def __init__( self, **kwd ):
+        Binary.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        """
+        Return True if `filename` starts with the NCBI SRA magic bytes.
+
+        The first 8 bytes of any NCBI sra file are 'NCBI.sra', and the file is
+        binary. EBI and DDBJ files may differ. For details about the format, see
+        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure
+        """
+        try:
+            # Read raw bytes (not text) and close the handle promptly
+            with open( filename, 'rb' ) as handle:
+                return handle.read( 8 ) == b'NCBI.sra'
+        except Exception:
+            # Sniffing is best-effort: a file that cannot be read is not SRA
+            return False
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """ Set the peek and blurb text of `dataset`. """
+        if not dataset.dataset.purged:
+            dataset.peek = "Binary sra file"
+            dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def display_peek( self, dataset ):
+        """ Return the stored peek, or a generic description if it is unavailable. """
+        try:
+            return dataset.peek
+        except Exception:
+            return "Binary sra file (%s)" % ( data.nice_size( dataset.get_size() ) )
+
+Binary.register_sniffable_binary_format("sra", "sra", Sra)
diff -r 000000000000 -r f1de190a2aef sra_tools-ffdd41766195/sra_fetch.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-ffdd41766195/sra_fetch.py Thu Feb 28 18:10:48 2013 -0500
@@ -0,0 +1,43 @@
+from ftplib import FTP, error_temp
+import sys
+import time
+
+# Get accession number and output path from arguments
+if len(sys.argv) < 3:
+    sys.exit('usage: sra_fetch.py ACCESSION OUTFILE')
+accession = sys.argv[1]
+outfile = sys.argv[2]
+prefix = accession[0:3]
+middle = accession[3:6]
+suffix = accession[6:9]
+run = prefix + middle + suffix
+
+# How many times to try entering the run directory, and the pause (in
+# seconds) between tries
+MAX_TRIES = 5
+RETRY_DELAY = 2
+
+# NCBI SRA FTP site
+ftp = FTP('ftp-trace.ncbi.nih.gov')
+ftp.login('ftp')
+
+# Change to the directory holding the requested run. Only temporary (4xx)
+# FTP errors are retried, and only MAX_TRIES times; a permanent error such as
+# an unknown accession is raised immediately instead of looping forever.
+for attempt in range(1, MAX_TRIES + 1):
+    try:
+        ftp.cwd('/sra/sra-instant/reads/ByRun/sra/' +
+                prefix + '/' +
+                prefix + middle + '/' +
+                run + '/')
+        break
+    except error_temp:
+        if attempt == MAX_TRIES:
+            raise
+        time.sleep(RETRY_DELAY)
+
+# Transfer the requested SRA to the output file; the file is closed even if
+# the download fails part-way
+with open(outfile, 'wb') as sra:
+    ftp.retrbinary('RETR ' + run + '.sra', sra.write)
+ftp.quit()
diff -r 000000000000 -r f1de190a2aef sra_tools-ffdd41766195/sra_fetch.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-ffdd41766195/sra_fetch.xml Thu Feb 28 18:10:48 2013 -0500
@@ -0,0 +1,16 @@
+
+ by accession from NCBI SRA.
+ sra_fetch.py '$accession' '$output'
+
+
+
+
+
+
+
+ sra_fetch.py
+
+
+ This tool fetches SRA archives from NCBI over FTP using the python ftplib.
+
+