# HG changeset patch # User Matt Shirley # Date 1378260150 14400 # Node ID 88a13a2fd23fb564e10b0e0be41683be697d6bcf # Parent 3cb113532930d0e7e4139e37863b461e978a2512 Disable caching, update descriptions, prune deprecated tools. diff -r 3cb113532930 -r 88a13a2fd23f fastq_dump.xml --- a/fastq_dump.xml Mon Aug 05 12:54:03 2013 -0400 +++ b/fastq_dump.xml Tue Sep 03 22:02:30 2013 -0400 @@ -116,6 +116,7 @@ This tool extracts reads from SRA archives using fastq-dump. + Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. diff -r 3cb113532930 -r 88a13a2fd23f sam_dump.xml --- a/sam_dump.xml Mon Aug 05 12:54:03 2013 -0400 +++ b/sam_dump.xml Tue Sep 03 22:02:30 2013 -0400 @@ -92,6 +92,7 @@ This tool extracts reads from sra archives using sam-dump. + Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. 
diff -r 3cb113532930 -r 88a13a2fd23f sra_fetch.py --- a/sra_fetch.py Mon Aug 05 12:54:03 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -import sys -import os -from ftplib import FTP -try: - import argparse -except ImportError: - import getopt - -def main(args): - """ Get accession number from argument """ - if module_exists('argparse'): - args = dict(vars(args)) - prefix = args['accession'][0:3] - middle = args['accession'][3:6] - suffix = args['accession'][6:9] - - ftp = FTP('ftp-trace.ncbi.nih.gov') - # Open file and transfer requested SRA as a file - # Try to change the working directory until it works - with open(args['out'], 'wb') as sra: - ftpPath = os.path.join('/sra/sra-instant/reads/ByRun/sra/', - prefix, - prefix + middle, - prefix + middle + suffix) - ftp.login('ftp') - connected = False - while not connected: - try: - ftp.cwd(ftpPath) - connected = True - except: - pass - ftp.retrbinary('RETR ' + prefix + middle + suffix + '.sra', sra.write) - ftp.quit() - -def module_exists(module_name): - try: - __import__(module_name) - except ImportError: - return False - else: - return True - -def arguments(): - parser = argparse.ArgumentParser(description="Download an SRA from the NCBI SRA FTP") - parser.add_argument('-a', '--accession', type=str, help="SRA accession ex: SRR000001") - parser.add_argument('-o', '--out', type=str, help="Name for SRA file ") - args = parser.parse_args() - return args - -if __name__ == "__main__": - if module_exists('argparse'): - args = arguments() - else: - ## fall back to getopt for python < 2.7 - args = dict() - options, remainder = getopt.getopt(sys.argv[1:], 'a:o:', ['accession=', 'out=']) - for opt, arg in options: - if opt in ('-a', '--accession'): - args['accession'] = arg - elif opt in ('-o', '--out'): - args['out'] = arg - - main(args) diff -r 3cb113532930 -r 88a13a2fd23f sra_fetch.xml --- a/sra_fetch.xml Mon Aug 05 12:54:03 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ 
- - by accession from NCBI SRA. - sra_fetch.py --accession '$accession' --out '$output' - - - - - - - - This tool fetches sra archives by accession from NCBI over FTP. - - diff -r 3cb113532930 -r 88a13a2fd23f sra_pileup.xml --- a/sra_pileup.xml Mon Aug 05 12:54:03 2013 -0400 +++ b/sra_pileup.xml Tue Sep 03 22:02:30 2013 -0400 @@ -44,6 +44,7 @@ This tool produces pileup format from sra archives using sra-pileup. + Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. The sra-pileup program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. diff -r 3cb113532930 -r 88a13a2fd23f sra_validate.xml --- a/sra_validate.xml Mon Aug 05 12:54:03 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ - - and report any errors. - vdb-validate -md5 '$input' 2> $output - vdb-validate --version - - - - - - - - - - - sra_toolkit - - - This tool checks the data integrity of an sra archive using vdb-validate. - The vdb-validate program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. - Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. - - diff -r 3cb113532930 -r 88a13a2fd23f tool_dependencies.xml --- a/tool_dependencies.xml Mon Aug 05 12:54:03 2013 -0400 +++ b/tool_dependencies.xml Tue Sep 03 22:02:30 2013 -0400 @@ -10,13 +10,16 @@ $INSTALL_DIR/bin $INSTALL_DIR/ncbi $INSTALL_DIR/ncbi/public - sed -i -e "s|\$(HOME)|$INSTALL_DIR|g" bin64/ncbi/default.kfg + sed -i -e "s|\$(HOME)|$INSTALL_DIR|g" -e "s|cache-enabled = \"true\"|cache-enabled = \"false\"|" bin64/ncbi/default.kfg cp --recursive --dereference bin64/* $INSTALL_DIR/bin $INSTALL_DIR/bin - Tools from NCBI SRA Toolkit for extracting FASTQ and SAM format reads from SRA format archives. Various tools for working with SRA from NCBI. 
-This software release was designed to run under Linux, MacOSX operating systems on Intel x86-compatible 64 bit architectures. + Tools from NCBI SRA Toolkit for extracting FASTQ and SAM format reads from SRA format archives. +This software release was designed to run under Linux, MacOSX operating systems on Intel x86-compatible 64 bit architectures. +When running on Amazon EC2, be sure to keep in mind the size limitation of EBS storage devices when requesting a +download of a large SRA data set. + Build Requirements: ar bash @@ -25,6 +28,10 @@ libxml2 libcurl4 zlib + +On a Debian-based Linux OS use: + + apt-get install build-essential libxml2-dev libcurl4-openssl-dev zlib1g-dev