changeset 46:88a13a2fd23f draft

Disable caching, update descriptions, prune deprecated tools.
author Matt Shirley <mdshw5@gmail.com>
date Tue, 03 Sep 2013 22:02:30 -0400
parents 3cb113532930
children 04cc8176e86f
files fastq_dump.xml sam_dump.xml sra_fetch.py sra_fetch.xml sra_pileup.xml sra_validate.xml tool_dependencies.xml
diffstat 7 files changed, 13 insertions(+), 102 deletions(-) [+]
line wrap: on
line diff
--- a/fastq_dump.xml	Mon Aug 05 12:54:03 2013 -0400
+++ b/fastq_dump.xml	Tue Sep 03 22:02:30 2013 -0400
@@ -116,6 +116,7 @@
   </requirements>
   <help>
     This tool extracts reads from SRA archives using fastq-dump. 
+    Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. 
     The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
     Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
   </help>
--- a/sam_dump.xml	Mon Aug 05 12:54:03 2013 -0400
+++ b/sam_dump.xml	Tue Sep 03 22:02:30 2013 -0400
@@ -92,6 +92,7 @@
   </requirements>
   <help>
     This tool extracts reads from sra archives using sam-dump. 
+    Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. 
     The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
     Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
   </help>
--- a/sra_fetch.py	Mon Aug 05 12:54:03 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-import sys
-import os
-from ftplib import FTP
-try:
-    import argparse
-except ImportError:
-    import getopt
-
-def main(args):
-    """ Get accession number from argument """
-    if module_exists('argparse'):
-        args = dict(vars(args))
-    prefix = args['accession'][0:3]
-    middle = args['accession'][3:6]
-    suffix = args['accession'][6:9]
-
-    ftp = FTP('ftp-trace.ncbi.nih.gov')
-    # Open file and transfer requested SRA as a file
-    # Try to change the working directory until it works
-    with open(args['out'], 'wb') as sra:
-        ftpPath = os.path.join('/sra/sra-instant/reads/ByRun/sra/',
-                               prefix,
-                               prefix + middle,
-                               prefix + middle + suffix)
-        ftp.login('ftp')
-        connected = False
-        while not connected:
-            try:
-                ftp.cwd(ftpPath)
-                connected = True
-            except:
-                pass
-        ftp.retrbinary('RETR ' + prefix + middle + suffix + '.sra', sra.write)
-        ftp.quit()
-
-def module_exists(module_name):
-    try:
-        __import__(module_name)
-    except ImportError:
-        return False
-    else:
-        return True
-
-def arguments():
-    parser = argparse.ArgumentParser(description="Download an SRA from the NCBI SRA FTP")
-    parser.add_argument('-a', '--accession', type=str, help="SRA accession ex: SRR000001")
-    parser.add_argument('-o', '--out', type=str, help="Name for SRA file ")
-    args = parser.parse_args()
-    return args
-
-if __name__ == "__main__":
-    if module_exists('argparse'):
-        args = arguments()
-    else:
-        ## fall back to getopt for python < 2.7
-        args = dict()
-        options, remainder = getopt.getopt(sys.argv[1:], 'a:o:', ['accession=', 'out='])
-        for opt, arg in options:
-            if opt in ('-a', '--accession'):
-                args['accession'] = arg
-            elif opt in ('-o', '--out'):
-                args['out'] = arg
-
-    main(args)
--- a/sra_fetch.xml	Mon Aug 05 12:54:03 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-<tool id="sra_fetch" name="Fetch SRA archive" version="1.0.3">
-  <description> by accession from NCBI SRA.</description>
-  <command interpreter="python">sra_fetch.py --accession '$accession' --out '$output'</command>
-  <inputs>
-    <param name="accession" size="13" type="text" value="SRR000001" label="sra run accession"/>
-  </inputs>
-  <outputs>
-    <data format="sra" name="output" label="${accession.value}"/>
-  </outputs>
-  <help>
-    This tool fetches sra archives by accession from NCBI over FTP.
-  </help>
-</tool>
--- a/sra_pileup.xml	Mon Aug 05 12:54:03 2013 -0400
+++ b/sra_pileup.xml	Tue Sep 03 22:02:30 2013 -0400
@@ -44,6 +44,7 @@
   </requirements>
   <help>
     This tool produces pileup format from sra archives using sra-pileup. 
+    Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. 
     The sra-pileup program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
     Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
   </help>
--- a/sra_validate.xml	Mon Aug 05 12:54:03 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-<tool id="sra_validate" name="Validate sra" version="1.0.2">
-  <description> and report any errors.</description>
-  <command>vdb-validate -md5 '$input' 2> $output </command>
-  <version_string>vdb-validate --version</version_string>
-  <inputs>
-    <param format="sra" name="input" type="data" label="sra archive"/>
-  </inputs>
-  <outputs>
-    <data format="txt" name="output"/>
-  </outputs>
-  <stdio>
-    <exit_code range="127" level="fatal" description="Cannot find vdb-validate binary"/>
-  </stdio>
-  <requirements>
-    <requirement type="package" version="2.3.2-4">sra_toolkit</requirement>
-  </requirements>
-  <help>
-    This tool checks the data integrity of an sra archive using vdb-validate. 
-    The vdb-validate program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
-    Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
-  </help>
-</tool>
--- a/tool_dependencies.xml	Mon Aug 05 12:54:03 2013 -0400
+++ b/tool_dependencies.xml	Tue Sep 03 22:02:30 2013 -0400
@@ -10,13 +10,16 @@
 	<action type="make_directory">$INSTALL_DIR/bin</action>	
 	<action type="make_directory">$INSTALL_DIR/ncbi</action>	
 	<action type="make_directory">$INSTALL_DIR/ncbi/public</action>	
-	<action type="shell_command">sed -i -e "s|\$(HOME)|$INSTALL_DIR|g" bin64/ncbi/default.kfg</action>
+	<action type="shell_command">sed -i -e "s|\$(HOME)|$INSTALL_DIR|g" -e "s|cache-enabled = \"true\"|cache-enabled = \"false\"|" bin64/ncbi/default.kfg</action>
 	<action type="shell_command">cp --recursive --dereference bin64/* $INSTALL_DIR/bin</action>
 	<action type="set_environment"><environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable></action>
       </actions>
     </install>
-    <readme>Tools from NCBI SRA Toolkit for extracting FASTQ and SAM format reads from SRA format archives. Various tools for working with SRA from NCBI.
-This software release was designed to run under Linux, MacOSX operating systems on Intel x86-compatible 64 bit architectures.
+    <readme>Tools from NCBI SRA Toolkit for extracting FASTQ and SAM format reads from SRA format archives. 
+This software release was designed to run under Linux and Mac OS X operating systems on Intel x86-compatible 64-bit architectures.
+When running on Amazon EC2, be sure to keep in mind the size limitation of EBS storage devices when requesting a 
+download of a large SRA data set.
+
 Build Requirements:
   ar 
   bash
@@ -25,6 +28,10 @@
   libxml2
   libcurl4
   zlib
+  
+On a Debian-based Linux OS, use:
+
+  apt-get install build-essential libxml2-dev libcurl4-openssl-dev zlib1g-dev
     </readme>
   </package>
 </tool_dependency>