changeset 22:423f3eb06428 draft

more fixes for sra datatype, added sra_pileup
author Matt Shirley <mdshw5@gmail.com>
date Wed, 19 Jun 2013 14:11:37 -0400
parents d850324e82cf
children 064f4fb1af40
files datatypes_conf.xml fastq_dump.xml sam_dump.xml sra.py sra_fetch.xml sra_pileup.xml
diffstat 6 files changed, 39 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/datatypes_conf.xml	Wed Jun 19 13:31:58 2013 -0400
+++ b/datatypes_conf.xml	Wed Jun 19 14:11:37 2013 -0400
@@ -4,6 +4,6 @@
     <datatype_file name="sra.py"/>
   </datatype_files>
   <registration>
-    <datatype extension="sra" type="galaxy.datatypes.sra:SRA" display_in_upload="true"/>
+    <datatype extension="sra" type="galaxy.datatypes.sra:sra" display_in_upload="true"/>
   </registration>
 </datatypes>
--- a/fastq_dump.xml	Wed Jun 19 13:31:58 2013 -0400
+++ b/fastq_dump.xml	Wed Jun 19 14:11:37 2013 -0400
@@ -1,9 +1,9 @@
 <tool id="fastq_dump" name="Extract fastq" version="1.0.2">
-  <description> format reads from NCBI SRA.</description>
+  <description> format reads from NCBI sra.</description>
   <command>fastq-dump --log-level fatal --accession '${input.name}' --stdout $split $aligned '$input' > $output </command>
   <version_string>fastq-dump --version</version_string>
   <inputs>
-    <param format="SRA" name="input" type="data" label="sra archive"/>
+    <param format="sra" name="input" type="data" label="sra archive"/>
     <param format="text" name="split" type="select" value="">
       <label>Split read pairs</label>
       <option value="">No</option>
@@ -26,6 +26,8 @@
     <requirement type="package" version="2.3.2-4">sra_toolkit</requirement>
   </requirements>
   <help>
-    This tool extracts fastqsanger reads from SRA archives using fastq-dump. The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+    This tool extracts fastq format reads from sra archives using fastq-dump. 
+    The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+    Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
   </help>
 </tool>
--- a/sam_dump.xml	Wed Jun 19 13:31:58 2013 -0400
+++ b/sam_dump.xml	Wed Jun 19 14:11:37 2013 -0400
@@ -1,5 +1,5 @@
 <tool id="sam_dump" name="Extract SAM" version="1.0.2">
-  <description> format reads from NCBI SRA.</description>
+  <description> format reads from NCBI sra.</description>
   <command>sam-dump $header $aligned $primary '$input' > $output</command>
   <version_string>sam-dump --version</version_string>
   <inputs>
@@ -27,7 +27,7 @@
     <requirement type="package" version="2.3.2-4">sra_toolkit</requirement>
   </requirements>
   <help>
-    This tool extracts SAM format reads from SRA archives using sam-dump. 
+    This tool extracts sam format reads from sra archives using sam-dump. 
     The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
     Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
   </help>
--- a/sra.py	Wed Jun 19 13:31:58 2013 -0400
+++ b/sra.py	Wed Jun 19 14:11:37 2013 -0400
@@ -1,5 +1,5 @@
 """
-SRA class
+NCBI sra class
 """
 import logging
 import binascii
@@ -10,14 +10,15 @@
 
 log = logging.getLogger(__name__)
 
-class SRA( Binary ):
+class sra( Binary ):
     """ Sequence Read Archive (SRA) """
     file_ext = 'sra'
 
     def __init__( self, **kwd ):
         Binary.__init__( self, **kwd )
     def sniff( self, filename ):
-        """ The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. Not sure if EBI and DDBJ files may differ. 
+        """ The first 8 bytes of any NCBI sra file are 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ
+        submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
         For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure 
         """
         try:
@@ -30,7 +31,7 @@
             return False
     def set_peek(self, dataset, is_multi_byte=False):
         if not dataset.dataset.purged:
-            dataset.peek  = 'Binary SRA file'
+            dataset.peek  = 'Binary sra file'
             dataset.blurb = data.nice_size(dataset.get_size())
         else:
             dataset.peek = 'file does not exist'
@@ -39,7 +40,7 @@
         try:
             return dataset.peek
         except:
-            return 'Binary SRA file (%s)' % ( data.nice_size(dataset.get_size()))
+            return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size()))
 
 if hasattr(Binary, 'register_sniffable_binary_format'):
-    Binary.register_sniffable_binary_format('SRA', 'SRA', SRA)
+    Binary.register_sniffable_binary_format('sra', 'sra', sra)
--- a/sra_fetch.xml	Wed Jun 19 13:31:58 2013 -0400
+++ b/sra_fetch.xml	Wed Jun 19 14:11:37 2013 -0400
@@ -1,13 +1,13 @@
-<tool id="sra_fetch" name="Fetch SRA" version="1.0.3">
-  <description> by accession from NCBI SRA.</description>
+<tool id="sra_fetch" name="Fetch sra run" version="1.0.3">
+  <description> by accession from NCBI sra.</description>
   <command interpreter="python">sra_fetch.py --accession '$accession' --out '$output'</command>
   <inputs>
-    <param name="accession" size="13" type="text" value="SRR000001" label="SRA run accession"/>
+    <param name="accession" size="13" type="text" value="SRR000001" label="sra run accession"/>
   </inputs>
   <outputs>
-    <data format="sra" name="output" label="Fetch ${accession.value}"/>
+    <data format="sra" name="output" label="Fetch sra ${accession.value}"/>
   </outputs>
   <help>
-    This tool fetches SRA archives from NCBI over FTP using the python ftplib.
+    This tool fetches sra archives by accession from NCBI over ftp.
   </help>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_pileup.xml	Wed Jun 19 14:11:37 2013 -0400
@@ -0,0 +1,19 @@
+<tool id="sra_pileup" name="Generate pileup format" version="1.0.0">
+  <description> from NCBI sra.</description>
+  <command>sra-pileup '$input' > $output</command>
+  <version_string>sra-pileup --version</version_string>
+  <inputs>
+    <param format="sra" name="input" type="data" label="sra archive"/>
+  </inputs>
+  <outputs>
+    <data format="pileup" name="output"/>
+  </outputs>
+  <requirements>
+    <requirement type="package" version="2.3.2-4">sra_toolkit</requirement>
+  </requirements>
+  <help>
+    This tool produces pileup format from sra archives using sra-pileup; the output dataset is typed as pileup, not sam.
+    The sra-pileup program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+    Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
+  </help>
+</tool>