Mercurial > repos > matt-shirley > sra_tools
changeset 22:423f3eb06428 draft
more fixes for sra datatype, added sra_pileup
author | Matt Shirley <mdshw5@gmail.com> |
---|---|
date | Wed, 19 Jun 2013 14:11:37 -0400 |
parents | d850324e82cf |
children | 064f4fb1af40 |
files | datatypes_conf.xml fastq_dump.xml sam_dump.xml sra.py sra_fetch.xml sra_pileup.xml |
diffstat | 6 files changed, 39 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/datatypes_conf.xml Wed Jun 19 13:31:58 2013 -0400 +++ b/datatypes_conf.xml Wed Jun 19 14:11:37 2013 -0400 @@ -4,6 +4,6 @@ <datatype_file name="sra.py"/> </datatype_files> <registration> - <datatype extension="sra" type="galaxy.datatypes.sra:SRA" display_in_upload="true"/> + <datatype extension="sra" type="galaxy.datatypes.sra:sra" display_in_upload="true"/> </registration> </datatypes>
--- a/fastq_dump.xml Wed Jun 19 13:31:58 2013 -0400 +++ b/fastq_dump.xml Wed Jun 19 14:11:37 2013 -0400 @@ -1,9 +1,9 @@ <tool id="fastq_dump" name="Extract fastq" version="1.0.2"> - <description> format reads from NCBI SRA.</description> + <description> format reads from NCBI sra.</description> <command>fastq-dump --log-level fatal --accession '${input.name}' --stdout $split $aligned '$input' > $output </command> <version_string>fastq-dump --version</version_string> <inputs> - <param format="SRA" name="input" type="data" label="sra archive"/> + <param format="sra" name="input" type="data" label="sra archive"/> <param format="text" name="split" type="select" value=""> <label>Split read pairs</label> <option value="">No</option> @@ -26,6 +26,8 @@ <requirement type="package" version="2.3.2-4">sra_toolkit</requirement> </requirements> <help> - This tool extracts fastqsanger reads from SRA archives using fastq-dump. The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. + This tool extracts fastq format reads from sra archives using fastq-dump. + The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. + Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. </help> </tool>
--- a/sam_dump.xml Wed Jun 19 13:31:58 2013 -0400 +++ b/sam_dump.xml Wed Jun 19 14:11:37 2013 -0400 @@ -1,5 +1,5 @@ <tool id="sam_dump" name="Extract SAM" version="1.0.2"> - <description> format reads from NCBI SRA.</description> + <description> format reads from NCBI sra.</description> <command>sam-dump $header $aligned $primary '$input' > $output</command> <version_string>sam-dump --version</version_string> <inputs> @@ -27,7 +27,7 @@ <requirement type="package" version="2.3.2-4">sra_toolkit</requirement> </requirements> <help> - This tool extracts SAM format reads from SRA archives using sam-dump. + This tool extracts sam format reads from sra archives using sam-dump. The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. </help>
--- a/sra.py Wed Jun 19 13:31:58 2013 -0400 +++ b/sra.py Wed Jun 19 14:11:37 2013 -0400 @@ -1,5 +1,5 @@ """ -SRA class +NCBI sra class """ import logging import binascii @@ -10,14 +10,15 @@ log = logging.getLogger(__name__) -class SRA( Binary ): +class sra( Binary ): """ Sequence Read Archive (SRA) """ file_ext = 'sra' def __init__( self, **kwd ): Binary.__init__( self, **kwd ) def sniff( self, filename ): - """ The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. Not sure if EBI and DDBJ files may differ. + """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ + submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'. For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure """ try: @@ -30,7 +31,7 @@ return False def set_peek(self, dataset, is_multi_byte=False): if not dataset.dataset.purged: - dataset.peek = 'Binary SRA file' + dataset.peek = 'Binary sra file' dataset.blurb = data.nice_size(dataset.get_size()) else: dataset.peek = 'file does not exist' @@ -39,7 +40,7 @@ try: return dataset.peek except: - return 'Binary SRA file (%s)' % ( data.nice_size(dataset.get_size())) + return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size())) if hasattr(Binary, 'register_sniffable_binary_format'): - Binary.register_sniffable_binary_format('SRA', 'SRA', SRA) + Binary.register_sniffable_binary_format('sra', 'sra', sra)
--- a/sra_fetch.xml Wed Jun 19 13:31:58 2013 -0400 +++ b/sra_fetch.xml Wed Jun 19 14:11:37 2013 -0400 @@ -1,13 +1,13 @@ -<tool id="sra_fetch" name="Fetch SRA" version="1.0.3"> - <description> by accession from NCBI SRA.</description> +<tool id="sra_fetch" name="Fetch sra run" version="1.0.3"> + <description> by accession from NCBI sra.</description> <command interpreter="python">sra_fetch.py --accession '$accession' --out '$output'</command> <inputs> - <param name="accession" size="13" type="text" value="SRR000001" label="SRA run accession"/> + <param name="accession" size="13" type="text" value="SRR000001" label="sra run accession"/> </inputs> <outputs> - <data format="sra" name="output" label="Fetch ${accession.value}"/> + <data format="sra" name="output" label="Fetch sra ${accession.value}"/> </outputs> <help> - This tool fetches SRA archives from NCBI over FTP using the python ftplib. + This tool fetches sra archives by accession from NCBI over ftp. </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sra_pileup.xml Wed Jun 19 14:11:37 2013 -0400 @@ -0,0 +1,19 @@ +<tool id="sra_pileup" name="Generate pileup format" version="1.0.0"> + <description> from NCBI sra.</description> + <command>sra-pileup '$input' > $output</command> + <version_string>sra-pileup --version</version_string> + <inputs> + <param format="sra" name="input" type="data" label="sra archive"/> + </inputs> + <outputs> + <data format="sam" name="output"/> + </outputs> + <requirements> + <requirement type="package" version="2.3.2-4">sra_toolkit</requirement> + </requirements> + <help> + This tool produces pileup format from sra archives using sra-pileup. + The sra-pileup program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. + Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. + </help> +</tool>