# HG changeset patch # User peterjc # Date 1363790367 14400 # Node ID b3a3ba0c1d47345d1bdf49df80f3df5b0450f69c # Parent f9a7783ed7b688f3ac68aab79dcbc6599d3d1c2d Uploaded v0.0.15 which updates the BLAST database definitions. Fixes a MetadataElement bug and includes more of the optional BLAST database files (contribution from Nicola Soranzo). diff -r f9a7783ed7b6 -r b3a3ba0c1d47 blast.py --- a/blast.py Fri Nov 09 06:50:05 2012 -0500 +++ b/blast.py Wed Mar 20 10:39:27 2013 -0400 @@ -7,6 +7,7 @@ from galaxy.datatypes.xml import GenericXml from galaxy.datatypes.metadata import MetadataElement + class BlastXml( GenericXml ): """NCBI Blast XML Output data""" file_ext = "blastxml" @@ -174,18 +175,24 @@ """Class for nucleotide BLAST database files.""" file_ext = 'blastdbn' composite_type ='basic' - MetadataElement( readonly=True, optional=True, visible=False, no_value=0 ) - def __init__(self,**kwd): + def __init__(self, **kwd): Data.__init__(self, **kwd) - self.add_composite_file('blastdb.nhr') - self.add_composite_file('blastdb.nin') - self.add_composite_file('blastdb.nsq') - self.add_composite_file('blastdb.nhd', optional=True) - self.add_composite_file('blastdb.nsi', optional=True) - self.add_composite_file('blastdb.nhi', optional=True) - self.add_composite_file('blastdb.nog', optional=True) - self.add_composite_file('blastdb.nsd', optional=True) + self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers + self.add_composite_file('blastdb.nin', is_binary=True) # index file + self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences + self.add_composite_file('blastdb.nal', is_binary=False, optional=True) # alias ( -gi_mask option of makeblastdb) + self.add_composite_file('blastdb.nhd', is_binary=True, optional=True) # sorted sequence hash values ( -hash_index option of makeblastdb) + self.add_composite_file('blastdb.nhi', is_binary=True, optional=True) # index of sequence hash values ( -hash_index option of makeblastdb) + self.add_composite_file('blastdb.nnd', is_binary=True, optional=True) # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) + self.add_composite_file('blastdb.nni', is_binary=True, optional=True) # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) + self.add_composite_file('blastdb.nog', is_binary=True, optional=True) # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb) + self.add_composite_file('blastdb.nsd', is_binary=True, optional=True) # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) + self.add_composite_file('blastdb.nsi', is_binary=True, optional=True) # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) +# self.add_composite_file('blastdb.naa', is_binary=True, optional=True) # index of a WriteDB column for e.g. mask data +# self.add_composite_file('blastdb.nab', is_binary=True, optional=True) # data of a WriteDB column +# self.add_composite_file('blastdb.nac', is_binary=True, optional=True) # multiple byte order for a WriteDB column +# The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc. def display_data(self, trans, data, preview=False, filename=None, to_ext=None, size=None, offset=None, **kwd): @@ -195,25 +202,29 @@ """ return "This is a BLAST nucleotide database." + class BlastProtDb( _BlastDb, Data ): """Class for protein BLAST database files.""" file_ext = 'blastdbp' composite_type ='basic' - MetadataElement( readonly=True, optional=True, visible=False, no_value=0 ) - def __init__(self,**kwd): + def __init__(self, **kwd): Data.__init__(self, **kwd) - self.add_composite_file('blastdb.phr') - self.add_composite_file('blastdb.pin') - self.add_composite_file('blastdb.psq') - self.add_composite_file('blastdb.pnd', optional=True) - self.add_composite_file('blastdb.pni', optional=True) - self.add_composite_file('blastdb.psd', optional=True) - self.add_composite_file('blastdb.psi', optional=True) - self.add_composite_file('blastdb.psq', optional=True) - self.add_composite_file('blastdb.phd', optional=True) - self.add_composite_file('blastdb.phi', optional=True) - self.add_composite_file('blastdb.pog', optional=True) +# Component file comments are as in BlastNucDb except where noted + self.add_composite_file('blastdb.phr', is_binary=True) + self.add_composite_file('blastdb.pin', is_binary=True) + self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences + self.add_composite_file('blastdb.phd', is_binary=True, optional=True) + self.add_composite_file('blastdb.phi', is_binary=True, optional=True) + self.add_composite_file('blastdb.pnd', is_binary=True, optional=True) + self.add_composite_file('blastdb.pni', is_binary=True, optional=True) + self.add_composite_file('blastdb.pog', is_binary=True, optional=True) + self.add_composite_file('blastdb.psd', is_binary=True, optional=True) + self.add_composite_file('blastdb.psi', is_binary=True, optional=True) +# self.add_composite_file('blastdb.paa', is_binary=True, optional=True) +# self.add_composite_file('blastdb.pab', is_binary=True, optional=True) +# self.add_composite_file('blastdb.pac', is_binary=True, optional=True) +# The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc. def display_data(self, trans, data, preview=False, filename=None, to_ext=None, size=None, offset=None, **kwd): diff -r f9a7783ed7b6 -r b3a3ba0c1d47 blast_datatypes.txt --- a/blast_datatypes.txt Fri Nov 09 06:50:05 2012 -0500 +++ b/blast_datatypes.txt Wed Mar 20 10:39:27 2013 -0400 @@ -1,8 +1,11 @@ Galaxy datatypes for NCBI BLAST+ suite ====================================== -These Galaxy datatypes are copyright 2010-2012 by Peter Cock, The James Hutton +These Galaxy datatypes are copyright 2010-2013 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. +Contributions/revisions copyright 2012 Edward Kirton. All rights reserved. +Contributions/revisions copyright 2013 Nicola Soranzo. All rights reserved. + See the licence text below. Note that these files (and the associated BLAST+ wrappers) were originally @@ -23,6 +26,8 @@ v0.0.13 - Uses blast.py instead of xml.py to define the datatypes v0.0.14 - Includes datatypes for protein and nucleotide BLAST databases (based on work by Edward Kirton) +v0.0.15 - Fixes a MetadataElement bug and includes more of the optional + BLAST database files (contribution from Nicola Soranzo) Installation @@ -61,7 +66,7 @@ BLAST+ datatypes and wrappers, and other tools are being developed on the following hg branch: http://bitbucket.org/peterjc/galaxy-central/src/tools -For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball I use +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use the following command from the Galaxy tools/ncbi_blast_plus folder: $ tar -czf blast_datatypes.tar.gz blast_datatypes.txt datatypes_conf.xml blast.py