Mercurial > repos > devteam > blast_datatypes
changeset 15:310ec0f47485 draft
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 96d587fc6f6cab23c597e88a83daf7eecd0d4162-dirty
author | peterjc |
---|---|
date | Thu, 09 Feb 2017 11:16:00 -0500 |
parents | 623a3fbe5340 |
children | 63befb860c3e |
files | blast.py |
diffstat | 1 files changed, 59 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/blast.py Fri Feb 03 12:34:03 2017 -0500 +++ b/blast.py Thu Feb 09 11:16:00 2017 -0500 @@ -7,12 +7,17 @@ import os from time import sleep -from galaxy.datatypes.data import get_file_peek -from galaxy.datatypes.data import Data, Text +from galaxy.datatypes.data import Data, Text, get_file_peek from galaxy.datatypes.xml import GenericXml log = logging.getLogger(__name__) +# Note implicit string concatenation here to avoid excessively long lines: +_DOCTYPES = ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" ' + '"http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">', + '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" ' + '"NCBI_BlastOutput.dtd">'] + class BlastXml(GenericXml): """NCBI Blast XML Output data""" @@ -48,8 +53,7 @@ handle.close() return False line = handle.readline() - if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">', - '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']: + if line.strip() not in _DOCTYPES: handle.close() return False line = handle.readline() @@ -96,8 +100,7 @@ raise ValueError("%s is not an XML file!" % f) line = h.readline() header += line - if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">', - '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']: + if line.strip() not in _DOCTYPES: out.write(header) # for diagnosis out.close() h.close() @@ -220,21 +223,50 @@ self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers self.add_composite_file('blastdb.nin', is_binary=True) # index file self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences - self.add_composite_file('blastdb.nal', is_binary=False, optional=True) # alias ( -gi_mask option of makeblastdb) - self.add_composite_file('blastdb.nhd', is_binary=True, optional=True) # sorted sequence hash values ( -hash_index option of makeblastdb) - self.add_composite_file('blastdb.nhi', is_binary=True, optional=True) # index of sequence hash values ( -hash_index option of makeblastdb) - self.add_composite_file('blastdb.nnd', is_binary=True, optional=True) # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) - self.add_composite_file('blastdb.nni', is_binary=True, optional=True) # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) - self.add_composite_file('blastdb.nog', is_binary=True, optional=True) # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb) - self.add_composite_file('blastdb.nsd', is_binary=True, optional=True) # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) - self.add_composite_file('blastdb.nsi', is_binary=True, optional=True) # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) -# self.add_composite_file('blastdb.00.idx', is_binary=True, optional=True) # first volume of the MegaBLAST index generated by makembindex -# The previous line should be repeated for each index volume, with filename extensions like '.01.idx', '.02.idx', etc. - self.add_composite_file('blastdb.shd', is_binary=True, optional=True) # MegaBLAST index superheader (-old_style_index false option of makembindex) -# self.add_composite_file('blastdb.naa', is_binary=True, optional=True) # index of a WriteDB column for e.g. mask data -# self.add_composite_file('blastdb.nab', is_binary=True, optional=True) # data of a WriteDB column -# self.add_composite_file('blastdb.nac', is_binary=True, optional=True) # multiple byte order for a WriteDB column -# The previous 3 lines should be repeated for each WriteDB column, with filename extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc. + + # alias ( -gi_mask option of makeblastdb) + self.add_composite_file('blastdb.nal', is_binary=False, optional=True) + + # sorted sequence hash values ( -hash_index option of makeblastdb) + self.add_composite_file('blastdb.nhd', is_binary=True, optional=True) + + # index of sequence hash values ( -hash_index option of makeblastdb) + self.add_composite_file('blastdb.nhi', is_binary=True, optional=True) + + # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) + self.add_composite_file('blastdb.nnd', is_binary=True, optional=True) + + # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) + self.add_composite_file('blastdb.nni', is_binary=True, optional=True) + + # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb) + self.add_composite_file('blastdb.nog', is_binary=True, optional=True) + + # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) + self.add_composite_file('blastdb.nsd', is_binary=True, optional=True) + + # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) + self.add_composite_file('blastdb.nsi', is_binary=True, optional=True) + + # first volume of the MegaBLAST index generated by makembindex + # self.add_composite_file('blastdb.00.idx', is_binary=True, optional=True) + # The previous line should be repeated for each index volume, with filename + # extensions like '.01.idx', '.02.idx', etc. + + # MegaBLAST index superheader (-old_style_index false option of makembindex) + # self.add_composite_file('blastdb.shd', is_binary=True, optional=True) + + # index of a WriteDB column for e.g. mask data + # self.add_composite_file('blastdb.naa', is_binary=True, optional=True) + + # data of a WriteDB column + # self.add_composite_file('blastdb.nab', is_binary=True, optional=True) + + # multiple byte order for a WriteDB column + # self.add_composite_file('blastdb.nac', is_binary=True, optional=True) + + # The previous 3 lines should be repeated for each WriteDB column, with filename + # extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc. class BlastProtDb(_BlastDb, Data): @@ -245,7 +277,7 @@ def __init__(self, **kwd): Data.__init__(self, **kwd) -# Component file comments are as in BlastNucDb except where noted + # Component file comments are as in BlastNucDb except where noted self.add_composite_file('blastdb.phr', is_binary=True) self.add_composite_file('blastdb.pin', is_binary=True) self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences @@ -256,10 +288,11 @@ self.add_composite_file('blastdb.pog', is_binary=True, optional=True) self.add_composite_file('blastdb.psd', is_binary=True, optional=True) self.add_composite_file('blastdb.psi', is_binary=True, optional=True) -# self.add_composite_file('blastdb.paa', is_binary=True, optional=True) -# self.add_composite_file('blastdb.pab', is_binary=True, optional=True) -# self.add_composite_file('blastdb.pac', is_binary=True, optional=True) -# The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc. + # self.add_composite_file('blastdb.paa', is_binary=True, optional=True) + # self.add_composite_file('blastdb.pab', is_binary=True, optional=True) + # self.add_composite_file('blastdb.pac', is_binary=True, optional=True) + # The last 3 lines should be repeated for each WriteDB column, with filename + # extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc. class BlastDomainDb(_BlastDb, Data):