Wiff Composite Dataset

# HG changeset patch # User iracooke # Date 1394494820 14400 # Node ID b22ebbb05260806ffff445f1d4bd9e228099aade # Parent 7edcae6959863acbe7a8c7e79d6c039bbca70f91 Uploaded diff -r 7edcae695986 -r b22ebbb05260 proteomics.py --- a/proteomics.py Mon Mar 10 19:39:31 2014 -0400 +++ b/proteomics.py Mon Mar 10 19:40:20 2014 -0400 @@ -3,63 +3,95 @@ """ import logging import re -from galaxy.datatypes.data import * -from galaxy.datatypes.xml import * +import binascii + from galaxy.datatypes.sniff import * -from galaxy.datatypes.binary import * -from galaxy.datatypes.interval import * +from galaxy.datatypes import data +from galaxy.datatypes.data import Text +from galaxy.datatypes.xml import GenericXml +from galaxy.datatypes.binary import Binary +from galaxy.datatypes.tabular import Tabular +from galaxy.datatypes.interval import Gff log = logging.getLogger(__name__) -class ProtGff( Gff ): - """Tab delimited data in Gff format""" - file_ext = "prot_gff" - def set_peek( self, dataset, is_multi_byte=False ): - """Set the peek and blurb text""" - if not dataset.dataset.purged: - dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) - dataset.blurb = 'Proteogenomics GFF' - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' + +class Wiff( Binary ): + """Class for wiff files.""" + file_ext = 'wiff' + allow_datatype_change = False + composite_type = 'auto_primary_file' + + def __init__(self, **kwd): + Binary.__init__(self, **kwd) + self.add_composite_file( 'wiff', + description = 'AB SCIEX files in .wiff format. This can contain all needed information or only metadata.', + is_binary = True ) + self.add_composite_file( 'wiff_scan', + description = 'AB SCIEX spectra file (wiff.scan), if the corresponding .wiff file only contains metadata.', + optional = 'True', is_binary = True ) - def sniff( self, filename ): - handle = open(filename) - xmlns_re = re.compile("^##gff-version") - for i in range(3): - line = handle.readline() - if xmlns_re.match(line.strip()): - handle.close() - return True + def generate_primary_file( self, dataset = None ): + rval = ['Wiff Composite Dataset

'] + rval.append('

This composite dataset is composed of the following files:

%s (%s)%s
%s%s

' ) + return "\n".join( rval ) - handle.close() - return False -class Xls( Binary ): - """Class describing a binary excel spreadsheet file""" - file_ext = "xls" +if hasattr(Binary, 'register_unsniffable_binary_ext'): + Binary.register_unsniffable_binary_ext('wiff') + + +class IdpDB( Binary ): + file_ext = "idpDB" + +if hasattr(Binary, 'register_unsniffable_binary_ext'): + Binary.register_unsniffable_binary_ext('idpDB') + + +class PepXmlReport( Tabular ): + """pepxml converted to tabular report""" + file_ext = "tsv" + + def __init__(self, **kwd): + Tabular.__init__( self, **kwd ) + self.column_names = ['Protein', 'Peptide', 'Assumed Charge', 'Neutral Pep Mass (calculated)', 'Neutral Mass', 'Retention Time', 'Start Scan', 'End Scan', 'Search Engine', 'PeptideProphet Probability', 'Interprophet Probabaility'] - def set_peek( self, dataset, is_multi_byte=False ): - if not dataset.dataset.purged: - dataset.peek = "Excel Spreadsheet file" - dataset.blurb = data.nice_size( dataset.get_size() ) - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' def display_peek( self, dataset ): - try: - return dataset.peek - except: - return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) ) + """Returns formated html of peek""" + return Tabular.make_html_table( self, dataset, column_names=self.column_names ) + -class ProteomicsXml(GenericXml): +class ProtXmlReport( Tabular ): + """protxml converted to tabular report""" + file_ext = "tsv" + comment_lines = 1 + + def __init__(self, **kwd): + Tabular.__init__( self, **kwd ) + self.column_names = ["Entry Number", "Group Probability", "Protein", "Protein Link", "Protein Probability", "Percent Coverage", "Number of Unique Peptides", "Total Independent Spectra", "Percent Share of Spectrum ID's", "Description", "Protein Molecular Weight", "Protein Length", "Is Nondegenerate Evidence", "Weight", "Precursor Ion Charge", "Peptide sequence", "Peptide Link", "NSP Adjusted Probability", "Initial Probability", "Number of Total Termini", "Number of Sibling Peptides Bin", "Number of Instances", "Peptide Group Designator", "Is Evidence?"] + + def display_peek( self, dataset ): + """Returns formated html of peek""" + return Tabular.make_html_table( self, dataset, column_names=self.column_names ) + +class ProteomicsXml( GenericXml ): """ An enhanced XML datatype used to reuse code across several proteomic/mass-spec datatypes. """ def sniff(self, filename): """ Determines whether the file is the correct XML type. """ - with open(filename, 'r') as contents: + with open(filename, 'r') as contents: while True: line = contents.readline() if line == None or not line.startswith('max_lines: return False - - + + class MascotDat( Text ): """Mascot search results """ file_ext = "mascotdat" @@ -206,10 +256,10 @@ if hasattr(Binary, 'register_sniffable_binary_format'): - Binary.register_sniffable_binary_format('RAW', 'RAW', RAW) + Binary.register_sniffable_binary_format('raw', 'raw', RAW) -class Msp(Text): +class Msp( Text ): """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """ file_ext = "msp" @@ -267,10 +317,9 @@ return True # unsniffable binary format, should do something about this -class XHunterAslFormat(Binary): +class XHunterAslFormat( Binary ): """ Annotated Spectra in the HLF format http://www.thegpm.org/HUNTER/format_2006_09_15.html """ file_ext = "hlf" - if hasattr(Binary, 'register_unsniffable_binary_ext'): Binary.register_unsniffable_binary_ext('hlf')