proteomics_datatypes: proteomics.py comparison

comparison proteomics.py @ 3:b22ebbb05260 draft

Uploaded

author	iracooke
date	Mon, 10 Mar 2014 19:40:20 -0400
parents	7101f7e4b00b
children

comparison

equal deleted inserted replaced

-:7edcae695986
+:b22ebbb05260
 """
 Proteomics format classes
 """
 import logging
 import re
-from galaxy.datatypes.data import *
+import binascii
-from galaxy.datatypes.xml import *
 from galaxy.datatypes.sniff import *
-from galaxy.datatypes.binary import *
+from galaxy.datatypes import data
-from galaxy.datatypes.interval import *
+from galaxy.datatypes.data import Text
+from galaxy.datatypes.xml import GenericXml
+from galaxy.datatypes.binary import Binary
+from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes.interval import Gff
 log = logging.getLogger(__name__)
-class ProtGff( Gff ):
-"""Tab delimited data in Gff format"""
+class Wiff( Binary ):
-file_ext = "prot_gff"
+"""Class for wiff files."""
-def set_peek( self, dataset, is_multi_byte=False ):
+file_ext = 'wiff'
-"""Set the peek and blurb text"""
+allow_datatype_change = False
-if not dataset.dataset.purged:
+composite_type = 'auto_primary_file'
-dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
-dataset.blurb = 'Proteogenomics GFF'
+def __init__(self, **kwd):
-else:
+Binary.__init__(self, **kwd)
-dataset.peek = 'file does not exist'
+self.add_composite_file( 'wiff',
-dataset.blurb = 'file purged from disk'
+description = 'AB SCIEX files in .wiff format. This can contain all needed information or only metadata.',
+is_binary = True )
-def sniff( self, filename ):
+self.add_composite_file( 'wiff_scan',
-handle = open(filename)
+description = 'AB SCIEX spectra file (wiff.scan), if the corresponding .wiff file only contains metadata.',
-xmlns_re = re.compile("^##gff-version")
+optional = 'True', is_binary = True )
-for i in range(3):
-line = handle.readline()
+def generate_primary_file( self, dataset = None ):
-if xmlns_re.match(line.strip()):
+rval = ['<html><head><title>Wiff Composite Dataset </title></head><p/>']
-handle.close()
+rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
-return True
+for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems():
+fn = composite_name
-handle.close()
+opt_text = ''
-return False
+if composite_file.optional:
+opt_text = ' (optional)'
+if composite_file.get('description'):
-class Xls( Binary ):
+rval.append( '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % ( fn, fn, composite_file.get('description'), opt_text ) )
-"""Class describing a binary excel spreadsheet file"""
+else:
-file_ext = "xls"
+rval.append( '<li><a href="%s" type="text/plain">%s</a>%s</li>' % ( fn, fn, opt_text ) )
+rval.append( '</ul></div></html>' )
-def set_peek( self, dataset, is_multi_byte=False ):
+return "\n".join( rval )
-if not dataset.dataset.purged:
-dataset.peek  = "Excel Spreadsheet file"
-dataset.blurb = data.nice_size( dataset.get_size() )
-else:
+if hasattr(Binary, 'register_unsniffable_binary_ext'):
-dataset.peek = 'file does not exist'
+Binary.register_unsniffable_binary_ext('wiff')
-dataset.blurb = 'file purged from disk'
+class IdpDB( Binary ):
+file_ext = "idpDB"
+if hasattr(Binary, 'register_unsniffable_binary_ext'):
+Binary.register_unsniffable_binary_ext('idpDB')
+class PepXmlReport( Tabular ):
+"""pepxml converted to tabular report"""
+file_ext = "tsv"
+def __init__(self, **kwd):
+Tabular.__init__( self, **kwd )
+self.column_names = ['Protein', 'Peptide', 'Assumed Charge', 'Neutral Pep Mass (calculated)', 'Neutral Mass', 'Retention Time', 'Start Scan', 'End Scan', 'Search Engine', 'PeptideProphet Probability', 'Interprophet Probabaility']
 def display_peek( self, dataset ):
-try:
+"""Returns formatted html of peek"""
-return dataset.peek
+return Tabular.make_html_table( self, dataset, column_names=self.column_names )
-except:
-return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) )
+class ProtXmlReport( Tabular ):
-class ProteomicsXml(GenericXml):
+"""protxml converted to tabular report"""
+file_ext = "tsv"
+comment_lines = 1
+def __init__(self, **kwd):
+Tabular.__init__( self, **kwd )
+self.column_names = ["Entry Number", "Group Probability", "Protein", "Protein Link", "Protein Probability", "Percent Coverage", "Number of Unique Peptides", "Total Independent Spectra", "Percent Share of Spectrum ID's", "Description", "Protein Molecular Weight", "Protein Length", "Is Nondegenerate Evidence", "Weight", "Precursor Ion Charge", "Peptide sequence", "Peptide Link", "NSP Adjusted Probability", "Initial Probability", "Number of Total Termini", "Number of Sibling Peptides Bin", "Number of Instances", "Peptide Group Designator", "Is Evidence?"]
+def display_peek( self, dataset ):
+"""Returns formatted html of peek"""
+return Tabular.make_html_table( self, dataset, column_names=self.column_names )
+class ProteomicsXml( GenericXml ):
 """ An enhanced XML datatype used to reuse code across several
 proteomic/mass-spec datatypes. """
 def sniff(self, filename):
 """ Determines whether the file is the correct XML type. """
 with open(filename, 'r') as contents:
 while True:
 line = contents.readline()
 if line == None or not line.startswith('<?'):
 break
 pattern = '^<(\w*:)?%s' % self.root # pattern match <root or <ns:root for any ns string
 dataset.blurb = self.blurb
 else:
 dataset.peek = 'file does not exist'
 dataset.blurb = 'file purged from disk'
 class PepXml(ProteomicsXml):
 """pepXML data"""
 file_ext = "pepxml"
 blurb = 'pepXML data'
 root = "msms_pipeline_analysis"
 class MzML(ProteomicsXml):
 """mzML data"""
 file_ext = "mzml"
 blurb = 'mzML Mass Spectrometry data'
 root = "protein_summary"
 class MzXML(ProteomicsXml):
 """mzXML data"""
-file_ext = "mzXML"
+file_ext = "mzxml"
 blurb = "mzXML Mass Spectrometry data"
 root = "mzXML"
 ## PSI datatypes
 class MzIdentML(ProteomicsXml):
 file_ext = "mzid"
 blurb = "XML identified peptides and proteins."
 root = "MzIdentML"
 class TraML(ProteomicsXml):
-file_ext = "traML"
+file_ext = "traml"
 blurb = "TraML transition list"
 root = "TraML"
 class MzQuantML(ProteomicsXml):
 file_ext = "mzq"
 blurb = "XML quantification data"
 root = "MzQuantML"
+class ConsensusXML(ProteomicsXml):
+file_ext = "consensusxml"
+blurb = "OpenMS multiple LC-MS map alignment file"
+root = "consensusXML"
+class FeatureXML(ProteomicsXml):
+file_ext = "featurexml"
+blurb = "OpenMS feature file"
+root = "featureMap"
+class IdXML(ProteomicsXml):
+file_ext = "idxml"
+blurb = "OpenMS identification file"
+root = "IdXML"
 class Mgf( Text ):
 """Mascot Generic Format data"""
 file_ext = "mgf"
 def set_peek( self, dataset, is_multi_byte=False ):
 dataset.blurb = 'mgf Mascot Generic Format'
 else:
 dataset.peek = 'file does not exist'
 dataset.blurb = 'file purged from disk'
 def sniff( self, filename ):
 mgf_begin_ions = "BEGIN IONS"
 max_lines=100
 for i, line in enumerate( file( filename ) ):
 line = line.rstrip( '\n\r' )
 if line==mgf_begin_ions:
 return True
 if i>max_lines:
 return False
 class MascotDat( Text ):
 """Mascot search results """
 file_ext = "mascotdat"
 def set_peek( self, dataset, is_multi_byte=False ):
 except:
 return "Thermo Finnigan RAW file (%s)" % ( data.nice_size( dataset.get_size() ) )
 if hasattr(Binary, 'register_sniffable_binary_format'):
-Binary.register_sniffable_binary_format('RAW', 'RAW', RAW)
+Binary.register_sniffable_binary_format('raw', 'raw', RAW)
-class Msp(Text):
+class Msp( Text ):
 """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """
 file_ext = "msp"
 @staticmethod
 def next_line_starts_with(contents, prefix):
 return False
 return True
 # unsniffable binary format, should do something about this
-class XHunterAslFormat(Binary):
+class XHunterAslFormat( Binary ):
 """ Annotated Spectra in the HLF format http://www.thegpm.org/HUNTER/format_2006_09_15.html """
 file_ext = "hlf"
 if hasattr(Binary, 'register_unsniffable_binary_ext'):
 Binary.register_unsniffable_binary_ext('hlf')

Mercurial > repos > iracooke > proteomics_datatypes

comparison proteomics.py @ 3:b22ebbb05260 draft