changeset 8:6ab4a0bf67df draft

Make splib a composite datatype
author iracooke
date Wed, 20 May 2015 00:26:37 -0400
parents 9cfabf0b942d
children 349af9298ff1
files README README.rst bundle.sh datatypes_conf.xml proteomics.py
diffstat 5 files changed, 122 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/README	Sun Dec 14 22:42:08 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-# What is it?
-
-Galaxy datatype and display-application definitions for Proteomics data
-
-# Installation
-
-Install into your local galaxy instance from the main galaxy toolshed at http://toolshed.g2.bx.psu.edu/
-
-˝To visualize data you will need to install the protviz visualization web application.  This is available at
-[https://bitbucket.org/Andrew_Brock/proteomics-visualise](https://bitbucket.org/Andrew_Brock/proteomics-visualise)
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Wed May 20 00:26:37 2015 -0400
@@ -0,0 +1,66 @@
+Proteomics Datatypes
+====================
+
+Support for proteomics and mass spectrometry datatypes for Galaxy_
+
+.. _Galaxy: http://galaxyproject.org/
+
+
+Included Datatypes
+------------------
+
+================      =====================  =======     ====================================
+Class                 Extension              Sniffer     Application or Purpose
+================      =====================  =======     ====================================
+Binary                cps                    False       SearchGUI_ / PeptideShaker_
+PepXML_               raw_pepxml             True        TPP_ Search Engine Outputs
+PepXML_               peptideprophet_pepxml  True        TPP_ PeptideProphet Outputs
+PepXML_               interprophet_pepxml    True        TPP_ iProphet Outputs
+ProtXML_              protxml                True        TPP_ ProteinProphet Outputs
+GenericXML            trafoxml               False       OpenMS_ RT Transforms
+GenericXML            qcml_                  False       OpenMS_ QC Data
+PepXmlReport          pepxml.tsv             False       Tabular PepXML
+ProtXmlReport         protxml.tsv            False       Tabular ProtXML
+MascotDat             mascotdat              False       Raw Mascot Search Results
+MzML_                 mzml                   True        Raw Mass Spec Data
+Mgf                   mgf                    True        MS/MS Spectra
+IdpDB                 idpdb                  False       
+Wiff                  wiff                   False       Raw Mass Spec Data
+MzXML                 mzxml                  True        Raw Mass Spec Data
+MzQuantML_            mzq                    True        HUPO standard for quant data
+MzIdentML_            mzid                   True        HUPO standard for ID data
+TraML_                traml                  True        HUPO standard for transition lists
+FeatureXML            featurexml             False       OpenMS_
+ConsensusXML          consensusxml           False       OpenMS_
+IdXML                 idxml                  False       OpenMS_
+TandemXML_            tandem                 False       XTandem output
+RAW                   raw                    False       Raw Mass Spec Data
+Msp_                  msp                    True        Spectral Library
+SPLib_                splib                  True        Spectral Library
+Ms2                   ms2                    False       MS/MS Spectra
+XHunterAslFormat      hlf                    False       Spectral Library
+================      =====================  =======     ====================================
+
+
+.. _qcml: http://code.google.com/p/qcml
+.. _PepXML: http://tools.proteomecenter.org/wiki/index.php?title=Formats:pepXML
+.. _ProtXML: http://tools.proteomecenter.org/wiki/index.php?title=Formats:protXML
+.. _TPP: http://tools.proteomecenter.org/wiki/index.php?title=Software:TPP
+.. _OpenMS: https://github.com/OpenMS/OpenMS
+.. _SearchGUI: https://code.google.com/p/searchgui/
+.. _PeptideShaker: https://code.google.com/p/peptide-shaker/
+.. _MzML: http://www.psidev.info/mzml_1_0_0%20
+.. _MzQuantML: http://www.psidev.info/mzquantml
+.. _MzIdentML: http://www.psidev.info/mzidentml
+.. _TraML: http://www.psidev.info/traml
+.. _TandemXML: http://www.thegpm.org/docs/X_series_output_form.pdf
+.. _Msp: http://chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf
+.. _SPLib: http://tools.proteomecenter.org/wiki/index.php?title=Software:SpectraST#Developer.27s_Guide
+
+Visualization
+-------------
+
+This package also installs display applications for mzML, PepXML and ProtXML formats. These are designed to work with the protviz_ web application.  Configuring this application requires a fair number of extra steps though.  See the protviz_ documentation for more info.  
+
+.. _protviz: https://bitbucket.org/Andrew_Brock/proteomics-visualise
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bundle.sh	Wed May 20 00:26:37 2015 -0400
@@ -0,0 +1,2 @@
+tar --exclude=.DS_Store --exclude=.hgcheck --exclude=.git --exclude=*.tar --exclude=*.bz2 \
+--exclude=bundle.sh --exclude=.hg -cvf package.tar ./;bzip2 -f package.tar
\ No newline at end of file
--- a/datatypes_conf.xml	Sun Dec 14 22:42:08 2014 -0500
+++ b/datatypes_conf.xml	Wed May 20 00:26:37 2015 -0400
@@ -46,6 +46,7 @@
     <datatype extension="msp" type="galaxy.datatypes.proteomics:Msp" display_in_upload="true" />
     <datatype extension="ms2" type="galaxy.datatypes.proteomics:Ms2" display_in_upload="true" />
     <datatype extension="hlf" type="galaxy.datatypes.proteomics:XHunterAslFormat" mimetype="application/octet-stream" display_in_upload="true" />
+    <datatype extension="splib" type="galaxy.datatypes.proteomics:SPLib" display_in_upload="true" />
   </registration>
   <sniffers>
     <sniffer type="galaxy.datatypes.proteomics:MzML"/>
@@ -57,5 +58,7 @@
     <sniffer type="galaxy.datatypes.proteomics:TraML"/>
     <sniffer type="galaxy.datatypes.proteomics:MzIdentML"/>
     <sniffer type="galaxy.datatypes.proteomics:MzQuantML"/>
+    <sniffer type="galaxy.datatypes.proteomics:Msp"/>
+    <sniffer type="galaxy.datatypes.proteomics:SPLib"/>
   </sniffers>
 </datatypes>
--- a/proteomics.py	Sun Dec 14 22:42:08 2014 -0500
+++ b/proteomics.py	Wed May 20 00:26:37 2015 -0400
@@ -285,6 +285,57 @@
         with open(filename, 'r') as contents:
             return Msp.next_line_starts_with(contents, "Name:") and Msp.next_line_starts_with(contents, "MW:")
 
+
+class SPLib( Msp ):
+    """SpectraST Spectral Library. Closely related to msp format"""
+    file_ext = "splib"
+    composite_type = 'auto_primary_file'
+
+    def __init__(self, **kwd):
+        Msp.__init__(self, **kwd)
+        self.add_composite_file( 'library.splib', 
+            description = 'Spectral Library. Contains actual library spectra', 
+            is_binary = False )
+        self.add_composite_file( 'library.spidx', 
+            description = 'Spectrum index',  is_binary = False )
+        self.add_composite_file( 'library.pepidx',
+            description = 'Peptide index', is_binary = False)
+
+
+    def generate_primary_file( self, dataset = None ):
+        rval = ['<html><head><title>Spectral Library Composite Dataset </title></head><p/>']
+        rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
+        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems():
+            fn = composite_name
+            opt_text = ''
+            if composite_file.optional:
+                opt_text = ' (optional)'
+            if composite_file.get('description'):
+                rval.append( '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % ( fn, fn, composite_file.get('description'), opt_text ) )
+            else:
+                rval.append( '<li><a href="%s" type="text/plain">%s</a>%s</li>' % ( fn, fn, opt_text ) )
+        rval.append( '</ul></div></html>' )
+        return "\n".join( rval )
+
+
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text"""
+        if not dataset.dataset.purged:
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            dataset.blurb = 'splib Spectral Library Format'
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+
+    def sniff(self, filename):
+        """ Determines whether the file is a SpectraST generated file. 
+        """
+        with open(filename, 'r') as contents:
+            return Msp.next_line_starts_with(contents, "Name:") and Msp.next_line_starts_with(contents, "LibID:")
+
+
 class Ms2(Text):
     file_ext = "ms2"