diff link_scan_datasets.py @ 5:274fdc50169b draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/spectrast commit e5b5b15b0a995c8588ff62f92fd0a2329fb7a6a5-dirty
author jjohnson
date Wed, 25 Jul 2018 15:05:34 -0400
parents 7f02fc51bddf
children
line wrap: on
line diff
--- a/link_scan_datasets.py	Wed Jul 25 14:41:20 2018 -0400
+++ b/link_scan_datasets.py	Wed Jul 25 15:05:34 2018 -0400
@@ -3,13 +3,10 @@
 from __future__ import print_function
 
 import argparse
-import difflib
-from difflib import SequenceMatcher
 import os
 import os.path
-import sys
 import xml.sax
-
+from difflib import SequenceMatcher
 
 
 def __main__():
@@ -17,23 +14,25 @@
         description='link spectrum datasets to the name used' +
                     ' in the identification dataset')
     parser.add_argument(
-        'ident_files', nargs='+', 
+        'ident_files', nargs='+',
         help='Pepxml or mzIdentML')
     parser.add_argument(
-        '-n', '--scan_name', default=[], action='append', 
+        '-n', '--scan_name', default=[], action='append',
         help='Name for scan file')
     parser.add_argument(
-        '-f', '--scan_file', default=[], action='append', 
+        '-f', '--scan_file', default=[], action='append',
         help='Path for scan file')
     args = parser.parse_args()
 
-    class MzidHandler( xml.sax.ContentHandler):
+    class MzidHandler(xml.sax.ContentHandler):
+
         def __init__(self):
             xml.sax.ContentHandler.__init__(self)
             self.spectraDataFiles = []
             self.spectraDataNames = []
             self.searchDatabaseFiles = []
             self.searchDatabaseNames = []
+
         def startElement(self, tag, attrs):
             if tag == 'SpectraData':
                 id = attrs['id']
@@ -51,35 +50,41 @@
                 self.searchDatabaseFiles.append(filename)
                 self.searchDatabaseNames.append(name if name else id)
                 print ("SearchDatabase: %s  %s" % (name if name else id, path))
-        def endElement( self, name):
-            pass
-        def characters( self, data):
+
+        def endElement(self, name):
             pass
 
-    class PepXmlHandler( xml.sax.ContentHandler):
+        def characters(self, data):
+            pass
+
+    class PepXmlHandler(xml.sax.ContentHandler):
+
         def __init__(self):
             xml.sax.ContentHandler.__init__(self)
             self.spectraDataFiles = []
             self.spectraDataNames = []
+
         def startElement(self, tag, attrs):
             if tag == 'msms_run_summary':
                 basename = attrs['base_name']
                 name = os.path.basename(basename)
                 ext = attrs['raw_data']
-                path = '%s%s' % (basename,ext)
+                path = '%s%s' % (basename, ext)
                 filename = os.path.basename(path)
                 self.spectraDataFiles.append(filename)
-                self.spectraDataNames.append(name) 
+                self.spectraDataNames.append(name)
                 print ("SpectraData: %s  %s" % (name, path))
-        def endElement( self, name):
+
+        def endElement(self, name):
             pass
-        def characters( self, data):
+
+        def characters(self, data):
             pass
 
     parser = xml.sax.make_parser()
     parser.setFeature(xml.sax.handler.feature_namespaces, 0)
     handler = PepXmlHandler()
-    parser.setContentHandler( handler )
+    parser.setContentHandler(handler)
     for ident in args.ident_files:
         parser.parse(ident)
 
@@ -89,12 +94,13 @@
         if name in spectra_names:
             return name
         try:
-            r = [SequenceMatcher(None, name, spectra_names[x]).ratio() for x in range(len(spectra_names))]
+            r = [SequenceMatcher(None, name, spectra_names[x]).ratio()
+                 for x in range(len(spectra_names))]
             return spectra_names[r.index(max(r))]
         except Exception, e:
             print ("best_match: %s  %s" % (name, e))
 
-    for i,name in enumerate(args.scan_name):
+    for i, name in enumerate(args.scan_name):
         path = args.scan_file[i] if len(args.scan_file) > i else ''
         (root, ext) = os.path.splitext(name)
         print ("SpectraFile: %s  %s" % (name, path))
@@ -107,4 +113,3 @@
 
 if __name__ == "__main__":
     __main__()
-