Mercurial > repos > jjohnson > spectrast
diff link_scan_datasets.py @ 5:274fdc50169b draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/spectrast commit e5b5b15b0a995c8588ff62f92fd0a2329fb7a6a5-dirty
author | jjohnson |
---|---|
date | Wed, 25 Jul 2018 15:05:34 -0400 |
parents | 7f02fc51bddf |
children |
line wrap: on
line diff
```diff
--- a/link_scan_datasets.py	Wed Jul 25 14:41:20 2018 -0400
+++ b/link_scan_datasets.py	Wed Jul 25 15:05:34 2018 -0400
@@ -3,13 +3,10 @@
 from __future__ import print_function

 import argparse
-import difflib
-from difflib import SequenceMatcher
 import os
 import os.path
-import sys
 import xml.sax
-
+from difflib import SequenceMatcher


 def __main__():
@@ -17,23 +14,25 @@
         description='link spectrum datasets to the name used' +
         ' in the identification dataset')
     parser.add_argument(
-        'ident_files', nargs='+',
+        'ident_files', nargs='+',
         help='Pepxml or mzIdentML')
     parser.add_argument(
-        '-n', '--scan_name', default=[], action='append',
+        '-n', '--scan_name', default=[], action='append',
         help='Name for scan file')
     parser.add_argument(
-        '-f', '--scan_file', default=[], action='append',
+        '-f', '--scan_file', default=[], action='append',
         help='Path for scan file')
     args = parser.parse_args()

-    class MzidHandler( xml.sax.ContentHandler):
+    class MzidHandler(xml.sax.ContentHandler):
+
         def __init__(self):
             xml.sax.ContentHandler.__init__(self)
             self.spectraDataFiles = []
             self.spectraDataNames = []
             self.searchDatabaseFiles = []
             self.searchDatabaseNames = []
+
         def startElement(self, tag, attrs):
             if tag == 'SpectraData':
                 id = attrs['id']
@@ -51,35 +50,41 @@
                 self.searchDatabaseFiles.append(filename)
                 self.searchDatabaseNames.append(name if name else id)
                 print ("SearchDatabase: %s %s" % (name if name else id, path))
-        def endElement( self, name):
-            pass
-        def characters( self, data):
+
+        def endElement(self, name):
             pass

-    class PepXmlHandler( xml.sax.ContentHandler):
+        def characters(self, data):
+            pass
+
+    class PepXmlHandler(xml.sax.ContentHandler):
+
         def __init__(self):
             xml.sax.ContentHandler.__init__(self)
             self.spectraDataFiles = []
             self.spectraDataNames = []
+
         def startElement(self, tag, attrs):
             if tag == 'msms_run_summary':
                 basename = attrs['base_name']
                 name = os.path.basename(basename)
                 ext = attrs['raw_data']
-                path = '%s%s' % (basename,ext)
+                path = '%s%s' % (basename, ext)
                 filename = os.path.basename(path)
                 self.spectraDataFiles.append(filename)
-                self.spectraDataNames.append(name)
+                self.spectraDataNames.append(name)
                 print ("SpectraData: %s %s" % (name, path))
-        def endElement( self, name):
+
+        def endElement(self, name):
             pass
-        def characters( self, data):
+
+        def characters(self, data):
             pass

     parser = xml.sax.make_parser()
     parser.setFeature(xml.sax.handler.feature_namespaces, 0)
     handler = PepXmlHandler()
-    parser.setContentHandler( handler )
+    parser.setContentHandler(handler)
     for ident in args.ident_files:
         parser.parse(ident)

@@ -89,12 +94,13 @@
         if name in spectra_names:
             return name
         try:
-            r = [SequenceMatcher(None, name, spectra_names[x]).ratio() for x in range(len(spectra_names))]
+            r = [SequenceMatcher(None, name, spectra_names[x]).ratio()
+                 for x in range(len(spectra_names))]
             return spectra_names[r.index(max(r))]
         except Exception, e:
             print ("best_match: %s %s" % (name, e))

-    for i,name in enumerate(args.scan_name):
+    for i, name in enumerate(args.scan_name):
         path = args.scan_file[i] if len(args.scan_file) > i else ''
         (root, ext) = os.path.splitext(name)
         print ("SpectraFile: %s %s" % (name, path))
@@ -107,4 +113,3 @@

 if __name__ == "__main__":
     __main__()
-
```