Mercurial > repos > jjohnson > spectrast
comparison link_scan_datasets.py @ 5:274fdc50169b draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/spectrast commit e5b5b15b0a995c8588ff62f92fd0a2329fb7a6a5-dirty
| field | value |
|---|---|
| author | jjohnson |
| date | Wed, 25 Jul 2018 15:05:34 -0400 |
| parents | 7f02fc51bddf |
| children | |
comparison (legend: equal, deleted, inserted, replaced)
4:c9bfe6adb7cd | 5:274fdc50169b |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 from __future__ import print_function | 3 from __future__ import print_function |
4 | 4 |
5 import argparse | 5 import argparse |
6 import difflib | |
7 from difflib import SequenceMatcher | |
8 import os | 6 import os |
9 import os.path | 7 import os.path |
10 import sys | |
11 import xml.sax | 8 import xml.sax |
12 | 9 from difflib import SequenceMatcher |
13 | 10 |
14 | 11 |
15 def __main__(): | 12 def __main__(): |
16 parser = argparse.ArgumentParser( | 13 parser = argparse.ArgumentParser( |
17 description='link spectrum datasets to the name used' + | 14 description='link spectrum datasets to the name used' + |
18 ' in the identification dataset') | 15 ' in the identification dataset') |
19 parser.add_argument( | 16 parser.add_argument( |
20 'ident_files', nargs='+', | 17 'ident_files', nargs='+', |
21 help='Pepxml or mzIdentML') | 18 help='Pepxml or mzIdentML') |
22 parser.add_argument( | 19 parser.add_argument( |
23 '-n', '--scan_name', default=[], action='append', | 20 '-n', '--scan_name', default=[], action='append', |
24 help='Name for scan file') | 21 help='Name for scan file') |
25 parser.add_argument( | 22 parser.add_argument( |
26 '-f', '--scan_file', default=[], action='append', | 23 '-f', '--scan_file', default=[], action='append', |
27 help='Path for scan file') | 24 help='Path for scan file') |
28 args = parser.parse_args() | 25 args = parser.parse_args() |
29 | 26 |
30 class MzidHandler( xml.sax.ContentHandler): | 27 class MzidHandler(xml.sax.ContentHandler): |
28 | |
31 def __init__(self): | 29 def __init__(self): |
32 xml.sax.ContentHandler.__init__(self) | 30 xml.sax.ContentHandler.__init__(self) |
33 self.spectraDataFiles = [] | 31 self.spectraDataFiles = [] |
34 self.spectraDataNames = [] | 32 self.spectraDataNames = [] |
35 self.searchDatabaseFiles = [] | 33 self.searchDatabaseFiles = [] |
36 self.searchDatabaseNames = [] | 34 self.searchDatabaseNames = [] |
35 | |
37 def startElement(self, tag, attrs): | 36 def startElement(self, tag, attrs): |
38 if tag == 'SpectraData': | 37 if tag == 'SpectraData': |
39 id = attrs['id'] | 38 id = attrs['id'] |
40 path = attrs['location'] | 39 path = attrs['location'] |
41 filename = os.path.basename(path) | 40 filename = os.path.basename(path) |
49 filename = os.path.basename(path) | 48 filename = os.path.basename(path) |
50 name = attrs['name'] if 'name' in attrs else None | 49 name = attrs['name'] if 'name' in attrs else None |
51 self.searchDatabaseFiles.append(filename) | 50 self.searchDatabaseFiles.append(filename) |
52 self.searchDatabaseNames.append(name if name else id) | 51 self.searchDatabaseNames.append(name if name else id) |
53 print ("SearchDatabase: %s %s" % (name if name else id, path)) | 52 print ("SearchDatabase: %s %s" % (name if name else id, path)) |
54 def endElement( self, name): | 53 |
55 pass | 54 def endElement(self, name): |
56 def characters( self, data): | |
57 pass | 55 pass |
58 | 56 |
59 class PepXmlHandler( xml.sax.ContentHandler): | 57 def characters(self, data): |
58 pass | |
59 | |
60 class PepXmlHandler(xml.sax.ContentHandler): | |
61 | |
60 def __init__(self): | 62 def __init__(self): |
61 xml.sax.ContentHandler.__init__(self) | 63 xml.sax.ContentHandler.__init__(self) |
62 self.spectraDataFiles = [] | 64 self.spectraDataFiles = [] |
63 self.spectraDataNames = [] | 65 self.spectraDataNames = [] |
66 | |
64 def startElement(self, tag, attrs): | 67 def startElement(self, tag, attrs): |
65 if tag == 'msms_run_summary': | 68 if tag == 'msms_run_summary': |
66 basename = attrs['base_name'] | 69 basename = attrs['base_name'] |
67 name = os.path.basename(basename) | 70 name = os.path.basename(basename) |
68 ext = attrs['raw_data'] | 71 ext = attrs['raw_data'] |
69 path = '%s%s' % (basename,ext) | 72 path = '%s%s' % (basename, ext) |
70 filename = os.path.basename(path) | 73 filename = os.path.basename(path) |
71 self.spectraDataFiles.append(filename) | 74 self.spectraDataFiles.append(filename) |
72 self.spectraDataNames.append(name) | 75 self.spectraDataNames.append(name) |
73 print ("SpectraData: %s %s" % (name, path)) | 76 print ("SpectraData: %s %s" % (name, path)) |
74 def endElement( self, name): | 77 |
78 def endElement(self, name): | |
75 pass | 79 pass |
76 def characters( self, data): | 80 |
81 def characters(self, data): | |
77 pass | 82 pass |
78 | 83 |
79 parser = xml.sax.make_parser() | 84 parser = xml.sax.make_parser() |
80 parser.setFeature(xml.sax.handler.feature_namespaces, 0) | 85 parser.setFeature(xml.sax.handler.feature_namespaces, 0) |
81 handler = PepXmlHandler() | 86 handler = PepXmlHandler() |
82 parser.setContentHandler( handler ) | 87 parser.setContentHandler(handler) |
83 for ident in args.ident_files: | 88 for ident in args.ident_files: |
84 parser.parse(ident) | 89 parser.parse(ident) |
85 | 90 |
86 spectra_names = handler.spectraDataFiles | 91 spectra_names = handler.spectraDataFiles |
87 | 92 |
88 def best_match(name): | 93 def best_match(name): |
89 if name in spectra_names: | 94 if name in spectra_names: |
90 return name | 95 return name |
91 try: | 96 try: |
92 r = [SequenceMatcher(None, name, spectra_names[x]).ratio() for x in range(len(spectra_names))] | 97 r = [SequenceMatcher(None, name, spectra_names[x]).ratio() |
98 for x in range(len(spectra_names))] | |
93 return spectra_names[r.index(max(r))] | 99 return spectra_names[r.index(max(r))] |
94 except Exception, e: | 100 except Exception, e: |
95 print ("best_match: %s %s" % (name, e)) | 101 print ("best_match: %s %s" % (name, e)) |
96 | 102 |
97 for i,name in enumerate(args.scan_name): | 103 for i, name in enumerate(args.scan_name): |
98 path = args.scan_file[i] if len(args.scan_file) > i else '' | 104 path = args.scan_file[i] if len(args.scan_file) > i else '' |
99 (root, ext) = os.path.splitext(name) | 105 (root, ext) = os.path.splitext(name) |
100 print ("SpectraFile: %s %s" % (name, path)) | 106 print ("SpectraFile: %s %s" % (name, path)) |
101 iname = best_match(name) | 107 iname = best_match(name) |
102 print ("IdentName: %s %s" % (name, iname)) | 108 print ("IdentName: %s %s" % (name, iname)) |
105 print ("%s -> %s" % (iname, path)) | 111 print ("%s -> %s" % (iname, path)) |
106 | 112 |
107 | 113 |
108 if __name__ == "__main__": | 114 if __name__ == "__main__": |
109 __main__() | 115 __main__() |
110 |