Mercurial > repos > jjohnson > spectrast
comparison link_scan_datasets.py @ 3:7f02fc51bddf draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/spectrast commit 379705f578f9a0465f497894c7d2b5f68b6a55e6-dirty
author | jjohnson |
---|---|
date | Wed, 25 Jul 2018 10:58:17 -0400 |
parents | |
children | 274fdc50169b |
comparison
equal
deleted
inserted
replaced
2:e67b0cc10377 | 3:7f02fc51bddf |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from __future__ import print_function | |
4 | |
5 import argparse | |
6 import difflib | |
7 from difflib import SequenceMatcher | |
8 import os | |
9 import os.path | |
10 import sys | |
11 import xml.sax | |
12 | |
13 | |
14 | |
class _IdentHandler(xml.sax.ContentHandler):
    """SAX handler that records the files referenced by an identification file.

    Three elements are recognised; every other element is ignored:

    * pepXML ``msms_run_summary`` -- the spectrum file path is the
      ``base_name`` attribute followed by the ``raw_data`` attribute.
    * mzIdentML ``SpectraData`` -- the spectrum file path is ``location``.
    * mzIdentML ``SearchDatabase`` -- the database path is ``location``.

    The same instance can be fed several documents; the lists accumulate.
    """

    def __init__(self):
        # Explicit base-class call (not super()), as the original code did.
        xml.sax.ContentHandler.__init__(self)
        self.spectraDataFiles = []      # basenames of the spectrum paths
        self.spectraDataNames = []      # display name for each spectrum file
        self.searchDatabaseFiles = []   # basenames of the database paths
        self.searchDatabaseNames = []   # display name for each database

    def startElement(self, tag, attrs):
        """Record the file described by *tag* if it is a recognised element."""
        if tag == 'msms_run_summary':
            base_name = attrs['base_name']
            path = '%s%s' % (base_name, attrs['raw_data'])
            label = os.path.basename(base_name)
            self.spectraDataFiles.append(os.path.basename(path))
            self.spectraDataNames.append(label)
            print("SpectraData: %s %s" % (label, path))
        elif tag in ('SpectraData', 'SearchDatabase'):
            path = attrs['location']
            # Fall back to the element id when no (non-empty) name is given.
            label = attrs.get('name') or attrs['id']
            if tag == 'SpectraData':
                files, names = self.spectraDataFiles, self.spectraDataNames
            else:
                files = self.searchDatabaseFiles
                names = self.searchDatabaseNames
            files.append(os.path.basename(path))
            names.append(label)
            print("%s: %s %s" % (tag, label, path))


def _best_match(name, candidates):
    """Return the entry of *candidates* that most resembles *name*.

    An exact match wins outright; otherwise the candidate with the highest
    ``SequenceMatcher`` ratio is returned (the first one on ties).  Returns
    ``None`` when *candidates* is empty.
    """
    if not candidates:
        return None
    if name in candidates:
        return name
    return max(candidates,
               key=lambda cand: SequenceMatcher(None, name, cand).ratio())


def __main__():
    """Symlink scan files under the names used by the identification files.

    Command line: one or more identification files (pepXML or mzIdentML),
    plus repeated ``-n/--scan_name`` and ``-f/--scan_file`` options that are
    paired by position.  For every scan name, the closest spectrum file name
    found in the identification files is looked up and, if nothing with that
    name exists yet and the scan file exists, a symlink with that name
    (relative to the current working directory) is created pointing at the
    scan file.  Progress is reported on stdout.
    """
    arg_parser = argparse.ArgumentParser(
        description='link spectrum datasets to the name used' +
                    ' in the identification dataset')
    arg_parser.add_argument(
        'ident_files', nargs='+',
        help='Pepxml or mzIdentML')
    arg_parser.add_argument(
        '-n', '--scan_name', default=[], action='append',
        help='Name for scan file')
    arg_parser.add_argument(
        '-f', '--scan_file', default=[], action='append',
        help='Path for scan file')
    args = arg_parser.parse_args()

    # A single handler understands both formats, so mzIdentML inputs are
    # no longer silently ignored.
    handler = _IdentHandler()
    sax_parser = xml.sax.make_parser()
    sax_parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    sax_parser.setContentHandler(handler)
    for ident in args.ident_files:
        sax_parser.parse(ident)

    spectra_names = handler.spectraDataFiles

    for i, name in enumerate(args.scan_name):
        # A scan name without a matching -f option gets an empty path and
        # is therefore never linked.
        path = args.scan_file[i] if i < len(args.scan_file) else ''
        print("SpectraFile: %s %s" % (name, path))
        iname = _best_match(name, spectra_names)
        if not iname:
            # Nothing usable to link against; report it and carry on with
            # the remaining scans instead of crashing on a None path.
            print("best_match: %s no spectrum file name found in the"
                  " identification files" % name)
            continue
        print("IdentName: %s %s" % (name, iname))
        # lexists() also sees dangling symlinks, which exists() reports as
        # absent even though symlink() would still fail on them.
        if not os.path.lexists(iname) and os.path.exists(path):
            os.symlink(path, iname)
            print("%s -> %s" % (iname, path))
106 | |
107 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    __main__()
110 |