comparison ctd2galaxy.py @ 0:61d9bdb6d519 draft

Uploaded
author holtgrewe
date Thu, 18 Apr 2013 08:03:38 -0400
parents
children 170e48a55078
comparison
equal deleted inserted replaced
-1:000000000000 0:61d9bdb6d519
1 #!/usr/bin/env python
2 """Conversion of the CTD format into Galaxy XML.
3
4 The CTD parser should be reusable but is not in its own module since it is
5 only used here at the moment.
6 """
7
8 import argparse
9 import operator
10 import sys
11 import xml.sax
12 import xml.sax.saxutils
13
class CTDFormatException(Exception):
    """Raised when there is a format error in CTD.

    The code in this file raises it when a required XML attribute is missing
    and when a command line element refers to an unknown parameter.
    """
16
17
class CLIElement(object):
    """Model of one <clielement> tag.

    :ivar option_identifier: the option flag (e.g. --param), empty if the
                             element is an argument.
    :type option_identifier: str
    :ivar mapping_path: reference name of the parameter this element maps to
    :type mapping_path: str
    :ivar is_list: whether or not this element is a list.
    :type is_list: bool
    :ivar param_node: link to ParametersNode, set after parsing, None if unset
    """

    def __init__(self, option_identifier='', mapping_path='', is_list=False):
        """Store the given values; the parameter link starts out unset."""
        self.is_list = is_list
        self.mapping_path = mapping_path
        self.option_identifier = option_identifier
        # Link to ParametersNode, filled in once parsing is finished.
        self.param_node = None

    def __str__(self):
        """Return a constructor-like string representation of the element."""
        shown = tuple(repr(field) for field in (self.option_identifier,
                                                self.mapping_path,
                                                self.is_list))
        return 'CLIElement(%s, %s, %s)' % shown
41
42
class ParametersNode(object):
    """Represents a <NODE> or <ITEM> tag inside the <PARAMETERS> tags.

    :ivar kind: kind of the node, 'node' for <NODE> and 'item' for <ITEM>
    :ivar name: name attribute of the node
    :ivar description: text for description attribute of the node
    :ivar value: value attribute of the node
    :ivar type_: type attribute of the node
    :ivar tags: tags attribute of the node
    :ivar supported_formats: supported_format attribute of the node
    :ivar restrictions: restrictions attribute of the node
    :ivar path: the path to the node, None until computePath() has run
    :type path: list of strings
    :ivar parent: link to the parent of the node, None if unset
    :ivar children: children of the node
    :type children: dict with name to node mapping
    :ivar cli_element: CLIElement that this parameter is mapped to.
    """

    def __init__(self, kind='', name='', description='', value='', type_='', tags='',
                 restrictions='', supported_formats=''):
        """Initialize the object."""
        self.kind = kind
        self.name = name
        self.description = description
        self.value = value
        self.type_ = type_
        self.tags = tags
        self.supported_formats = supported_formats
        self.restrictions = restrictions
        self.path = None  # Not computed yet, see computePath().
        self.parent = None
        self.children = {}
        self.cli_element = None

    def computePath(self, is_root=True, path=None):
        """Compute path entry of this node and of all nodes below it.

        :param is_root: whether or not this is the root node
        :type is_root: bool
        :param path: path to the parent of this node, excluding root;
                     None is treated as the empty path
        :type path: list of strings
        """
        # Copy so that siblings never share (and extend) the same list.
        self.path = list(path) if path is not None else []
        if not is_root:
            self.path.append(self.name)
        for child in self.children.values():
            child.computePath(False, self.path)

    def applyFunc(self, f):
        """Apply f to self and all children."""
        f(self)
        for c in self.children.values():
            c.applyFunc(f)

    def find(self, path):
        """Return ParametersNode object at the path below the node.

        :param path: names of the nodes to descend through
        :type path: list of strings
        :returns: the node found, self for an empty path, None if there is
                  no node at the given path
        """
        if not path:
            return self
        child = self.children.get(path[0])
        if child is None:
            return None
        return child.find(path[1:])

    def __str__(self):
        """Return string representation."""
        t = (self.name, self.description, self.value, self.type_, self.tags,
             self.supported_formats, self.children, self.path)
        return 'ParametersNode(%s, %s, %s, %s, %s, %s, %s, path=%s)' % tuple(map(repr, t))

    def __repr__(self):
        """Return programmatic representation, same as __str__()."""
        return str(self)
116
117
class Tool(object):
    """Represents the top-level <tool> tag from a CTD file.

    :ivar name: name attribute value
    :type name: str
    :ivar executable_name: executableName attribute value
    :type executable_name: str
    :ivar version: version attribute value
    :type version: str
    :ivar description: description attribute value
    :type description: str
    :ivar manual: manual attribute value
    :type manual: str
    :ivar doc_url: docurl attribute value
    :type doc_url: str
    :ivar category: category attribute value
    :type category: str
    :ivar cli_elements: list of CLIElement objects
    :ivar parameters: root parameters node
    :type parameters: ParametersNode
    """

    def __init__(self, name='', executable_name='', version='',
                 description='', manual='', doc_url='',
                 category=''):
        """Initialize the object; CLI elements and parameters start empty."""
        self.name = name
        self.executable_name = executable_name
        self.version = version
        self.description = description
        self.manual = manual
        self.doc_url = doc_url
        self.category = category
        self.cli_elements = []
        self.parameters = None

    def parsingDone(self):
        """Called after parsing is done.

        The method will compute the paths of the parameter nodes and link the
        CLIElement objects in self.cli_elements to the ParameterNode objects.

        Both options and arguments are linked.  An option that maps to an
        unknown parameter is an error; an argument (empty option identifier)
        without a matching parameter is left unlinked.

        :raises CTDFormatException: if there is no parameters tree or an
                                    option refers to an unknown parameter
        """
        if self.parameters is None:
            raise CTDFormatException('No <PARAMETERS> found in <tool>.')
        self.parameters.computePath()
        for ce in self.cli_elements:
            path = ce.mapping_path.split('.')
            node = self.parameters.find(path)
            if node is None:
                if not ce.option_identifier:
                    continue  # Argument without parameter entry, leave unlinked.
                raise CTDFormatException('Unknown parameter %s' % '.'.join(path))
            ce.param_node = node
            node.cli_element = ce

    def __str__(self):
        """Return string representation of the scalar attributes."""
        t = (self.name, self.executable_name, self.version, self.description,
             self.manual, self.doc_url, self.category)
        return 'Tool(%s, %s, %s, %s, %s, %s, %s)' % tuple(map(repr, t))
174
175
176
class CTDHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that builds a Tool object from a CTD file.

    :ivar result: the Tool object, None until the <tool> tag has been opened
    :ivar tool: same object as result
    :ivar stack: names of the tags that are currently open, outermost first
    :ivar parameter_node: ParametersNode below which new nodes are appended
    """

    # Text-only children of <tool> and the Tool attribute each one fills.
    _TEXT_TAGS = {
        'name': 'name',
        'executableName': 'executable_name',
        'version': 'version',
        'description': 'description',
        'manual': 'manual',
        'docurl': 'doc_url',
        'category': 'category',
    }

    def __init__(self):
        """Initialize the handler with an empty tag stack and no result."""
        # Explicit base call instead of super(): works with old-style base
        # classes as well.
        xml.sax.handler.ContentHandler.__init__(self)
        self.result = None
        self.tool = None
        # A stack of tag names that are currently open.
        self.stack = []
        # The current parameter to append nodes below.
        self.parameter_node = None

    def startElement(self, name, attrs):
        """Handle start of element.

        :raises CTDFormatException: if a required attribute is missing
        """
        # Maintain a stack of open tags.
        self.stack.append(name)
        in_parameters = (self.stack[:2] == ['tool', 'PARAMETERS'])
        # Handle the individual cases.  The innermost tag is self.stack[-1].
        if self.stack == ['tool']:
            # Create the top level Tool object.
            self.tool = Tool()
            self.result = self.tool
        elif self.stack == ['tool', 'cli', 'clielement']:
            # Create a new CLIElement object for a <clielement> tag.
            if not attrs.get('isList'):
                raise CTDFormatException('No attribute isList in <clielement>.')
            if attrs.get('optionIdentifier') is None:
                raise CTDFormatException('no attribute optionIdentifier in <clielement>.')
            # The element is a list exactly if the attribute says 'true'.
            is_list = (attrs.get('isList') == 'true')
            option_identifier = attrs.get('optionIdentifier')
            self.tool.cli_elements.append(
                CLIElement(option_identifier=option_identifier, is_list=is_list))
        elif self.stack == ['tool', 'cli', 'clielement', 'mapping']:
            # Handle a <mapping> sub entry of a <clielement> tag.
            if not attrs.get('referenceName'):
                raise CTDFormatException('no attribute referenceName in <mapping>')
            self.tool.cli_elements[-1].mapping_path = attrs['referenceName']
        elif self.stack == ['tool', 'PARAMETERS']:
            # Handle the <PARAMETERS> entry by creating a new top parameters node.
            self.tool.parameters = ParametersNode(kind='node', name='<root>')
            self.parameter_node = self.tool.parameters
        elif in_parameters and name == 'NODE':
            # Create a new node ParametersNode and descend into it.
            if not attrs.get('name'):
                raise CTDFormatException('no attribute name in <NODE>')
            node_name = attrs.get('name')
            node = ParametersNode(kind='node', name=node_name)
            node.parent = self.parameter_node
            self.parameter_node.children[node_name] = node
            self.parameter_node = node
        elif in_parameters and name == 'ITEM':
            # Create a new item ParametersNode for the <ITEM> entry.
            if not attrs.get('name'):
                raise CTDFormatException('no attribute name in <ITEM>')
            item_name = attrs.get('name')
            # Absent attributes become '' (the ParametersNode defaults), not
            # None, so that later string operations on them are safe.
            child = ParametersNode(
                kind='item', name=item_name,
                description=attrs.get('description', ''),
                value=attrs.get('value', ''),
                type_=attrs.get('type', ''),
                tags=attrs.get('tags', ''),
                supported_formats=attrs.get('supported_formats', ''),
                restrictions=attrs.get('restrictions', ''))
            child.parent = self.parameter_node
            self.parameter_node.children[item_name] = child

    def endElement(self, name):
        """Handle closing tag."""
        # Maintain stack.
        self.stack.pop()
        # Go up one node in the parameters tree if </NODE>
        if name == 'NODE':
            self.parameter_node = self.parameter_node.parent

    def characters(self, content):
        """Handle characters in XML file.

        Text directly inside one of the text-only children of <tool> is
        appended to the corresponding Tool attribute; all other text is
        ignored.
        """
        if len(self.stack) != 2 or self.stack[0] != 'tool':
            return
        attr = self._TEXT_TAGS.get(self.stack[1])
        if attr is not None:
            # Append: the parser may deliver one text node in several chunks.
            setattr(self.tool, attr, getattr(self.tool, attr) + content)
262
263
class CTDParser(object):
    """Reads a CTD file and returns the object built by its handler.

    :ivar handler: the CTDHandler receiving the SAX events
    """

    def __init__(self):
        """Create the content handler used by parse()."""
        self.handler = CTDHandler()

    def parse(self, path):
        """Parse the XML at path and return the handler's result.

        After the XML has been read, parsingDone() is called on the result
        before it is returned.

        :param path: input passed on to the SAX parser's parse() method
        """
        handler = self.handler
        # Feed the XML events into the handler.
        sax_reader = xml.sax.make_parser()
        sax_reader.setContentHandler(handler)
        sax_reader.parse(path)
        # Let the result finish its post-parsing work.
        tool = handler.result
        tool.parsingDone()
        return tool
278
279
class XMLWriter(object):
    """Base class for XML writers.

    :ivar result: list of strings that are joined for the final XML
    :ivar indent_level: int with the indentation level
    """

    def __init__(self):
        """Start with empty output at indentation level 0."""
        self.result = []
        self.indent_level = 0

    def indent(self):
        """Return indentation whitespace."""
        return ' ' * self.indent_level

    def _formatArgs(self, args):
        """Return args as XML attribute text with a leading space each.

        Values are converted with str() and quoted/escaped.  Attributes whose
        value is None are left out instead of being written as "None".
        """
        if not args:
            return ''
        quote = xml.sax.saxutils.quoteattr
        return ''.join(' %s=%s' % (key, quote(str(value)))
                       for key, value in args.items() if value is not None)

    def appendTag(self, tag, text='', args=None):
        """Append a tag to self.result with text content only or no content at all.

        :param tag: name of the tag
        :param text: text content; it is stripped and XML-escaped.  If empty
                     (or None), a self-closing tag is written.
        :param args: dict with the attributes of the tag, may be None
        """
        text = text or ''
        vals = {'indent': self.indent(),
                'tag': tag,
                # Escape so that '<', '>' and '&' in the text stay valid XML.
                'text': xml.sax.saxutils.escape(text.strip()),
                'args': self._formatArgs(args)}
        if text:
            self.result.append('%(indent)s<%(tag)s%(args)s>%(text)s</%(tag)s>\n' % vals)
        else:
            self.result.append('%(indent)s<%(tag)s%(args)s />\n' % vals)

    def openTag(self, tag, args=None):
        """Append an opening tag to self.result.

        :param tag: name of the tag
        :param args: dict with the attributes of the tag, may be None
        """
        vals = {'indent': self.indent(),
                'tag': tag,
                'args': self._formatArgs(args)}
        self.result.append('%(indent)s<%(tag)s%(args)s>\n' % vals)

    def closeTag(self, tag):
        """Append a closing tag to self.result."""
        vals = {'indent': self.indent(), 'tag': tag}
        self.result.append('%(indent)s</%(tag)s>\n' % vals)

    def handleParameters(self, node):
        """Recursion for appending tags for ParametersNode.

        Writes an <ITEM> tag for each child of kind 'item' and a <NODE> tag,
        containing the child's own children, for each other child.
        """
        for pn in node.children.values():
            if pn.kind == 'item':
                args = {'name': pn.name,
                        'value': pn.value,
                        'type': pn.type_,
                        'description': pn.description,
                        'restrictions': pn.restrictions,
                        'supported_formats': pn.supported_formats,
                        'tags': pn.tags}
                self.appendTag('ITEM', args=args)
            else:  # pn.kind == 'node'
                args = {'name': pn.name,
                        'description': pn.description}
                self.openTag('NODE', args=args)
                self.indent_level += 1
                self.handleParameters(pn)
                self.indent_level -= 1
                self.closeTag('NODE')
346
347
class CTDWriter(XMLWriter):
    """Serializes a Tool object into the CTD format."""

    def run(self, tool, f):
        """Build the CTD document for tool and write it to file f.

        The document consists of the text-only tags of the tool, the <cli>
        section with one <clielement> per CLI element and the <PARAMETERS>
        tree.
        """
        bool_text = {True: 'true', False: 'false'}
        simple_tags = (
            ('name', tool.name),
            ('executableName', tool.executable_name),
            ('version', tool.version),
            ('description', tool.description),
            ('manual', tool.manual),
            ('docurl', tool.doc_url),
            ('category', tool.category),
        )

        self.result.append('<?xml version="1.0" encoding="UTF-8"?>\n')
        self.openTag('tool')
        self.indent_level += 1
        for tag_name, tag_text in simple_tags:
            self.appendTag(tag_name, tag_text)

        # <cli> and <clielement> group
        self.openTag('cli')
        self.indent_level += 1
        for element in tool.cli_elements:
            element_attrs = {'optionIdentifier': element.option_identifier,
                             'isList': bool_text[element.is_list]}
            self.openTag('clielement', args=element_attrs)
            self.indent_level += 1
            self.appendTag('mapping', args={'referenceName': element.mapping_path})
            self.indent_level -= 1
            self.closeTag('clielement')
        self.indent_level -= 1
        self.closeTag('cli')

        # <PARAMETERS>, <NODE>, <ITEM> group
        parameters_attrs = {
            'version': 1.4,
            'xsi:noNamespaceSchemaLocation': 'http://open-ms.sourceforge.net/schemas/Param_1_4.xsd',
            'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
        }
        self.openTag('PARAMETERS', args=parameters_attrs)
        self.indent_level += 1
        self.handleParameters(tool.parameters)
        self.indent_level -= 1
        self.closeTag('PARAMETERS')

        self.indent_level -= 1
        self.closeTag('tool')

        # Write out everything collected so far, piece by piece.
        for chunk in self.result:
            f.write(chunk)
388
389
class GalaxyWriter(XMLWriter):
    """Write a Tool to the Galaxy format."""

    # Maps CTD parameter types to Galaxy <param> types.
    _TYPE_MAP = {
        'string': 'text',
        'double': 'float',
        'int': 'integer',
    }

    @staticmethod
    def _mangle(text):
        """Return text with '-' and '.' replaced by '_'."""
        return text.replace('-', '_').replace('.', '_')

    @staticmethod
    def _hasTag(param_node, tag):
        """Return whether tag is in the comma separated tags of param_node."""
        return bool(param_node.tags) and tag in param_node.tags.split(',')

    @staticmethod
    def _isBoolParam(param_node):
        """Return whether param_node is a string restricted to true/false."""
        return (param_node.type_ == 'string' and bool(param_node.restrictions) and
                sorted(param_node.restrictions.split(',')) == ['false', 'true'])

    def run(self, tool, f):
        """Write the given Tool to file f."""
        self.result.append('<?xml version="1.0" encoding="UTF-8"?>\n')
        self.openTag('tool', {'id': tool.executable_name, 'name': tool.name})
        self.indent_level += 1
        self.addCommandTag(tool)
        self.appendTag('description', text=tool.description)
        self.openTag('inputs')
        self.indent_level += 1
        tool.parameters.applyFunc(self.addInputParam)
        self.indent_level -= 1
        self.closeTag('inputs')
        self.openTag('outputs')
        self.indent_level += 1
        tool.parameters.applyFunc(self.addOutputParam)
        self.indent_level -= 1
        self.closeTag('outputs')
        self.openTag('stdio')
        self.indent_level += 1
        # Every exit code other than 0 is reported as fatal.
        self.appendTag('exit_code', args={'range': '1:', 'level': 'fatal'})
        self.appendTag('exit_code', args={'range': ':-1', 'level': 'fatal'})
        self.indent_level -= 1
        self.closeTag('stdio')
        self.indent_level -= 1
        self.closeTag('tool')
        # Write result
        for x in self.result:
            f.write(x)

    def addInputParam(self, param_node):
        """Add a ParametersNode object if it is to go to <inputs>.

        Output files and nodes that are not items are skipped.  Input files
        become 'data' params, true/false strings become 'boolean' params,
        other restricted strings become 'select' params with one <option>
        per allowed value and everything else is mapped through _TYPE_MAP.
        """
        if self._hasTag(param_node, 'output file'):
            return  # Skip output files
        if param_node.kind != 'item':
            return  # Skip if not item.
        name = self._mangle('_'.join(param_node.path))
        args = {}
        if self._hasTag(param_node, 'input file'):
            formats = param_node.supported_formats or ''
            args['type'] = 'data'
            # '*.fa,*.fq' becomes 'fa,fq'.
            args['format'] = ','.join(x.replace('*', '').replace('.', '')
                                      for x in formats.split(','))
            args['name'] = name
            args['label'] = param_node.description
            self.appendTag('param', args=args)
            return
        args['type'] = self._TYPE_MAP[param_node.type_]
        args['name'] = name
        args['label'] = param_node.description
        if self._isBoolParam(param_node):
            args['type'] = 'boolean'
            if param_node.value == 'true':
                args['checked'] = 'true'
            # A flag is switched on by passing its option identifier; there
            # is none if the parameter is not linked to a CLI element.
            cli_element = param_node.cli_element
            args['truevalue'] = cli_element.option_identifier if cli_element is not None else ''
            args['falsevalue'] = ''
            self.appendTag('param', args=args)
            return
        args['value'] = param_node.value
        if param_node.type_ == 'string' and param_node.restrictions:
            args['type'] = 'select'
            self.openTag('param', args=args)
            self.indent_level += 1
            for v in param_node.restrictions.split(','):
                self.appendTag('option', v, {'value': v})
            self.indent_level -= 1
            self.closeTag('param')
        else:
            self.appendTag('param', args=args)

    def addOutputParam(self, param_node):
        """Add a ParametersNode object if it is to go to <outputs>.

        Only nodes tagged as output file are written.  The format is taken
        from the first entry of the supported formats.
        """
        if not self._hasTag(param_node, 'output file'):
            return  # Only add for output files.
        formats = param_node.supported_formats or ''
        first_format = formats.split(',')[0]
        args = {}
        if '.' in formats:
            args['format'] = first_format.split('.')[-1]
        else:
            args['format'] = first_format.split('*')[-1]
        args['name'] = self._mangle('_'.join(param_node.path))
        args['label'] = param_node.description
        self.appendTag('data', args=args)

    def addCommandTag(self, tool):
        """Write <command> tag to self.result.

        The command is the executable name followed by, for each CLI element,
        its option identifier (unless the element is a boolean parameter or
        an argument) and a '$' variable derived from its mapping path.
        """
        lst = []
        for ce in tool.cli_elements:
            # Elements may be unlinked (param_node is None); such elements
            # cannot be boolean parameters.
            node = ce.param_node
            bool_param = node is not None and self._isBoolParam(node)
            if not bool_param and ce.option_identifier:
                lst.append(ce.option_identifier)
            # The path mapping is not ideal but should work OK.
            lst.append('$' + self._mangle(ce.mapping_path))
        txt = [tool.executable_name] + lst
        self.appendTag('command', text=' '.join(txt))
494
495
def main():
    """Main function.

    Parses the command line, reads the CTD input file and writes it to the
    output file, as CTD if the output file name ends in '.ctd' and as Galaxy
    XML otherwise.

    :returns: 0, the exit code of the program
    """
    # Setup argument parser.
    parser = argparse.ArgumentParser(description='Convert CTD to Galaxy XML')
    parser.add_argument('-i', '--in-file', metavar='FILE',
                        help='CTD file to read.', dest='in_file',
                        required=True)
    parser.add_argument('-o', '--out-file', metavar='FILE',
                        help='File to write. Output type depends on extension.',
                        dest='out_file', required=True)

    args = parser.parse_args()

    # Parse input.
    sys.stderr.write('Parsing %s...\n' % args.in_file)
    ctd_parser = CTDParser()
    tool = ctd_parser.parse(args.in_file)

    # Write output.
    sys.stderr.write('Writing to %s...\n' % args.out_file)
    if args.out_file.endswith('.ctd'):
        writer = CTDWriter()
    else:
        writer = GalaxyWriter()
    # The writers emit str objects.  On Python 3 these cannot be written to a
    # binary file, so open a text file there; UTF-8 matches the encoding
    # named in the XML declaration and newline='' keeps '\n' untranslated,
    # as binary mode did.
    if sys.version_info[0] >= 3:
        out_file = open(args.out_file, 'w', encoding='utf-8', newline='')
    else:
        out_file = open(args.out_file, 'wb')
    with out_file as f:
        writer.run(tool, f)

    return 0
524
525
# Run the converter only when this file is executed as a script; the return
# value of main() becomes the exit status of the process.
if __name__ == '__main__':
    sys.exit(main())