Mercurial > repos > holtgrewe > ngs_roi
changeset 0:61d9bdb6d519 draft
Uploaded
author | holtgrewe |
---|---|
date | Thu, 18 Apr 2013 08:03:38 -0400 |
parents | |
children | 0ac4f6f3d984 |
files | LICENSE README bam2roi.ctd bam2roi.xml ctd2galaxy.py datatypes_conf.xml ngs_roi/__init__.py ngs_roi/app.py ngs_roi/argparse.py ngs_roi/io.py roi_details.py roi_feature_projection.xml roi_filter.sh roi_filter.xml roi_intersect.ctd roi_metrics.Rscript roi_metrics.xml roi_pca.Rscript roi_pca.xml roi_plot_thumbnails.py roi_plot_thumbnails.xml roi_report.py roi_report.xml roi_sort.sh roi_sort.xml roi_table.py roi_table.xml rois.py |
diffstat | 27 files changed, 2506 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,30 @@ +Copyright (c) 2013, Bernd Jagla <bernd.jagla@pasteur.fr>, + Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + Neither the name of the <ORGANIZATION> nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,26 @@ +Dependencies +============ + + * ngsroi R package (shipped with tar.gz) + * Python Cheetah package. + * ngs_roi binaries: + + svn co http://svn.seqan.de/seqan/trunk seqan-trunk + pushd seqan-trunk/sandbox + svn co https://projets.pasteur.fr/.../jagla jagla + popd + mkdir -p seqan-trunk-build/release && cd seqan-trunk-build/release + cmake ../../seqan-trunk + pushd sandbox/jagla/apps/ngs_roi + make + popd + ls bin/* + +Notes +===== + +You have to allow full HTML output for the tool to work properly. + +Edit your univers_wsgi.ini to include the line + + sanitize_all_html = False \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam2roi.ctd Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<tool> + <name>Bam2roi</name> + <executableName>bam2roi</executableName> + <version>0.1</version> + <description>Create ROI from BAM file.</description> + <manual>Calculated consecutive regions of coverage from alignment file IN.bam and write regions of interst to file OUT.roi. Counting is performed over the entire region (including intron and N-regions) based on the CIGAR string of the alignment record. +</manual> + <docurl>http://www.seqan.de</docurl> + <category></category> + <cli> + <clielement optionIdentifier="--verbose" isList="false"> + <mapping referenceName="bam2roi.verbose" /> + </clielement> + <clielement optionIdentifier="--vverbose" isList="false"> + <mapping referenceName="bam2roi.vverbose" /> + </clielement> + <clielement optionIdentifier="--input-file" isList="false"> + <mapping referenceName="bam2roi.input-file" /> + </clielement> + <clielement optionIdentifier="--output-file" isList="false"> + <mapping referenceName="bam2roi.output-file" /> + </clielement> + <clielement optionIdentifier="--strand-specific" isList="false"> + <mapping referenceName="bam2roi.strand-specific" /> + </clielement> + <clielement optionIdentifier="--ignore-pairing" isList="false"> + <mapping referenceName="bam2roi.ignore-pairing" /> + </clielement> + <clielement optionIdentifier="--link-over-skipped" isList="false"> + <mapping referenceName="bam2roi.link-over-skipped" /> + </clielement> + </cli> + <PARAMETERS version="1.4" xsi:noNamespaceSchemaLocation="http://open-ms.sourceforge.net/schemas/Param_1_4.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <NODE name="bam2roi" description="Create ROI from BAM file."> + <ITEM name="verbose" value="false" type="string" description="Verbose mode." restrictions="true,false" /> + <ITEM name="vverbose" value="false" type="string" description="Very verbose mode." restrictions="true,false" /> + <ITEM name="input-file" value="" type="string" description="SAM/BAM formatted file. Must be sorted by coordinate." tags="input file,required" supported_formats="*sam,*bam" /> + <ITEM name="output-file" value="" type="string" description="Output file with regions of interest." tags="output file,required" supported_formats="*roi" /> + <ITEM name="strand-specific" value="false" type="string" description="Calculate strand-specific ROIs (see section Strand Specificness below." restrictions="true,false" /> + <ITEM name="ignore-pairing" value="false" type="string" description="Ignore paired information. Also see Section ROI Creation Details." restrictions="true,false" /> + <ITEM name="link-over-skipped" value="false" type="string" description="Link over skipped bases in the read alignment." restrictions="true,false" /> + </NODE> + </PARAMETERS> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam2roi.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,59 @@ +<?xml version="1.0" encoding="UTF-8"?> +<tool id="bam2roi" name="BAM to ROI"> + <command> + bam2roi + $bam2roi_verbosity + $bam2roi_strand_specific + $bam2roi_ignore_pairing + $bam2roi_link_over_skipped + --input-file $bam2roi_input_file + --output-file $bam2roi_output_file + </command> + + <description>Create ROI from BAM file.</description> + + <!-- + Input Files and Parameters + --> + <inputs> + <param name="bam2roi_input_file" type="data" format="sam,bam" + label="Input File" + help="SAM/BAM formatted file. Must be sorted by coordinate." /> + + <param name="bam2roi_ignore_pairing" type="boolean" falsevalue="" truevalue="--ignore-pairing" + label="Ignore paired information" + help="By default, a whole fragment/template extends a ROI for paired-end data. When selected, reads are treated as in the single-end case." /> + <param name="bam2roi_link_over_skipped" type="boolean" falsevalue="" truevalue="--link-over-skipped" + label="Link over skipped bases" + help="When selected, a ROI continues over skipped bases." /> + <param name="bam2roi_strand_specific" type="boolean" falsevalue="" truevalue="--strand-specific" + label="Compute strand-specific ROIs" + label="When selected, the strands of the reads alignments are considered, e.g. there can be two or more ROIs on different strands that would overlap on the same strand." /> + + <param name="bam2roi_verbosity" type="select" label="Verbosity" force_select="true" /> + <option value="" selected="true">normal</option> + <option value="--verbose">verbose</option> + <option value="--very-verbose">very verbose</option> + </param> + </inputs> + + <!-- + Output Files + --> + <outputs> + <data label="${bam2roi_input_file.name}.roi" name="bam2roi_output_file" format="roi" /> + </outputs> + + <!-- + Recognize errors by return code and not output to stderr. + --> + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + + <!-- + Tool Help + --> + <help>No help yet.</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ctd2galaxy.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,527 @@ +#!/usr/bin/env python +"""Conversion of the CTD format into Galaxy XML. + +The CTD parser should be reusable but is not in its own module since it is +only used here at the moment. +""" + +import argparse +import operator +import sys +import xml.sax +import xml.sax.saxutils + +class CTDFormatException(Exception): + """Raised when there is a format error in CTD.""" + + +class CLIElement(object): + """Represents a <clielement> tag. + + :ivar option_identifier: with parameters (e.g. --param), empty if argument. + :type option_identifier: str + :ivar is_list: whether the element is a list. + :type is_list: bool + :ivar param_node: link to ParametersNode, set after parsing, None if unset + :ivar is_list: w or not this element is a list. + :type is_list: bool + """ + + def __init__(self, option_identifier='', mapping_path='', is_list=False): + """Initialize object.""" + self.option_identifier = option_identifier + self.param_node = None # Link to ParametersNode, set after parsing. + self.mapping_path = mapping_path + self.is_list = is_list + + def __str__(self): + """String representaiton of CLIElement.""" + t = (self.option_identifier, self.mapping_path, self.is_list) + return 'CLIElement(%s, %s, %s)' % tuple(map(repr, list(t))) + + +class ParametersNode(object): + """Represents a <NODE> tag inside the <PARAMETERS> tags. + + :ivar name: name attribute of the node + :ivar description: text for description attribute of the node + :ivar value: value attribute of the node + :ivar type_: type attribute of the node + :ivar tags: tags attribute of the node + :ivar supported_formats: supported_format attribute of the node + :ivar restrictions: restrictions attribute of the node + :ivar path: the path to the node + :ivar path: list of strings + :ivar parent: link to the parent of the node + :ivar children: children of the node + :type children: dict with name to node mapping + :ivar cli_element: CLIElement that this parameter is mapped to. + """ + + def __init__(self, kind='', name='', description='', value='', type_='', tags='', + restrictions='', supported_formats=''): + """Initialize the object.""" + self.kind = kind + self.name = name + self.description = description + self.value = value + self.type_ = type_ + self.tags = tags + self.supported_formats = supported_formats + self.restrictions = restrictions + self.path = None # root if is None + self.parent = None # not set, usually a list + self.children = {} + self.cli_element = None + + def computePath(self, is_root=True, path=[]): + """Compute path entry from parent links. + + :param is_root: whether or not this is the root node + :type is_root: bool + :param path: path to this node, excluding root + :type path: list of strings + """ + self.path = list(path) + if not is_root: + self.path.append(self.name) + if not self.children: + return # nothing to do: early exit. + for name, child in self.children.items(): + child.computePath(False, self.path) + + def applyFunc(self, f): + """Apply f to self and all children.""" + f(self) + for c in self.children.values(): + c.applyFunc(f) + + def find(self, path): + """Return ParametersNode object at the path below the node.""" + if not path: + return self + if not self.children.get(path[0]): + return None + return self.children[path[0]].find(path[1:]) + + def __str__(self): + """Return string representation.""" + t = (self.name, self.description, self.value, self.type_, self.tags, + self.supported_formats, self.children, self.path) + return 'ParametersNode(%s, %s, %s, %s, %s, %s, %s, path=%s)' % tuple(map(repr, t)) + + def __repr__(self): + """Return programmatic representation, same as __str__().""" + return str(self) + + +class Tool(object): + """Represents the top-level <tool> tag from a CTD file. + + :ivar name: name attribute value + :type name: str + :ivar executable_name: executableName attribute value + :type executable_name: str + :ivar version: version attribute value + :type version: str + :ivar description: description attribute value + :type description: str + :ivar manual: manual attribute value + :type manual: str + :ivar doc_url: docurl attribute value + :type doc_url: str + :ivar category: category attribute value + :type category: str + :ivar cli_elements: list of CLIElement objects + :ivar parameters: root parameters node + :type parameters: ParametersNode + """ + + def __init__(self, name='', executable_name='', version='', + description='', manual='', doc_url='', + category=''): + self.name = name + self.executable_name = executable_name + self.version = version + self.description = description + self.manual = manual + self.doc_url = doc_url + self.category = category + self.cli_elements = [] + self.parameters = None + + def parsingDone(self): + """Called after parsing is done. + + The method will compute the paths of the parameter nodes and link the + CLIElement objects in self.cli_elements to the ParameterNode objects. + """ + self.parameters.computePath() + for ce in self.cli_elements: + if not ce.option_identifier: + continue # Skip arguments + path = ce.mapping_path.split('.') + node = self.parameters.find(path) + if not node: + raise CTDFormatException('Unknown parameter %s' % '.'.join(path)) + ce.param_node = node + node.cli_element = ce + + def __str__(self): + t = (self.name, self.executable_name, self.version, self.description, + self.manual, self.doc_url, self.category) + return 'Tool(%s, %s, %s, %s, %s, %s, %s)' % tuple(map(repr, list(t))) + + + +class CTDHandler(xml.sax.handler.ContentHandler): + def __init__(self): + self.result = None + # A stack of tag names that are currently open. + self.stack = [] + # The current parameter to append nodes below. + self.parameter_node = None + + def startElement(self, name, attrs): + """Handle start of element.""" + # Maintain a stack of open tags. + self.stack.append(name) + # Handle the individual cases. The innermost tag is self.stack[-1]. + if self.stack == ['tool']: + # Create the top level Tool object. + self.tool = Tool() + self.result = self.tool + elif self.stack == ['tool', 'cli', 'clielement']: + # Create a new CLIElement object for a <clieelement> tag. + if not attrs.get('isList'): + raise CTDFormatException('No attribute isList in <clielement>.') + if attrs.get('optionIdentifier') is None: + raise CTDFormatException('no attribute optionIdentifier in <clielement>.') + is_list = (attrs.get('isList') == 'false') + option_identifier = attrs.get('optionIdentifier') + self.tool.cli_elements.append(CLIElement(option_identifier=option_identifier, is_list=is_list)) + elif self.stack == ['tool', 'cli', 'clielement', 'mapping']: + # Handle a <mapping> sub entry of a <clieelement> tag. + if not attrs.get('referenceName'): + raise CTDFormatException('no attribute referenceName in <mapping>') + self.tool.cli_elements[-1].mapping_path = attrs['referenceName'] + elif self.stack == ['tool', 'PARAMETERS']: + # Handle the <PARAMETERS> entry by creating a new top parameters node. + self.tool.parameters = ParametersNode(kind='node', name='<root>') + self.parameter_node = self.tool.parameters + elif self.stack[:2] == ['tool', 'PARAMETERS'] and self.stack[-1] == 'NODE': + # Create a new node ParametersNode for the <PARAMETERS> entry. + if not attrs.get('name'): + raise CTDFormatException('no attribute name in <NODE>') + name = attrs.get('name') + node = ParametersNode(kind='node', name=name) + node.parent = self.parameter_node + self.parameter_node.children[name] = node + self.parameter_node = node + elif self.stack[:2] == ['tool', 'PARAMETERS'] and self.stack[-1] == 'ITEM': + # Create a new item ParametersNode for the <ITEM> entry. + if not attrs.get('name'): + raise CTDFormatException('no attribute name in <ITEM>') + name = attrs.get('name') + value = attrs.get('value') + type_ = attrs.get('type') + tags = attrs.get('tags') + description = attrs.get('description') + restrictions = attrs.get('restrictions') + supported_formats = attrs.get('supported_formats') + child = ParametersNode( + kind='item', name=name, description=description, value=value, + type_=type_, tags=tags, supported_formats=supported_formats, + restrictions=restrictions) + self.parameter_node.children[name] = child + + def endElement(self, name): + """Handle closing tag.""" + # Maintain stack. + self.stack.pop() + # Go up one node in the parameters tree if </NODE> + if name == 'NODE': + self.parameter_node = self.parameter_node.parent + + def characters(self, content): + """Handle characters in XML file.""" + if self.stack == ['tool', 'name']: + self.tool.name += content + elif self.stack == ['tool', 'executableName']: + self.tool.executable_name += content + elif self.stack == ['tool', 'version']: + self.tool.version += content + elif self.stack == ['tool', 'description']: + self.tool.description += content + elif self.stack == ['tool', 'manual']: + self.tool.manual += content + elif self.stack == ['tool', 'docurl']: + self.tool.doc_url += content + elif self.stack == ['tool', 'category']: + self.tool.category += content + + +class CTDParser(object): + """Parser for CTD files.""" + + def __init__(self): + self.handler = CTDHandler() + + def parse(self, path): + # Parse XML into Tool object. + parser = xml.sax.make_parser() + parser.setContentHandler(self.handler) + parser.parse(path) + # Compute paths for tool's parameters. + self.handler.result.parsingDone() + return self.handler.result + + +class XMLWriter(object): + """Base class for XML writers. + + + :ivar result: list of strings that are joined for the final XML + :ivar indent_level: int with the indentation level + """ + + def __init__(self): + self.result = [] + self.indent_level = 0 + + def indent(self): + """Return indentation whitespace.""" + return ' ' * self.indent_level + + def appendTag(self, tag, text='', args={}): + """Append a tag to self.result with text content only or no content at all.""" + e = xml.sax.saxutils.quoteattr + args_str = ' '.join('%s=%s' % (key, e(str(value))) for key, value in args.items()) + if args_str: + args_str = ' '+ args_str + vals = {'indent': self.indent(), + 'tag': tag, + 'text': text.strip(), + 'args': args_str} + if text: + self.result.append('%(indent)s<%(tag)s%(args)s>%(text)s</%(tag)s>\n' % vals) + else: + self.result.append('%(indent)s<%(tag)s%(args)s />\n' % vals) + + def openTag(self, tag, args={}): + """Append an opening tag to self.result.""" + e = xml.sax.saxutils.quoteattr + args_str = ' '.join('%s=%s' % (key, e(str(value))) for key, value in args.items()) + if args_str: + args_str = ' ' + args_str + vals = {'indent': self.indent(), + 'tag': tag, + 'args': args_str} + self.result.append('%(indent)s<%(tag)s%(args)s>\n' % vals) + + def closeTag(self, tag): + """Append a closing tag to self.result.""" + vals = {'indent': self.indent(), 'tag': tag} + self.result.append('%(indent)s</%(tag)s>\n' % vals) + + def handleParameters(self, node): + """Recursion for appending tags for ParametersNode.""" + for pn in node.children.values(): + if pn.kind == 'item': + args = {'name': pn.name, + 'value': pn.value, + 'type': pn.type_, + 'description': pn.description, + 'restrictions': pn.restrictions, + 'tags': pn.tags} + self.appendTag('ITEM', args=args) + else: # node.kind == 'node' + args = {'name': pn.name, + 'description': pn.description} + self.openTag('NODE', args=args) + self.indent_level += 1 + self.handleParameters(pn) + self.indent_level -= 1 + self.closeTag('NODE') + + +class CTDWriter(XMLWriter): + """Write a Tool to CTD format.""" + + def run(self, tool, f): + """Write the given Tool to file f.""" + self.result.append('<?xml version="1.0" encoding="UTF-8"?>\n') + self.openTag('tool') + self.indent_level += 1 + self.appendTag('name', tool.name) + self.appendTag('executableName', tool.executable_name) + self.appendTag('version', tool.version) + self.appendTag('description', tool.description) + self.appendTag('manual', tool.manual) + self.appendTag('docurl', tool.doc_url) + self.appendTag('category', tool.category) + # <cli> and <clielement> group + self.openTag('cli') + self.indent_level += 1 + for ce in tool.cli_elements: + self.openTag('clielement', args={'optionIdentifier': ce.option_identifier, + 'isList': {True: 'true', False: 'false'}[ce.is_list]}) + self.indent_level += 1 + self.appendTag('mapping', args={'referenceName': ce.mapping_path}) + self.indent_level -= 1 + self.closeTag('clielement') + self.indent_level -= 1 + self.closeTag('cli') + # <PARAMETERS>, <NODE>, <ITEM> group + self.openTag('PARAMETERS', args={'version': 1.4, + 'xsi:noNamespaceSchemaLocation': 'http://open-ms.sourceforge.net/schemas/Param_1_4.xsd', + 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance'}) + self.indent_level += 1 + self.handleParameters(tool.parameters) + self.indent_level -= 1 + self.closeTag('PARAMETERS') + self.indent_level -= 1 + self.closeTag('tool') + # Write result + for x in self.result: + f.write(x) + + +class GalaxyWriter(XMLWriter): + """Write a Tool to the Galaxy format.""" + + def run(self, tool, f): + """Write the given Tool to file f.""" + self.result.append('<?xml version="1.0" encoding="UTF-8"?>\n') + self.openTag('tool', {'id': tool.executable_name, 'name': tool.name}) + self.indent_level += 1 + self.addCommandTag(tool) + self.appendTag('description', text=tool.description) + self.openTag('inputs') + self.indent_level += 1 + tool.parameters.applyFunc(lambda x: self.addInputParam(x)) + self.indent_level -= 1 + self.closeTag('inputs') + self.openTag('outputs') + self.indent_level += 1 + tool.parameters.applyFunc(lambda x: self.addOutputParam(x)) + self.indent_level -= 1 + self.closeTag('outputs') + self.openTag('stdio') + self.indent_level += 1 + self.appendTag('exit_code', args={'range': '1:', 'level': 'fatal'}) + self.appendTag('exit_code', args={'range': ':-1', 'level': 'fatal'}) + self.indent_level -= 1 + self.closeTag('stdio') + self.indent_level -= 1 + self.closeTag('tool') + # Write result + for x in self.result: + f.write(x) + + def addInputParam(self, param_node): + """Add a ParametersNode object if it is to go to <inputs>.""" + if param_node.tags and 'output file' in param_node.tags.split(','): + return # Skip output files + if param_node.kind != 'item': + return # Skip if not item. + args = {} + if param_node.tags and 'input file' in param_node.tags.split(','): + args['type'] = 'data' + args['format'] = ','.join([x.replace('*', '').replace('.', '') + for x in param_node.supported_formats.split(',')]) + args['name'] = '_'.join(param_node.path).replace('-', '_').replace('.', '_') + args['label'] = param_node.description + args['type'] = 'data' + self.appendTag('param', args=args) + else: + TYPE_MAP = { + 'string': 'text', + 'double': 'float', + 'int': 'integer' + } + args['type'] = TYPE_MAP[param_node.type_] + args['name'] = '_'.join(param_node.path).replace('-', '_').replace('.', '_') + args['label'] = param_node.description + if param_node.type_ == 'string' and param_node.restrictions and \ + sorted(param_node.restrictions.split(',')) == ['false', 'true']: + args['type'] = 'boolean' + if param_node.value == 'true': + args['checked'] = 'true' + args['truevalue'] = param_node.cli_element.option_identifier + args['falsevalue'] = '' + self.appendTag('param', args=args) + return + args['value'] = param_node.value + if param_node.type_ == 'string' and param_node.restrictions: + args['type'] = 'select' + self.openTag('param', args=args) + self.indent_level += 1 + for v in param_node.restrictions.split(','): + self.appendTag('option', v, {'value': v}) + self.indent_level -= 1 + self.closeTag('param') + else: + self.appendTag('param', args=args) + + def addOutputParam(self, param_node): + """Add a ParametersNode object if it is to go to <inputs>.""" + if not param_node.tags or not 'output file' in param_node.tags.split(','): + return # Only add for output files. + args = {} + if '.' in param_node.supported_formats: + args['format'] = param_node.supported_formats.split(',')[0].split('.')[-1] + else: + args['format'] = param_node.supported_formats.split(',')[0].split('*')[-1] + args['name'] = '_'.join(param_node.path).replace('-', '_').replace('.', '_') + args['label'] = param_node.description + self.appendTag('data', args=args) + + def addCommandTag(self, tool): + """Write <command> tag to self.result.""" + lst = [] + for ce in tool.cli_elements: + bool_param = False + if ce.param_node.type_ == 'string' and ce.param_node.restrictions and \ + sorted(ce.param_node.restrictions.split(',')) == ['false', 'true']: + bool_param = True + if not bool_param and ce.option_identifier: + lst.append(ce.option_identifier) + # The path mapping is not ideal but should work OK. + lst.append('$' + ce.mapping_path.replace('-', '_').replace('.', '_')) + txt = [tool.executable_name] + lst + self.appendTag('command', text=' '.join(txt)) + + +def main(): + """Main function.""" + # Setup argument parser. + parser = argparse.ArgumentParser(description='Convert CTD to Galaxy XML') + parser.add_argument('-i', '--in-file', metavar='FILE', + help='CTD file to read.', dest='in_file', + required=True) + parser.add_argument('-o', '--out-file', metavar='FILE', + help='File to write. Output type depends on extension.', + dest='out_file', required=True) + + args = parser.parse_args() + + # Parse input. + sys.stderr.write('Parsing %s...\n' % args.in_file) + ctd_parser = CTDParser() + tool = ctd_parser.parse(args.in_file) + + # Write output. + sys.stderr.write('Writing to %s...\n' % args.out_file) + if args.out_file.endswith('.ctd'): + writer = CTDWriter() + else: + writer = GalaxyWriter() + with open(args.out_file, 'wb') as f: + writer.run(tool, f) + + return 0 + + +if __name__ == '__main__': + sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,10 @@ +<?xml version="1.0"?> +<!-- +Data types for the NGS ROI repository. +--> +<datatypes> + <registration> + <!-- ROI data type. Seed bam2roi -h for a description of the format. --> + <datatype extension="roi" type="galaxy.datatypes.tabular:Tabular" subclass="True"/> + </registration> +</datatypes>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ngs_roi/app.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,64 @@ +#!/usr/bin/env python +"""Support code for writing the NGS ROI report generation apps.""" + +__author__ = 'Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + + +import os +import os.path +import subprocess + + +class App(object): + """Base class with helper functionality for NGS ROI report generators.""" + + def __init__(self, args): + self.args = args + + def prepareOutDir(self): + """Create output directory if necessary.""" + if not os.path.exists(self.args.out_dir): + os.makedirs(self.args.out_dir) + + def buildHref(self, ref, begin_pos, end_pos): + vals = {'ref': ref, 'start_pos': begin_pos + 1, 'end_pos': end_pos} + if self.args.link_type == 'local_igv': + vals['host'] = self.args.igv_host + vals['port'] = self.args.igv_port + return 'http://%(host)s:%(port)d/goto?locus=%(ref)s:%(start_pos)d-%(end_pos)d' % vals + else: # self.args.link_type == 'ucsc' + vals['org'] = self.args.ucsc_org + vals['db'] = self.args.ucsc_db + vals['ref'] = self.args.ucsc_chr_prefix + vals['ref'] + return 'http://genome.ucsc.edu/cgi-bin/hgTracks?org=%(org)s&db=%(db)s&position=%(ref)s:%(start_pos)d-%(end_pos)d' % vals + + +class PlotThumbnailsRunner(object): + """Helper class for running the roi_plot_thumbnails program. + + :ivar args: Arguments as returned by argparse. + """ + + def __init__(self, args): + self.args = args + + def run(self): + cmd_args = ['-if', self.args.in_file, + '-o', os.path.join(self.args.out_dir, 'thumbnail_'), + '--max-rois', self.args.max_rois, + '--max-value', self.args.max_value, + '--num-cols', self.args.num_cols, + '--num-rows', self.args.num_rows, + '--plot-height', self.args.plot_height, + '--plot-width', self.args.plot_width, + '--border-width', self.args.border_width, + '--spacing', self.args.spacing] + cmd_args = ['roi_plot_thumbnails'] + map(str, cmd_args) + #import pdb; pdb.set_trace() + p = subprocess.Popen(cmd_args, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + res = p.wait() + if res: + print 'ERROR', p.stdin, p.stderr + return res
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ngs_roi/argparse.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,99 @@ +#!/usr/bin/env python +"""Helper for ROI tools when using argparse module. + +This module contains helper functions for setup of argparse.ArgumentParser +that is common to multiple report generating apps. +""" + +__author__ = 'Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + + +import os.path + + +def addFileArguments(parser): + """Adds --in-file, --out-file, and --out-dir to argument parser. + + These parameters are used for the input ROI file, the output HTML file and + the directory to write supplementary files to. + """ + group = parser.add_argument_group('Input / Output', + 'Input and output related parameters.') + group.add_argument('--in-file', metavar='ROI', dest='in_file', + required=True, help='ROI file to read') + group.add_argument('--out-file', dest='out_file', required=True, + help='path to output HTML file') + group.add_argument('--out-dir', dest='out_dir', + help='directory to write supplementary files to; ' + 'defaults to path of --out-file.') + + +def applyFileDefaults(args): + """Apply file-related default values (copying paths if not set).""" + if not args.out_dir: + args.out_dir = os.path.dirname(args.out_file) or '.' + + +def addPlotGridArguments(parser, default_plot_height=30, default_plot_width=30): + """Adds arguments related to plot grids. + + This is used for the thumbnail plotting but also for the tables. + """ + group = parser.add_argument_group('Plot Grid Configuration', + 'Arguments for the plot image grid.') + + group.add_argument('--max-rois', dest='max_rois', metavar='NUM', + type=int, default=0, + help='Maximal number of ROIs, 0 for all.') + group.add_argument('--max-value', dest='max_value', metavar='NUM', + type=int, default=0, + help='Largest y value to plot, 0 for all.') + + group.add_argument('--num-rows', dest='num_rows', metavar='ROWS', + type=int, default=50, + help='Number of rows per grid.') + group.add_argument('--num-cols', dest='num_cols', metavar='COLS', + type=int, default=40, + help='Number of columns per grid.') + + group.add_argument('--plot-height', dest='plot_height', metavar='HEIGHT', + type=int, default=default_plot_height, help='Height of one plot in px.') + group.add_argument('--plot-width', dest='plot_width', metavar='WIDTH', + type=int, default=default_plot_width, help='Width of one plot in px.') + + group.add_argument('--border-width', dest='border_width', metavar='WIDTH', + type=int, default=0, help='Border width.') + group.add_argument('--spacing', dest='spacing', metavar='SPACING', + type=int, default=2, help='Spacing.') + + +def addLinkArguments(parser): + """Adds arguments related to link creation. + + These parameters control the created links to local IGV browser HTTP + remote control or the UCSC genome browser. + """ + group = parser.add_argument_group('HTML Links', 'Arguments for HTML link creation.') + + group.add_argument('--link-target', dest='link_target', metavar='TARGET', + default='_blank', choices=['_blank', '_top'], + help='Select the link target to create (_blank or _top).') + + group.add_argument('--link-type', dest='link_type', metavar='TARGET', + default='local_igv', choices=['local_igv', 'ucsc'], + help='Select the type of links to create. One of ' + '"local_igv" and "ucsc".') + + group.add_argument('--igv-host', dest='igv_host', metavar='HOST', + default='localhost', help='Host for IGV link.') + group.add_argument('--igv-port', dest='igv_port', metavar='PORT', + type=int, default='60151', help='Port for IGV link.') + + group.add_argument('--ucsc-org', dest='ucsc_org', metavar='ORG', + default='human', help='Organism for UCSC browser link.') + group.add_argument('--ucsc-db', dest='ucsc_db', metavar='DB', + default='hg18', help='Assembly version for UCSC browser link.') + group.add_argument('--ucsc-chr-prefix', dest='ucsc_chr_prefix', metavar='PREFIX', + default='', help='Prefix for chromosome names in UCSC browser.')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ngs_roi/io.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,140 @@ +#!/usr/bin/env python +"""Input / Output Routines for ROI files. + +You can load the records sequentially one by one by iterating over a +RoiFile object: + +f1 = ngs_roi.io.RoiFile('in.roi') +for r in f1: + print r.ref, r.start_pos, r.end_pos + +f2 = ngs_roi.io.RoiFile('in.roi') +print f1.next().ref +print f1.next().ref +print f1.next().ref + +Alternatively, you can load all or a subset of the records: + +ten_records = ngs_roi.io.load('in.roi', 10) # first ten +all_records = ngs_roi.io.load('in.roi') +""" + +__author__ = 'Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + + +class RoiRecord(object): + """Represent one record in a ROI file. + + :ivar ref:name of reference + :type ref:str + :ivar start_pos:0-based start position + :type start_pos:int + :ivar end_pos:0-based end position + :type end_pos:int + :ivar region_name:name of the region + :type region_name:str + :ivar region_length:length of the region + :type region_length:int + :ivar strand:strand of the region, one of ['+', '-'] + :type strand:str + :ivar max_count:highest coverage + :type max_count:int + :ivar data:values of the extended data fields + :type data:list of str + :ivar points:the coverage over the region length + :type points:list of ints + + :ivar data_keys:list with key names for the data fields + :type data_keys:list of str or None + """ + + def __init__(self, ref, start_pos, end_pos, region_name, region_length, + strand, max_count, data, points, data_keys=None): + """Initialize RoiRecord.""" + self.ref = ref + self.start_pos = start_pos + self.end_pos = end_pos + self.strand = strand + self.region_length = region_length + self.region_name = region_name + self.max_count = max_count + self.data = data + self.points = points + self.data_keys = data_keys + + def __str__(self): + return 'RoiRecord(%s, %s, %s, %s, %s, %s, %s, %s, len([...])==%s, %s)' % \ + (repr(self.ref), self.start_pos, self.end_pos, + self.region_name, self.region_length, repr(self.strand), + self.max_count, self.data, len(self.points), self.data_keys) + def __repr__(self): + return self.__str__() + + +class RoiFile(object): + """File of ROI records. + + Can be used as an iterator. + + :ivar data_keys:Keys of additional data + :type data_keys:list of str + """ + + def __init__(self, path): + """Open ROI file and load header.""" + self.path = path + self.f = open(path, 'rb') + self.line = None + self.data_keys = self._loadKeys(path) + + def _loadKeys(self, path): + """Load keys from header, skipping comments.""" + while True: + self.line = self.f.readline() + if self.line == '': # EOF + return None + if self.line.startswith('##'): + keys = self.line[1:].strip().split('\t') + self.line = self.f.readline() + return keys[7:-1] + if self.line.startswith('#'): + continue # comment + break + + def __iter__(self): + """Return iterator (self).""" + return self + + def next(self): + """Return next record.""" + if self.line == '': # EOF + raise StopIteration + l = self.line + self.line = self.f.readline() + return self._buildRecord(l) + + def _buildRecord(self, line): + """Build RoiRecord from ROI line.""" + vals = line.split() + region_length = int(vals[4]) + data = vals[7:-1] + points = [int(x) for x in vals[-1].split(',')] + return RoiRecord(vals[0], int(vals[1]) - 1, int(vals[2]), vals[3], + region_length, vals[5], int(vals[6]), data, points, + self.data_keys) + + + +def load(path, max_count=0): + """Load ROI file and return it as a list of RoiRecord objects. + + NA values are translated to 0. + """ + result = [] + for i, x in enumerate(RoiFile(path)): + if max_count > 0 and i >= max_count: + break + result.append(x) + return result
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_details.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,144 @@ +#!/usr/bin/env python +"""Generation of detailed ROI reports with larger plots. + +This report generation works for hundred of ROIs. +""" + +import argparse +import math +import os.path +import sys + +import Cheetah.Template +import matplotlib.pyplot as plt + +import rois + +PAGE_TPL = """ +<html> + <head><title>ROI Table</title></head> + <body> + <h1>Detailed ROI Report</h1> + #for i, roi in enumerate($records) + <h2>${roi.ref}:${roi.start_pos + 1}-${roi.end_pos+1}</h2> + <img src="${args.out_prefix}${i}.png" /> + <dl> + <dt>chr:start-end name length strand</dt> + <dd>${roi.ref}:${roi.start_pos}-${roi.end_pos} ${roi.region_name} ${roi.region_length} ${roi.strand}</dd> + <dt>metrics</dt> + <dd>#for j, key in enumerate($data_keys)#$key: ${roi.data[$j]}, #end for#</dd> + </dl> + #end for + </body> +</html> +""" + +class DetailedRoiGenerator(object): + """Generate detailed ROI report. + + :ivar args:Arguments from the comment line. + """ + + def __init__(self, args): + self.args = args + + def run(self): + """Run report generation, return status code. + + :return: integer with the result. + """ + print >>sys.stderr, 'Loading ROI' + keys, records = rois.loadRoi(self.args.in_file, self.args.max_rois) + + self.writeHtml(keys, records) + self.writePlots(records) + return 0 + + def writePlots(self, records): + for i, roi in enumerate(records): + file_name = '%s%d.png' % (os.path.join(self.args.out_dir, self.args.out_prefix), i) + print >>sys.stderr, 'Writing plot %s' % file_name + plt.figure(figsize=(6, 4)) + plt.gcf().subplots_adjust(bottom=0.1, left=0.1) + plt.plot(roi.points) + plt.ylabel('coverage') + plt.xlabel('ROI pos') + plt.savefig(file_name) + + def writeHtml(self, keys, records): + file_name = self.args.out_file + print >>sys.stderr, 'Writing HTML file %s' % file_name + + vals = {'args': self.args, 'records': records, 'data_keys': keys} + t = Cheetah.Template.Template(PAGE_TPL, searchList=vals) + + with open(file_name, 'wb') as f: + f.write(str(t)) + + +def main(): + parser = argparse.ArgumentParser(description='Plot ROI file.') + parser.add_argument('-o', '--out-file', dest='out_file', metavar='PATH', + help='Prefix of output HTML file. The tool will create ' + 'PNG files parallel to the HTML file.', + default='roi_plots.html') + parser.add_argument('-d', '--out-dir', dest='out_dir', metavar='DIR', + help='Directory to write images to. By default ' + 'this is the same as the directory for --out-file.', + default=None) + parser.add_argument('-p', '--out-prefix', dest='out_prefix', metavar='DIR', + help='Prefix of output PNG files.', default='roi_plot_grid') + parser.add_argument('-i', '--in-file', dest='in_file', metavar='FILE', + required=True, help='Path to ROI file to read.') + + group = parser.add_argument_group('Plot Configuration', 'Arguments for the plot images.') + + group.add_argument('--max-rois', dest='max_rois', metavar='NUM', + type=int, default=0, + help='Maximal number of ROIs, 0 for all.') + + group.add_argument('--num-rows', dest='num_rows', metavar='ROWS', + type=int, default=50, + help='Number of rows per grid.') + group.add_argument('--num-cols', dest='num_cols', metavar='COLS', + type=int, default=40, + help='Number of columns per grid.') + + group.add_argument('--plot-height', dest='plot_height', metavar='HEIGHT', + type=int, default=30, help='Height of one plot in px.') + group.add_argument('--plot-width', dest='plot_width', metavar='WIDTH', + type=int, default=30, help='Width of one plot in px.') + + group = parser.add_argument_group('HTML Links', 'Arguments for HTML link creation.') + + group.add_argument('--link-target', dest='link_target', metavar='TARGET', + default='_blank', choices=['_blank', '_top'], + help='Select the link target to create (_blank or _top).') + + group.add_argument('--link-type', dest='link_type', metavar='TARGET', + default='local_igv', choices=['local_igv', 'ucsc'], + help='Select the type of links to create. One of ' + '"local_igv" and "ucsc".') + + group.add_argument('--igv-host', dest='igv_host', metavar='HOST', + default='localhost', help='Host for IGV link.') + group.add_argument('--igv-port', dest='igv_port', metavar='PORT', + type=int, default='60151', help='Port for IGV link.') + + group.add_argument('--ucsc-org', dest='ucsc_org', metavar='ORG', + default='human', help='Organism for UCSC browser link.') + group.add_argument('--ucsc-db', dest='ucsc_db', metavar='DB', + default='hg18', help='Assembly version for UCSC browser link.') + group.add_argument('--ucsc-chr-prefix', dest='ucsc_chr_prefix', metavar='PREFIX', + default='', help='Prefix for chromosome names in UCSC browser.') + + args = parser.parse_args() + if not args.out_dir: + args.out_dir = os.path.dirname(args.out_file) or '.' + print args + app = DetailedRoiGenerator(args) + return app.run() + + +if __name__ == '__main__': + sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_feature_projection.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,79 @@ +<?xml version="1.0" encoding="UTF-8"?> +<tool id="roi_feature_projection" name="ROI Feature Projection"> + <description>Intersect ROIs with GFF/BED</description> + + <command> + <!-- First, create link for the input files --> + #if $roi_intersect_genome# ln -s ${roi_intersect_genome} ${roi_intersect_genome}.${roi_intersect_genome.ext}; #end if + #if $roi_intersect_in_roi# ln -s ${roi_intersect_in_roi} ${roi_intersect_in_roi}.${roi_intersect_in_roi.ext}; #end if + #if $roi_intersect_out_roi# touch ${roi_intersect_out_roi}; ln -s ${roi_intersect_out_roi} ${roi_intersect_out_roi}.roi; #end if + #if $roi_intersect_in_features # ln -s ${roi_intersect_in_features} ${roi_intersect_in_features}.${roi_intersect_in_features.ext}; #end if + + <!-- Call the feature projection on the links --> + roi_intersect $roi_intersect_strand_specific + --in-roi ${roi_intersect_in_roi}.${roi_intersect_in_roi.ext} + --in-features ${roi_intersect_in_features}.${roi_intersect_in_features.ext} + --out-roi ${roi_intersect_out_roi}.roi + --mode $roi_intersect_mode + #if $roi_intersect_genome# --genome ${roi_intersect_genome}.${roi_intersect_genome.ext} #end if + #if $verbosity # $verbosity #end if + #if $roi_intersect_gff_type # --gff-type $roi_intersect_gff_type #end if + #if $roi_intersect_gff_group_by# --gff-group-by $roi_intersect_gff_group_by #end if + ; + + <!-- Get rid of the links again --> + #if $roi_intersect_genome# rm -f ${roi_intersect_genome}.${roi_intersect_genome.ext}; #end if + #if $roi_intersect_in_roi# rm -f ${roi_intersect_in_roi}.${roi_intersect_in_roi.ext}; #end if + #if $roi_intersect_in_roi# rm -f ${roi_intersect_out_roi}.roi; #end if + #if $roi_intersect_in_features# rm -f ${roi_intersect_in_features}.${roi_intersect_in_features.ext}; #end if + </command> + + <!-- + Input Files and Parameters + --> + <inputs> + <param label="BED, GFF, or GTF file to read." type="data" name="roi_intersect_in_features" format="bed,gff,gtf" /> + <param label="ROI file to read." type="data" name="roi_intersect_in_roi" format="roi" /> + <param label="Path to FASTA file with genome; optional. When given, this is used for computing the overall region's C+G content." type="data" name="roi_intersect_genome" format="fasta,fa" optional="true" /> + + <param type="select" name="verbosity"> + <option value="--quiet">verbose</option> + <option value="" selected="true">normal</option> + <option value="--verbose">verbose</option> + <option value="--very-verbose">very verbose</option> + </param> + + <param type="text" name="roi_intersect_gff_type" value="" label="The GFF/GTF record type (value of third column) to keep. Keep all if not set or input file type is not GFF/GTF." /> + <param type="text" name="roi_intersect_gff_group_by" value="" label='The GFF/GTF tag to use for grouping, e.g. "Parent", "transcript_id". No grouping if empty. When using the grouping feature, the --mode is automatically set to projection.' /> + <param falsevalue="" truevalue="--strand-specific" type="boolean" name="roi_intersect_strand_specific" label="Enable strand-specific mode if set." /> + + <param type="select" name="roi_intersect_mode" value="projection" label="The mode in which to combine the ROI and BED/GFF file. See section Combination Modes below for details."> + <option value="projection" selected="true">projection</option> + <option value="intersection">intersection</option> + <option value="union">union</option> + <option value="difference">difference</option> + </param> + </inputs> + + <!-- + Recognize errors by return code and not output to stderr. + --> + + <outputs> + <data label="${roi_intersect_in_roi} & ${roi_intersect_in_features}" + name="roi_intersect_out_roi" format="roi" /> + </outputs> + + <!-- + Recognize errors by return code and not output to stderr. + --> + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + + <!-- + Tool Help + --> + <help>No help yet.</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_filter.sh Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,79 @@ +#!/bin/bash + +# Sorting of ROI files. +# +# USAGE: sort_filter.sh [-n] -c COL -o OP -v VALUE <IN.roi >OUT.roi +# +# Read lines from IN.roi, write header lines. Otherwise, check that the +# given column is in relation OP to the value and only write if this is +# fulfilled. Use -n to compare as number. + +# Maximal number of header lines. +MAX_HEADER=1000 + +# The arguments will go here. +COL= +OP= +VALUE= +TXT=1 + +# Parse option values. +while getopts "c:o:v:n" opt; do + case $opt in + c) + COL=$OPTARG + ;; + o) + case $OPTARG in + eq) + OP="==" + ;; + geq) + OP=">=" + ;; + leq) + OP="<=" + ;; + gt) + OP=">" + ;; + lt) + OP="<" + ;; + neq) + OP="!=" + ;; + \?) + echo "Invalid operator $OPTARG" >&2 + exit 1 + ;; + esac + ;; + v) + VALUE=$OPTARG + ;; + n) + TXT=0 + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +# Check that -i or -o are given. +if [[ "$COL" == "" || "$OP" == "" || "$VALUE" == "" ]]; then + echo "Missing option -c, -o, or -v" >&2 + exit 1 +fi + +if [[ "x$TXT" == "x1" ]]; then + VALUE="\"$VALUE\"" +fi + +# Execute filtering. +#echo awk "/^#/ { print \$0 } !/^#/ { if (\$$COL $OP $VALUE) print \$0 }" >&2 +awk "/^#/ { print \$0 } !/^#/ { if (\$$COL $OP $VALUE) print \$0 }" + +exit $?
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_filter.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,43 @@ +<?xml version="1.0"?> +<tool id="roi_filter" name="ROI Filter"> + <description>Filter ROI files</description> + + <command interpreter="bash"> + roi_filter.sh $numeric -c $column -o $operator -v $value <$input >$output + </command> + + <!-- + Input Files and Parameters + --> + <inputs> + <param name="input" format="roi" type="data" label="ROI file to filter." /> + + <param name="column" type="integer" label="column (starts with 1)" value="5" /> + + <param name="operator" type="select" label="operator"> + <option value="gt" selected="true">></option> + <option value="lt"><</option> + <option value="geq">>=</option> + <option value="leq"><=</option> + <option value="eq">==</option> + <option value="neq">!=</option> + </param> + + <param name="value" type="text" label="value" value="100" /> + + <param name="numeric" type="boolean" label="compare as number" checked="true" truevalue="-n" falsevalue="" /> + </inputs> + + <!-- + Recognize errors by return code and not output to stderr. + --> + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + + <!-- + Tool Help + --> + <help>No help yet.</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_intersect.ctd Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,61 @@ +<?xml version="1.0" encoding="UTF-8"?> +<tool> + <name>RoiIntersect</name> + <executableName>roi_intersect</executableName> + <version>0.1</version> + <description>Region Of Interest Intersection.</description> + <manual>Compute the intersection of a ROI file and regions from a BED or GFF file. The result is a ROI file where each interval from the BED/GFF/GTF file that overlapped with one input ROI file is a region of interest, with the coverage counts projected to the new region of interest. +</manual> + <docurl>http://www.seqan.de</docurl> + <category></category> + <cli> + <clielement optionIdentifier="--quiet" isList="false"> + <mapping referenceName="roi_intersect.quiet" /> + </clielement> + <clielement optionIdentifier="--verbose" isList="false"> + <mapping referenceName="roi_intersect.verbose" /> + </clielement> + <clielement optionIdentifier="--very-verbose" isList="false"> + <mapping referenceName="roi_intersect.very-verbose" /> + </clielement> + <clielement optionIdentifier="--in-roi" isList="false"> + <mapping referenceName="roi_intersect.in-roi" /> + </clielement> + <clielement optionIdentifier="--in-features" isList="false"> + <mapping referenceName="roi_intersect.in-features" /> + </clielement> + <clielement optionIdentifier="--out-roi" isList="false"> + <mapping referenceName="roi_intersect.out-roi" /> + </clielement> + <clielement optionIdentifier="--genome" isList="false"> + <mapping referenceName="roi_intersect.genome" /> + </clielement> + <clielement optionIdentifier="--mode" isList="false"> + <mapping referenceName="roi_intersect.mode" /> + </clielement> + <clielement optionIdentifier="--strand-specific" isList="false"> + <mapping referenceName="roi_intersect.strand-specific" /> + </clielement> + <clielement optionIdentifier="--gff-type" isList="false"> + <mapping referenceName="roi_intersect.gff-type" /> + </clielement> + <clielement optionIdentifier="--gff-group-by" isList="false"> + <mapping referenceName="roi_intersect.gff-group-by" /> + </clielement> + </cli> + <PARAMETERS version="1.4" xsi:noNamespaceSchemaLocation="http://open-ms.sourceforge.net/schemas/Param_1_4.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <NODE name="roi_intersect" description="Region Of Interest Intersection."> + <ITEM name="quiet" value="false" type="string" description="Set verbosity to a minimum." restrictions="true,false" /> + <ITEM name="verbose" value="false" type="string" description="Enable verbose output." restrictions="true,false" /> + <ITEM name="very-verbose" value="false" type="string" description="Enable very verbose output." restrictions="true,false" /> + <ITEM name="in-roi" value="" type="string" description="ROI file to read." tags="input file,required" supported_formats="*roi" /> + <ITEM name="in-features" value="" type="string" description="BED, GFF, or GTF file to read." tags="input file" supported_formats="*bed,*gff,*gtf" /> + <ITEM name="out-roi" value="" type="string" description="ROI file to write." tags="output file,required" supported_formats="*roi" /> + <ITEM name="genome" value="" type="string" description="Path to FASTA file with genome; optional. When given, this is used for computing the overall region's C+G content." tags="input file" supported_formats="*fasta,*fa" /> + <ITEM name="mode" value="projection" type="string" description="The mode in which to combine the ROI and BED/GFF file. See section Combination Modes below for details." restrictions="intersection,projection,union,difference" /> + <ITEM name="strand-specific" value="false" type="string" description="Enable strand-specific mode if set." restrictions="true,false" /> + <ITEM name="gff-type" value="" type="string" description="The GFF/GTF record type (value of third column) to keep. Keep all if not set or input file type is not GFF/GTF." /> + <ITEM name="gff-group-by" value="" type="string" description="The GFF/GTF tag to use for grouping, e.g. "Parent", "transcript_id". No grouping if empty. When using the grouping feature, the --mode is automatically set to projection." /> + </NODE> + </PARAMETERS> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_metrics.Rscript Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,103 @@ +#!/usr/bin/env Rscript + +# Compute metrics (all except PCA) for ROI files. +# +# USAGE: roi_metrics.Rscript -i IN.roi -o OUT.roi + +# TODO(holtgrew): Bernd, can you give the English names of the metrics below in the comments? + +# Author: Bernd Jagla <bernd.jagla@pasteur.fr> +# Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de> + +library('getopt') # parsing args +library('ngsroi') # ROI I/O +library('boot') # k3.linear() + +# ---------------------------------------------------------------------------- +# Command Line Parsing +# ---------------------------------------------------------------------------- + +# Parse command line options. +spec = matrix(c( + "help", "h", 0, "logical", + "infile", "i", 1, "character", + "outfile", "o", 1, "character" +), byrow=TRUE, ncol=4); +opt = getopt(spec); + +# If help was asked for print a friendly message and exit with a non-zero +# error code. +if (!is.null(opt$help) ) +{ + cat(getopt(spec, usage=TRUE)); + q(status=1); +} + +# Check arguments. +if (is.null(opt$infile)) +{ + write("-i/--infile missing.", stderr()); + q(status=1) +} +if (is.null(opt$out)) +{ + write("-o/--outfile missing.", stderr()); + q(status=1) +} + +# ---------------------------------------------------------------------------- +# Compute metrics. +# ---------------------------------------------------------------------------- + +# Read input files.x +roi = readROI(opt$infile) + +# Compute some basic statistics. +roi$min = as.numeric(lapply(roi$counts, min)) +roi$median = as.numeric(lapply(roi$counts, median)) +roi$mean = as.numeric(lapply(roi$counts, mean)) +roi$quantile75 = as.numeric(lapply(roi$counts, quantile, probs=0.75)) +roi$quantile95 = as.numeric(lapply(roi$counts, quantile, probs=0.95)) + +# Compute area under curve metric. +aoc <- function(v){ + m = max(v) + return (sum(v/m)/length(v)) +} +roi$aoc = as.numeric(lapply(roi$counts, aoc)) + +# Compute xreaXX metric. +xreaXX <- function(v){ + vo=order(v) + mx = max(v) + mi = min(v) + if((mx-mi)==0) return(0) + vn=(v-mi)/(mx-mi) + x=c(1:length(v))/length(v) + vno=vn[vo] + return(sum(x-vno)/length(v)) +} +roi$xreaXX = as.numeric(lapply(roi$counts, xreaXX)) + +# Compute r3linear metric. +roi$r3linear = as.numeric(lapply(roi$counts, k3.linear)) + +# Compute distMax3p metric. +distMax3p <- function(v) +{ + return(length(v)-which.max(rev(v))+1) +} +roi$distMax3p = as.numeric(lapply(roi$count, distMax3p)) + +# Compute distMax5p metric. +distMax5p <- function(v) +{ + return (which.max(v)) +} +roi$distMax5p = as.numeric(lapply(roi$count, distMax5p)) + +# Write output. +writeROI(roi, opt$outfile) + +# Quit with success code. +q(status=0) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_metrics.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,37 @@ +<?xml version="1.0"?> +<tool id="roi_metrics" name="ROI Metrics Computation"> + <description>Compute metrics for records in an ROI file.</description> + + <command interpreter="Rscript"> + roi_metrics.Rscript -i $input -o $output + </command> + + <!-- + Input Files and Parameters + --> + <inputs> + <param name="input" format="roi" type="data" + label="input ROI" + help="ROI file to compute metrics for." /> + </inputs> + + <!-- + Output Files + --> + <outputs> + <data name="output" format="roi" label="${input.name} +metrics" /> + </outputs> + + <!-- + Recognize errors by return code and not output to stderr. + --> + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + + <!-- + Tool Help + --> + <help>No help yet.</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_pca.Rscript Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,66 @@ +#!/usr/bin/env Rscript + +# Compute PCA metric for ROI files. +# +# USAGE: roi_pca.Rscript -i IN.roi -o OUT.roi + +# TODO(holtgrew): Bernd, can you give the English names of the metrics below in the comments? + +# Author: Bernd Jagla <bernd.jagla@pasteur.fr> +# Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de> + +library('getopt') # parsing args +library('ngsroi') # ROI I/O +library('boot') # k3.linear() + +# ---------------------------------------------------------------------------- +# Command Line Parsing +# ---------------------------------------------------------------------------- + +# Parse command line options. +spec = matrix(c( + "help", "h", 0, "logical", + "infile", "i", 1, "character", + "outfile", "o", 1, "character" +), byrow=TRUE, ncol=4); +opt = getopt(spec); + +# If help was asked for print a friendly message and exit with a non-zero +# error code. +if (!is.null(opt$help) ) +{ + cat(getopt(spec, usage=TRUE)); + q(status=1); +} + +# Check arguments. +if (is.null(opt$infile)) +{ + write("-i/--infile missing.", stderr()); + q(status=1) +} +if (is.null(opt$out)) +{ + write("-o/--outfile missing.", stderr()); + q(status=1) +} + +# ---------------------------------------------------------------------------- +# Compute metrics. +# ---------------------------------------------------------------------------- + +# Read input files.x +roi = readROI(opt$infile) + +# Compute PCA. +data=roi[,8:length(roi)-1] +pca=princomp(data, na.action=na.exclude) +prj=predict(pca, data) +roi$pca1=prj[,1] +roi$pca2=prj[,2] + +# Write output. +writeROI(roi, opt$outfile) + +# Quit with success code. +q(status=0) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_pca.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,35 @@ +<?xml version="1.0"?> +<tool id="roi_pca" name="ROI PCA Computation"> + <description>Compute PCA-based metric for records in an ROI file.</description> + + <command interpreter="Rscript"> + roi_pca.Rscript -i $input -o $output + </command> + + <!-- + Input Files and Parameters + --> + <inputs> + <param name="input" format="roi" type="data" label="ROI file to compute metrics for." /> + </inputs> + + <!-- + Output Files + --> + <outputs> + <data name="output" format="roi" label="${input.name} + PCA" /> + </outputs> + + <!-- + Recognize errors by return code and not output to stderr. + --> + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + + <!-- + Tool Help + --> + <help>No help yet.</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_plot_thumbnails.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,188 @@ +#!/usr/bin/env python +"""Thumbnail Plot Generator. + +This report generator uses the binary roi_plot_thumbnails (C++ program, must +be in PATH) to generate many PNG images with small ROI plots. It then creates +a HTML file that includes the PNG and adds an overlay image link map such that +when the user clicks on a plot, she is transfered to the according position in +a local IGV or the UCSC genome browser, for example. +""" + +__author__ = 'Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + +# TODO(holtgrew): Use Cheetah templates to generate HTML. +# TODO(holtgrew): Call roi_plot_thumbnails from Python script. +# TODO(holtgrew): from __future__ use print_function + + +import argparse +import math +import os.path +import sys + +import ngs_roi.app +import ngs_roi.argparse +import ngs_roi.io + + +class LinkRegion(object): + """Region on picture with genomic interval.""" + + def __init__(self, x1, y1, x2, y2, ref, begin_pos, end_pos): + self.x1 = x1 + self.x2 = x2 + self.y1 = y1 + self.y2 = y2 + self.ref = ref + self.begin_pos = begin_pos + self.end_pos = end_pos + + +class RoiPlotGrid(object): + """A grid of ROI plots. + + width -- width one one plot + height -- height of one plot + """ + + # Border in each direction. + BORDER = 0 + # Spacing between plots. + SPACE = 2 + + def __init__(self, width, height, columns, rows, bg_plot=(0, 0, 0, 10), + frame_col=(80, 80, 80, 255), fg_col=(0, 0, 0, 255)): + # Properties of the grid. + self.width = float(width) + self.height = float(height) + self.columns = columns + self.rows = rows + # State. + self.idx = 0 # currently drawn plot + self.canvas_width = 2 * self.BORDER + columns * width + (columns - 1) * self.SPACE + self.canvas_height = 2 * self.BORDER + rows * height + (rows - 1) * self.SPACE + # Colors. + self.bg_plot = bg_plot + self.frame_col = frame_col + self.fg_color = fg_col + # List of link region for image map. + self.link_regions = [] + + def plotStart(self, idx): + """Return pair with start coordinates of plot.""" + row = idx / self.columns + col = idx % self.columns + x = self.BORDER + col * (self.width + self.SPACE) + y = self.BORDER + row * (self.height + self.SPACE) + return x, y + + def plotRecord(self, record): + """Register plotting of a record.""" + start_x, start_y = self.plotStart(self.idx) + self.idx += 1 + # Register link region. + self.link_regions.append(LinkRegion( + start_x, start_y, start_x + self.width, start_y + self.height, + record.ref, record.start_pos, record.end_pos)) + + +class GridLinks(object): + """Link information for one grid.""" + + def __init__(self, file_name, link_regions): + self.file_name = file_name + self.link_regions = link_regions + + +class PlotThumbnailsApp(ngs_roi.app.App): + def __init__(self, args): + # Call parent's constructor. + ngs_roi.app.App.__init__(self, args) + self.prepareOutDir() + # Initialize members. + self.grid = None + self.plot_idx = 0 + self.grid_links = [] + + def run(self): + # Load ROI records. + print >>sys.stderr, 'Loading ROI' + + # Create plots. + runner = ngs_roi.app.PlotThumbnailsRunner(self.args) + runner.run() + + # Create HTML. + print >>sys.stderr, 'Creating HTML...' + num_plots = self.args.num_cols * self.args.num_rows # plots on grid + for i, roi in enumerate(ngs_roi.io.RoiFile(self.args.in_file)): + if self.args.max_rois > 0 and i >= self.args.max_rois: + break + # Write out old grid (if any) and create new one. + if i % num_plots == 0: + if self.grid: + print >>sys.stderr, ' Writing plot %d...' % self.plot_idx + self.writeGrid() + self.grid = RoiPlotGrid(self.args.plot_width, self.args.plot_height, + self.args.num_cols, self.args.num_rows) + # Put the next plot on the grid. + self.grid.plotRecord(roi) + print >>sys.stderr, ' Writing plot %d...' % self.plot_idx + self.writeGrid() # Write last grid. + self.createHtml(self.args.out_file) + return 0 + + def writeGrid(self): + """Register writing of grid.""" + if not self.grid: + return + # Append grid info. + file_name = 'thumbnail_%d.png' % self.plot_idx + file_name = os.path.join(self.args.out_dir, file_name) + self.plot_idx += 1 + self.grid_links.append(GridLinks(os.path.basename(file_name), self.grid.link_regions)) + + def createHtml(self, file_name): + print >>sys.stderr, 'Writing HTML to %s' % file_name + with open(file_name, 'wb') as f: + f.write('<html><body>\n') + f.write('<iframe name="empty" height="0" width="0" src="about:blank"></iframe>\n') + for gl in self.grid_links: + vals = (gl.file_name, gl.file_name, self.grid.canvas_width, self.grid.canvas_height) + f.write('<img src="%s" usemap="#%s" width="%d" height="%d" />\n' % vals) + f.write('<map name="%s">\n' % gl.file_name) + for lr in gl.link_regions: + locus = (lr.ref, lr.begin_pos + 1, lr.end_pos) + vals = {'x1': lr.x1, 'x2': lr.x2, 'y1': lr.y1, 'y2': lr.y2, + 'title': '%s %d-%d' % locus, + 'href': self.buildHref(lr.ref, lr.begin_pos, lr.end_pos), + 'onclick': ''} + # Add onclick handler to prevent opening of new window. + vals['target_attr'] = '' + if self.args.link_target: + vals['target_attr'] = ' target="%s"' % self.args.link_target + if self.args.link_type == 'local_igv': + vals['target_attr'] = ' target="empty"' + f.write(' <area shape="rect" coords="%(x1)d,%(y1)d,%(x2)d,%(y2)d" ' + 'alt="%(title)s" title="%(title)s" href="%(href)s"%(target_attr)s />\n' % vals) + f.write('</map>\n') + f.write('</body></html>\n') + + +def main(): + """Program entry point.""" + parser = argparse.ArgumentParser(description='Plot ROI file.') + ngs_roi.argparse.addFileArguments(parser) + ngs_roi.argparse.addPlotGridArguments(parser) + ngs_roi.argparse.addLinkArguments(parser) + args = parser.parse_args() + ngs_roi.argparse.applyFileDefaults(args) + + app = PlotThumbnailsApp(args) + return app.run() + + +if __name__ == '__main__': + sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_plot_thumbnails.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,70 @@ +<?xml version="1.0"?> +<tool id="roi_plot_thumbnails" name="ROI Thumbnail Plots"> + <description>Create ROI thumbnail plot grids.</description> + <!-- First execute HTMl generation (will also create output path), then + perform PNG creation. --> + <command interpreter="python"> + roi_plot_thumbnails.py --in-file $input --out-file "$out_file" --out-dir "$out_file.files_path" + #if $max_rois # --max-rois $max_rois #end if + #if $num_rows # --num-rows $num_rows #end if + #if $num_cols # --num-cols $num_cols #end if + #if $plot_height # --plot-height $plot_height #end if + #if $plot_width # --plot-width $plot_width #end if + #if $link_target # --link-target $link_target #end if + #if $link_type # --link-type $link_type #end if + #if $igv_host # --igv-host $igv_host #end if + #if $igv_port # --igv-port $igv_port #end if + #if $ucsc_org # --ucsc-org $ucsc_org #end if + #if $ucsc_db # --ucsc-db $ucsc_db #end if + #if $ucsc_chr_prefix # --ucsc-chr-prefix "$ucsc_chr_prefix" #end if + + && + + roi_plot_grid -if $input -o "${out_file.files_path}/roi_plot_grid" + #if $max_rois # --max-rois $max_rois #end if + #if $num_rows # --num-rows $num_rows #end if + #if $num_cols # --num-cols $num_cols #end if + #if $plot_height # --plot-height $plot_height #end if + #if $plot_width # --plot-width $plot_width #end if + #if $max_value # --max-value $max_value #end if + </command> + <inputs> + <param name="input" format="roi" type="data" label="ROI file to generate statistics for."/> + <param name="max_rois" type="integer" label="Maximal total number of records to process. 0 for all." value="5000" /> + <param name="max_value" type="integer" label="Maximal value to plot. 0 for no limit." value="0" /> + <param name="num_rows" type="integer" label="Number of rows per grid." value="50" /> + <param name="num_cols" type="integer" label="Number of columns per grid." value="40" /> + <param name="plot_height" type="integer" label="Height of one plot in px." value="30" /> + <param name="plot_width" type="integer" label="Width of one plot in px." value="30" /> + + <param name="link_type" type="select" label="Link target."> + <option value="local_igv" label="Local IGV Instance" /> + <option value="ucsc" label="UCSC Genome Browser" /> + </param> + + <param name="link_target" type="select" label="Open links in."> + <option value="_blank" selected="true">new window</option> + <option value="_top">this window</option> + <option value="">this frame</option> + </param> + + <param name="igv_host" type="text" label="Host for the IGV link." value="localhost" /> + <param name="igv_port" type="integer" label="Port for the IGV link." value="60151" /> + + <param name="ucsc_org" type="text" label="UCSC Genome Browser org value." value="human" /> + <param name="ucsc_db" type="text" label="UCSC Genome Browser db value." value="hg18" /> + <param name="ucsc_chr_prefix" type="text" label="Prefix to add to contig names." value="" /> + </inputs> + <outputs> + <data name="out_file" format="html" label="${input.name} Plot Grid" /> + </outputs> + + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + + <help> + You don't need help! + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_report.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,128 @@ +#!/usr/bin/env python +"""Create ROI overview report. + +This report consists of plots of all metrics (y: metric, x: rank of value). +Each plot is written out as a PNG file and we also create one output HTML file +that shows all HTML files. + +Plotting is done using the fine matplotlib. +""" + +from __future__ import print_function + +__author__ = 'Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + + +import sys +import os +import os.path +import argparse + +import Cheetah.Template +import matplotlib.pyplot as plt + +import ngs_roi.io +import ngs_roi.app +import ngs_roi.argparse + + +# The HTML template to use for generating the HTML page. +REPORT_TPL = """ +<html> + <head> + <title>ROI Report</title> + </head> + <body> + <h1>ROI Report</h1> + <h2>Table of Contents</h2> + <ul> + #for figure in $figures + <li><a href="#$figure.slug">$figure.title</a></li> + #end for + </ul> + <h2>Plots</h2> + #for figure in $figures + <h3 id="$figure.slug">$figure.title</h3> + <img src="$figure.file_name" title="$figure.title" /> + #end for + </body> +</html> +""" + + +class ReportBuilder(ngs_roi.app.App): + """This class is used for building the report.""" + + def __init__(self, args): + self.args = args + self.in_file = self.args.in_file + self.out_file = self.args.out_file + self.out_dir = self.args.out_dir + self.prepareOutDir() + + def plotAndWrite(self, file_name, numbers, ylabel): + """Create plot of numbers and write as PNG. + + :param file_name:path to write plot to as image + :param numbers:list of numbers to plot + :param ylabel:label for y axis + """ + plt.figure() + plt.plot(numbers) + plt.ylabel(ylabel) + plt.savefig(file_name) + + def run(self): + # Load ROI. + print('Loading ROI...', file=sys.stderr) + records = ngs_roi.io.load(self.in_file) + keys = records[0].data_keys + # Create ROI plots. + print('Creating plots...', file=sys.stderr) + METRICS = [('start position', 'start_pos', lambda x: x.start_pos), + ('end position', 'end_pos', lambda x: x.end_pos), + ('region length', 'region_length', lambda x: x.region_length), + ('max count', 'max_count', lambda x: x.max_count)] + def getData(i): + def func(x): + try: + res = float(x.data[i]) + except ValueError: + res = 0 + return res + return func + for i, key in enumerate(keys): + slug = ''.join(x for x in key if x.isalnum()) + METRICS.append((key, slug, getData(i))) + figure_infos = [] + for title, slug, func in METRICS: + values = [func(x) for x in records] + file_name = 'report_%s.png' % slug + file_path = os.path.join(self.out_dir, file_name) + self.plotAndWrite(file_path, sorted(values), title) + figure_infos.append({'file_name': file_name, 'title': title, 'slug': slug}) + # Create report HTML. + name_space = {'figures': figure_infos} + t = Cheetah.Template.Template(REPORT_TPL, searchList=name_space) + with open(os.path.join(self.out_dir, 'index.html'), 'wb') as f: + f.write(str(t)) + with open(os.path.join(self.out_file), 'wb') as f: + f.write(str(t)) + +def main(): + """Program entry point.""" + + parser = argparse.ArgumentParser(description='Create ROI report.') + + ngs_roi.argparse.addFileArguments(parser) + args = parser.parse_args() + ngs_roi.argparse.applyFileDefaults(args) + + report_builder = ReportBuilder(args) + return report_builder.run() + + +if __name__ == '__main__': + sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_report.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,36 @@ +<?xml version="1.0"?> +<tool id="roi_report" name="ROI Overview Report"> + <description>ROI Overview Report</description> + + <command interpreter="python"> + roi_report.py --in-file $input --out-file "$out_file" --out-dir "$out_file.files_path" + </command> + + <!-- + Input Files and Parameters + --> + <inputs> + <param name="input" format="roi" type="data" + label="ROI file to generate report for."/> + </inputs> + + <!-- + Output Files + --> + <outputs> + <data name="out_file" format="html" label="${input.name} Report" /> + </outputs> + + <!-- + Recognize errors by return code and not output to stderr. + --> + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + + <!-- + Tool Help + --> + <help>No help yet.</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_sort.sh Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,86 @@ +#!/bin/bash + +# Sorting of ROI files. +# +# USAGE: sort_roi.sh [OPTIONS] -i IN.roi -o OUT.roi +# +# Options: +# -r reverse orientation +# -p sort by position (ref, start, end) -- default +# -c COL sort by column COL +# -n COL sort by column COL, compare as number + +# Maximal number of header lines. +MAX_HEADER=1000 + +# The parameters that we will pass to sort. +SORT_POS_ARGS="-k 1,1 -k 2,2n -k 3,3n" +SORT_POS_ARGS_REV="-k 1,1r -k 2,2nr -k 3,3nr" +SORT_COL_ARGS="-k" + +# The arguments will go here. +SORT_BY=pos +SORT_COL=0 +REVERSE= + +# Parse option values. +while getopts "pc:i:o:n:r" opt; do + case $opt in + i) + IN_FILE=$OPTARG + ;; + o) + OUT_FILE=$OPTARG + ;; + p) + SORT_BY=pos + ;; + c) + SORT_BY=c + SORT_COL=$OPTARG + ;; + n) + SORT_BY=n + SORT_COL=$OPTARG + ;; + r) + REVERSE=r + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +# Check that -i or -o are given. +if [[ "$IN_FILE" == "" || "$OUT_FILE" == "" ]]; then + echo "Missing option -i or -o" >&2 + exit 1 +fi + +# Setup sort args. +case $SORT_BY in + c) + SORT_ARGS="-k ${SORT_COL},${SORT_COL}${REVERSE}" + ;; + n) + SORT_ARGS="-k ${SORT_COL},${SORT_COL}n${REVERSE}" + ;; + pos) + if [[ "$REVERSE" == "r" ]]; then + SORT_ARGS=${SORT_POS_ARGS_REV} + else + SORT_ARGS=${SORT_POS_ARGS} + fi + ;; +esac + +# Execute sorting. +#echo "SORT_ARGS=${SORT_ARGS}" 1>&2 +( + head -n ${MAX_HEADER} ${IN_FILE} | grep '^#'; + sort ${SORT_ARGS} <(grep -v '^#' ${IN_FILE}); +) > ${OUT_FILE} + +exit $?
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_sort.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,48 @@ +<?xml version="1.0"?> +<tool id="roi_sort" name="ROI Sorting"> + <description>Sort ROI file</description> + + <command interpreter="bash"> + roi_sort.sh -i $input -o $output $reverse + #if str($by) == 'position' # -p #end if + #if str($by) == 'text_column' # -c $column #end if + #if str($by) == 'numeric_column' # -n $column #end if + </command> + + <!-- + Input Files and Parameters + --> + <inputs> + <param name="input" format="roi" type="data" label="ROI file to sort." /> + + <param name="by" type="select" label="Sort by"> + <option value="position">position</option> + <option value="text_column">column as text</option> + <option value="numeric_column">column as number</option> + </param> + + <param name="column" type="integer" label="Column to sort by" value="5" /> + + <param name="reverse" type="boolean" label="reverse" truevalue="-r" falsevalue="" /> + </inputs> + + <!-- + Output Files + --> + <outputs> + <data name="output" format="roi" label="${input.name}_sorted_${by}" /> + </outputs> + + <!-- + Recognize errors by return code and not output to stderr. + --> + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + + <!-- + Tool Help + --> + <help>No help yet.</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_table.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,176 @@ +#!/usr/bin/env python +"""ROI Table Generator + +Generates a HTML page with a table of ROI record details. Besides showing the +numeric ROI information, it also gives little roi plots in a column. + +For the little ROI plots, it calls the program roi_plot_thumbnails that has to +be in the PATH. +""" + +__author__ = 'Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + + +# TODO(holtgrew): Actually call roi_plot_thumbnails +# TODO(holtgrew): from __future__ use print_function + + +import argparse +import math +import os.path +import sys + +import Cheetah.Template + +import ngs_roi.app +import ngs_roi.argparse +import ngs_roi.io + + +# Main template. +PAGE_TPL = """ +<html> + <head><title>ROI Table</title></head> + <body> + <h1>ROI Table</h1> + $table + </body> +</html> +""" + +# Template for a table. +TABLE_TPL = """ +<table border="1"> + <tr> + <th>plot</th> + <th>chr</th> + <th>start</th> + <th>end</th> + <th>name</th> + <th>length</th> + <th>strand</th> + <th>max_count</th> + #for i, key in enumerate($data_keys) + <th>$key</th> + #end for + </tr> + #for id, roi in enumerate($records) + <tr> + <td><div style="width:${args.plot_width}; margin:2px; height:${args.plot_height+1}; background:url(thumbnail_${imgId($id)}.png) -${imgX($id)} -${imgY($id)};"></div></td> + <td>$roi.ref</td> + <td style="text-align:right;">$fmtPos($roi.start_pos + 1)</td> + <td style="text-align:right;">$fmtPos($roi.end_pos)</td> + <td><a href="$href($roi)">$roi.region_name</a></td> + <td style="text-align:right;">$fmtPos($roi.region_length)</td> + <td style="text-align:center;">$roi.strand</td> + <td style="text-align:right;">$roi.max_count</td> + #for i, key in enumerate($data_keys) + <td>$roi.data[$i]</td> + #end for + </tr> + #end for +</table> +""" + + +class RoiTable(object): + """A table of ROI records with small plots.""" + + def __init__(self, args, keys, records, app): + self.args = args + self.keys = keys + self.records = records + self.app = app + + def tplFuncs(self): + def intWithCommas(x): + if type(x) not in [type(0), type(0L)]: + raise TypeError("Parameter must be an integer.") + if x < 0: + return '-' + intWithCommas(-x) + result = '' + while x >= 1000: + x, r = divmod(x, 1000) + result = ",%03d%s" % (r, result) + return "%d%s" % (x, result) + + def imgId(idx): + """Image id from roi record id.""" + return idx / (self.args.num_rows * self.args.num_cols) + + def imgX(idx): + """x position in image from record id.""" + x = idx % self.args.num_cols + res = x * self.args.plot_width + if x > 0: + res += (x - 1) * 2 + return res + + def imgY(idx): + """y position in image from record id.""" + y = idx / self.args.num_cols + res = y * self.args.plot_height + res += y * 2 + return res + + return {'fmtPos': intWithCommas, 'imgId': imgId, 'imgX': imgX, 'imgY': imgY} + + def render(self): + """Returns string with rendered table.""" + vals = {'data_keys': self.keys, 'records': self.records, 'args': self.args, + 'href': lambda x:self.app.buildHref(x.ref, x.start_pos, x.end_pos)} + vals.update(self.tplFuncs()) + t = Cheetah.Template.Template(TABLE_TPL, searchList=vals) + return str(t) + + +class TableApp(ngs_roi.app.App): + def __init__(self, args): + # Call parent's constructor and create output directory. + ngs_roi.app.App.__init__(self, args) + self.prepareOutDir() + + def run(self): + # Load ROI records. + print >>sys.stderr, 'Loading ROI' + records = ngs_roi.io.load(self.args.in_file, self.args.max_rois) + keys = [] + if records: + keys = records[0].data_keys + + # Create plots. + print >>sys.stderr, 'Creating plots...' + runner = ngs_roi.app.PlotThumbnailsRunner(self.args) + runner.run() + + # Create table. + print >>sys.stderr, 'Creating table...' + self.createHtml(self.args.out_file, keys, records) + return 0 + + def createHtml(self, file_name, keys, records): + print >>sys.stderr, 'Writing %s' % self.args.out_file + vals = {'table': RoiTable(self.args, keys, records, self).render()} + t = Cheetah.Template.Template(PAGE_TPL, searchList=vals) + with open(self.args.out_file, 'wb') as f: + f.write(str(t)) + + +def main(): + parser = argparse.ArgumentParser(description='Plot ROI file.') + + ngs_roi.argparse.addFileArguments(parser) + ngs_roi.argparse.addPlotGridArguments(parser, default_plot_height=60, + default_plot_width=90) + ngs_roi.argparse.addLinkArguments(parser) + args = parser.parse_args() + ngs_roi.argparse.applyFileDefaults(args) + + app = TableApp(args) + return app.run() + + +if __name__ == '__main__': + sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_table.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,74 @@ +<?xml version="1.0"?> +<tool id="roi_table" name="ROI Table"> + <description>ROI Overview Report</description> + + <command interpreter="python"> + roi_table.py --in-file $input --out-file "$out_file" --out-dir "$out_file.files_path" + #if $max_rois # --max-rois $max_rois #end if + #if $plot_height # --plot-height $plot_height #end if + #if $plot_width # --plot-width $plot_width #end if + #if $link_target # --link-target $link_target #end if + #if $link_type # --link-type $link_type #end if + #if $igv_host # --igv-host $igv_host #end if + #if $igv_port # --igv-port $igv_port #end if + #if $ucsc_org # --ucsc-org $ucsc_org #end if + #if $ucsc_db # --ucsc-db $ucsc_db #end if + #if $ucsc_chr_prefix # --ucsc-chr-prefix "$ucsc_chr_prefix" #end if + </command> + + <!-- + Input Files and Parameters + --> + <inputs> + <param name="input" format="roi" type="data" + label="ROI file to generate table for."/> + + <param name="max_rois" type="integer" value="5000" + label="Maximal total number of records to process. 0 for all." /> + <param name="max_value" type="integer" value="0" + label="Maximal value to plot. 0 for no limit." /> + + <param name="plot_height" type="integer" value="60" + label="Height of one plot in px." /> + <param name="plot_width" type="integer" value="60" + label="Width of one plot in px." /> + + <param name="link_type" type="select" label="Link target."> + <option value="local_igv" label="Local IGV Instance" /> + <option value="ucsc" label="UCSC Genome Browser" /> + </param> + + <param name="link_target" type="select" label="Open links in."> + <option value="_blank" selected="true">new window</option> + <option value="_top">this window</option> + <option value="">this frame</option> + </param> + + <param name="igv_host" type="text" label="Host for the IGV link." value="localhost" /> + <param name="igv_port" type="integer" label="Port for the IGV link." value="60151" /> + + <param name="ucsc_org" type="text" label="UCSC Genome Browser org value." value="human" /> + <param name="ucsc_db" type="text" label="UCSC Genome Browser db value." value="hg18" /> + <param name="ucsc_chr_prefix" type="text" label="Prefix to add to contig names." value="" /> + </inputs> + + <!-- + Output Files + --> + <outputs> + <data name="out_file" format="html" label="${input.name} Table" /> + </outputs> + + <!-- + Recognize errors by return code and not output to stderr. + --> + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + + <!-- + Tool Help + --> + <help>No help yet.</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rois.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +class RoiRecord(object): + """Represent one record in a ROI file.""" + + def __init__(self, ref, start_pos, end_pos, region_name, region_length, + strand, max_count, data, points): + """Initialize RoiRecord.""" + self.ref = ref + self.start_pos = start_pos + self.end_pos = end_pos + self.strand = strand + self.region_length = region_length + self.region_name = region_name + self.max_count = max_count + self.data = data + self.points = points + + def __str__(self): + return 'RoiRecord(%s, %s, %s, %s, %s, %s, %s, %s, len([...])==%s)' % \ + (repr(self.ref), self.start_pos, self.end_pos, + self.region_name, self.region_length, repr(self.strand), + self.max_count, self.data, len(self.points)) + def __repr__(self): + return self.__str__() + + +def loadRoi(path, max_count=0): + """Load ROI file and return it as a list of RoiRecord objects. + + NA values are translated to 0. + """ + data_keys = [] + result = [] + i = 0 + with open(path, 'rb') as f: + for line in f: + if line.startswith('##'): + data_keys = line[2:].split('\t')[7:-1] + if line[0] == '#': + continue + if max_count > 0 and i >= max_count: + break + i += 1 + vals = line.split() + region_length = int(vals[4]) + data = vals[7:-1] + points = [int(x) for x in vals[-1].split(',')] + r = RoiRecord(vals[0], int(vals[1]) - 1, int(vals[2]), vals[3], + region_length, vals[5], int(vals[6]), data, points) + result.append(r) + #print ' => Loaded %d records.' % len(result) + return data_keys, result