# HG changeset patch # User holtgrewe # Date 1366286618 14400 # Node ID 61d9bdb6d5195738557c70ac3b050a4addf62288 Uploaded diff -r 000000000000 -r 61d9bdb6d519 LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,30 @@ +Copyright (c) 2013, Bernd Jagla , + Manuel Holtgrewe + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + Neither the name of the nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff -r 000000000000 -r 61d9bdb6d519 README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,26 @@ +Dependencies +============ + + * ngsroi R package (shipped with tar.gz) + * Python Cheetah package. + * ngs_roi binaries: + + svn co http://svn.seqan.de/seqan/trunk seqan-trunk + pushd seqan-trunk/sandbox + svn co https://projets.pasteur.fr/.../jagla jagla + popd + mkdir -p seqan-trunk-build/release && cd seqan-trunk-build/release + cmake ../../seqan-trunk + pushd sandbox/jagla/apps/ngs_roi + make + popd + ls bin/* + +Notes +===== + +You have to allow full HTML output for the tool to work properly. + +Edit your univers_wsgi.ini to include the line + + sanitize_all_html = False \ No newline at end of file diff -r 000000000000 -r 61d9bdb6d519 bam2roi.ctd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam2roi.ctd Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,45 @@ + + + Bam2roi + bam2roi + 0.1 + Create ROI from BAM file. + Calculated consecutive regions of coverage from alignment file IN.bam and write regions of interst to file OUT.roi. Counting is performed over the entire region (including intron and N-regions) based on the CIGAR string of the alignment record. + + http://www.seqan.de + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 61d9bdb6d519 bam2roi.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam2roi.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,59 @@ + + + + bam2roi + $bam2roi_verbosity + $bam2roi_strand_specific + $bam2roi_ignore_pairing + $bam2roi_link_over_skipped + --input-file $bam2roi_input_file + --output-file $bam2roi_output_file + + + Create ROI from BAM file. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + No help yet. + diff -r 000000000000 -r 61d9bdb6d519 ctd2galaxy.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ctd2galaxy.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,527 @@ +#!/usr/bin/env python +"""Conversion of the CTD format into Galaxy XML. + +The CTD parser should be reusable but is not in its own module since it is +only used here at the moment. +""" + +import argparse +import operator +import sys +import xml.sax +import xml.sax.saxutils + +class CTDFormatException(Exception): + """Raised when there is a format error in CTD.""" + + +class CLIElement(object): + """Represents a tag. + + :ivar option_identifier: with parameters (e.g. --param), empty if argument. + :type option_identifier: str + :ivar is_list: whether the element is a list. + :type is_list: bool + :ivar param_node: link to ParametersNode, set after parsing, None if unset + :ivar is_list: w or not this element is a list. + :type is_list: bool + """ + + def __init__(self, option_identifier='', mapping_path='', is_list=False): + """Initialize object.""" + self.option_identifier = option_identifier + self.param_node = None # Link to ParametersNode, set after parsing. + self.mapping_path = mapping_path + self.is_list = is_list + + def __str__(self): + """String representaiton of CLIElement.""" + t = (self.option_identifier, self.mapping_path, self.is_list) + return 'CLIElement(%s, %s, %s)' % tuple(map(repr, list(t))) + + +class ParametersNode(object): + """Represents a tag inside the tags. + + :ivar name: name attribute of the node + :ivar description: text for description attribute of the node + :ivar value: value attribute of the node + :ivar type_: type attribute of the node + :ivar tags: tags attribute of the node + :ivar supported_formats: supported_format attribute of the node + :ivar restrictions: restrictions attribute of the node + :ivar path: the path to the node + :ivar path: list of strings + :ivar parent: link to the parent of the node + :ivar children: children of the node + :type children: dict with name to node mapping + :ivar cli_element: CLIElement that this parameter is mapped to. + """ + + def __init__(self, kind='', name='', description='', value='', type_='', tags='', + restrictions='', supported_formats=''): + """Initialize the object.""" + self.kind = kind + self.name = name + self.description = description + self.value = value + self.type_ = type_ + self.tags = tags + self.supported_formats = supported_formats + self.restrictions = restrictions + self.path = None # root if is None + self.parent = None # not set, usually a list + self.children = {} + self.cli_element = None + + def computePath(self, is_root=True, path=[]): + """Compute path entry from parent links. + + :param is_root: whether or not this is the root node + :type is_root: bool + :param path: path to this node, excluding root + :type path: list of strings + """ + self.path = list(path) + if not is_root: + self.path.append(self.name) + if not self.children: + return # nothing to do: early exit. + for name, child in self.children.items(): + child.computePath(False, self.path) + + def applyFunc(self, f): + """Apply f to self and all children.""" + f(self) + for c in self.children.values(): + c.applyFunc(f) + + def find(self, path): + """Return ParametersNode object at the path below the node.""" + if not path: + return self + if not self.children.get(path[0]): + return None + return self.children[path[0]].find(path[1:]) + + def __str__(self): + """Return string representation.""" + t = (self.name, self.description, self.value, self.type_, self.tags, + self.supported_formats, self.children, self.path) + return 'ParametersNode(%s, %s, %s, %s, %s, %s, %s, path=%s)' % tuple(map(repr, t)) + + def __repr__(self): + """Return programmatic representation, same as __str__().""" + return str(self) + + +class Tool(object): + """Represents the top-level tag from a CTD file. + + :ivar name: name attribute value + :type name: str + :ivar executable_name: executableName attribute value + :type executable_name: str + :ivar version: version attribute value + :type version: str + :ivar description: description attribute value + :type description: str + :ivar manual: manual attribute value + :type manual: str + :ivar doc_url: docurl attribute value + :type doc_url: str + :ivar category: category attribute value + :type category: str + :ivar cli_elements: list of CLIElement objects + :ivar parameters: root parameters node + :type parameters: ParametersNode + """ + + def __init__(self, name='', executable_name='', version='', + description='', manual='', doc_url='', + category=''): + self.name = name + self.executable_name = executable_name + self.version = version + self.description = description + self.manual = manual + self.doc_url = doc_url + self.category = category + self.cli_elements = [] + self.parameters = None + + def parsingDone(self): + """Called after parsing is done. + + The method will compute the paths of the parameter nodes and link the + CLIElement objects in self.cli_elements to the ParameterNode objects. + """ + self.parameters.computePath() + for ce in self.cli_elements: + if not ce.option_identifier: + continue # Skip arguments + path = ce.mapping_path.split('.') + node = self.parameters.find(path) + if not node: + raise CTDFormatException('Unknown parameter %s' % '.'.join(path)) + ce.param_node = node + node.cli_element = ce + + def __str__(self): + t = (self.name, self.executable_name, self.version, self.description, + self.manual, self.doc_url, self.category) + return 'Tool(%s, %s, %s, %s, %s, %s, %s)' % tuple(map(repr, list(t))) + + + +class CTDHandler(xml.sax.handler.ContentHandler): + def __init__(self): + self.result = None + # A stack of tag names that are currently open. + self.stack = [] + # The current parameter to append nodes below. + self.parameter_node = None + + def startElement(self, name, attrs): + """Handle start of element.""" + # Maintain a stack of open tags. + self.stack.append(name) + # Handle the individual cases. The innermost tag is self.stack[-1]. + if self.stack == ['tool']: + # Create the top level Tool object. + self.tool = Tool() + self.result = self.tool + elif self.stack == ['tool', 'cli', 'clielement']: + # Create a new CLIElement object for a tag. + if not attrs.get('isList'): + raise CTDFormatException('No attribute isList in .') + if attrs.get('optionIdentifier') is None: + raise CTDFormatException('no attribute optionIdentifier in .') + is_list = (attrs.get('isList') == 'false') + option_identifier = attrs.get('optionIdentifier') + self.tool.cli_elements.append(CLIElement(option_identifier=option_identifier, is_list=is_list)) + elif self.stack == ['tool', 'cli', 'clielement', 'mapping']: + # Handle a sub entry of a tag. + if not attrs.get('referenceName'): + raise CTDFormatException('no attribute referenceName in ') + self.tool.cli_elements[-1].mapping_path = attrs['referenceName'] + elif self.stack == ['tool', 'PARAMETERS']: + # Handle the entry by creating a new top parameters node. + self.tool.parameters = ParametersNode(kind='node', name='') + self.parameter_node = self.tool.parameters + elif self.stack[:2] == ['tool', 'PARAMETERS'] and self.stack[-1] == 'NODE': + # Create a new node ParametersNode for the entry. + if not attrs.get('name'): + raise CTDFormatException('no attribute name in ') + name = attrs.get('name') + node = ParametersNode(kind='node', name=name) + node.parent = self.parameter_node + self.parameter_node.children[name] = node + self.parameter_node = node + elif self.stack[:2] == ['tool', 'PARAMETERS'] and self.stack[-1] == 'ITEM': + # Create a new item ParametersNode for the entry. + if not attrs.get('name'): + raise CTDFormatException('no attribute name in ') + name = attrs.get('name') + value = attrs.get('value') + type_ = attrs.get('type') + tags = attrs.get('tags') + description = attrs.get('description') + restrictions = attrs.get('restrictions') + supported_formats = attrs.get('supported_formats') + child = ParametersNode( + kind='item', name=name, description=description, value=value, + type_=type_, tags=tags, supported_formats=supported_formats, + restrictions=restrictions) + self.parameter_node.children[name] = child + + def endElement(self, name): + """Handle closing tag.""" + # Maintain stack. + self.stack.pop() + # Go up one node in the parameters tree if + if name == 'NODE': + self.parameter_node = self.parameter_node.parent + + def characters(self, content): + """Handle characters in XML file.""" + if self.stack == ['tool', 'name']: + self.tool.name += content + elif self.stack == ['tool', 'executableName']: + self.tool.executable_name += content + elif self.stack == ['tool', 'version']: + self.tool.version += content + elif self.stack == ['tool', 'description']: + self.tool.description += content + elif self.stack == ['tool', 'manual']: + self.tool.manual += content + elif self.stack == ['tool', 'docurl']: + self.tool.doc_url += content + elif self.stack == ['tool', 'category']: + self.tool.category += content + + +class CTDParser(object): + """Parser for CTD files.""" + + def __init__(self): + self.handler = CTDHandler() + + def parse(self, path): + # Parse XML into Tool object. + parser = xml.sax.make_parser() + parser.setContentHandler(self.handler) + parser.parse(path) + # Compute paths for tool's parameters. + self.handler.result.parsingDone() + return self.handler.result + + +class XMLWriter(object): + """Base class for XML writers. + + + :ivar result: list of strings that are joined for the final XML + :ivar indent_level: int with the indentation level + """ + + def __init__(self): + self.result = [] + self.indent_level = 0 + + def indent(self): + """Return indentation whitespace.""" + return ' ' * self.indent_level + + def appendTag(self, tag, text='', args={}): + """Append a tag to self.result with text content only or no content at all.""" + e = xml.sax.saxutils.quoteattr + args_str = ' '.join('%s=%s' % (key, e(str(value))) for key, value in args.items()) + if args_str: + args_str = ' '+ args_str + vals = {'indent': self.indent(), + 'tag': tag, + 'text': text.strip(), + 'args': args_str} + if text: + self.result.append('%(indent)s<%(tag)s%(args)s>%(text)s\n' % vals) + else: + self.result.append('%(indent)s<%(tag)s%(args)s />\n' % vals) + + def openTag(self, tag, args={}): + """Append an opening tag to self.result.""" + e = xml.sax.saxutils.quoteattr + args_str = ' '.join('%s=%s' % (key, e(str(value))) for key, value in args.items()) + if args_str: + args_str = ' ' + args_str + vals = {'indent': self.indent(), + 'tag': tag, + 'args': args_str} + self.result.append('%(indent)s<%(tag)s%(args)s>\n' % vals) + + def closeTag(self, tag): + """Append a closing tag to self.result.""" + vals = {'indent': self.indent(), 'tag': tag} + self.result.append('%(indent)s\n' % vals) + + def handleParameters(self, node): + """Recursion for appending tags for ParametersNode.""" + for pn in node.children.values(): + if pn.kind == 'item': + args = {'name': pn.name, + 'value': pn.value, + 'type': pn.type_, + 'description': pn.description, + 'restrictions': pn.restrictions, + 'tags': pn.tags} + self.appendTag('ITEM', args=args) + else: # node.kind == 'node' + args = {'name': pn.name, + 'description': pn.description} + self.openTag('NODE', args=args) + self.indent_level += 1 + self.handleParameters(pn) + self.indent_level -= 1 + self.closeTag('NODE') + + +class CTDWriter(XMLWriter): + """Write a Tool to CTD format.""" + + def run(self, tool, f): + """Write the given Tool to file f.""" + self.result.append('\n') + self.openTag('tool') + self.indent_level += 1 + self.appendTag('name', tool.name) + self.appendTag('executableName', tool.executable_name) + self.appendTag('version', tool.version) + self.appendTag('description', tool.description) + self.appendTag('manual', tool.manual) + self.appendTag('docurl', tool.doc_url) + self.appendTag('category', tool.category) + # and group + self.openTag('cli') + self.indent_level += 1 + for ce in tool.cli_elements: + self.openTag('clielement', args={'optionIdentifier': ce.option_identifier, + 'isList': {True: 'true', False: 'false'}[ce.is_list]}) + self.indent_level += 1 + self.appendTag('mapping', args={'referenceName': ce.mapping_path}) + self.indent_level -= 1 + self.closeTag('clielement') + self.indent_level -= 1 + self.closeTag('cli') + # , , group + self.openTag('PARAMETERS', args={'version': 1.4, + 'xsi:noNamespaceSchemaLocation': 'http://open-ms.sourceforge.net/schemas/Param_1_4.xsd', + 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance'}) + self.indent_level += 1 + self.handleParameters(tool.parameters) + self.indent_level -= 1 + self.closeTag('PARAMETERS') + self.indent_level -= 1 + self.closeTag('tool') + # Write result + for x in self.result: + f.write(x) + + +class GalaxyWriter(XMLWriter): + """Write a Tool to the Galaxy format.""" + + def run(self, tool, f): + """Write the given Tool to file f.""" + self.result.append('\n') + self.openTag('tool', {'id': tool.executable_name, 'name': tool.name}) + self.indent_level += 1 + self.addCommandTag(tool) + self.appendTag('description', text=tool.description) + self.openTag('inputs') + self.indent_level += 1 + tool.parameters.applyFunc(lambda x: self.addInputParam(x)) + self.indent_level -= 1 + self.closeTag('inputs') + self.openTag('outputs') + self.indent_level += 1 + tool.parameters.applyFunc(lambda x: self.addOutputParam(x)) + self.indent_level -= 1 + self.closeTag('outputs') + self.openTag('stdio') + self.indent_level += 1 + self.appendTag('exit_code', args={'range': '1:', 'level': 'fatal'}) + self.appendTag('exit_code', args={'range': ':-1', 'level': 'fatal'}) + self.indent_level -= 1 + self.closeTag('stdio') + self.indent_level -= 1 + self.closeTag('tool') + # Write result + for x in self.result: + f.write(x) + + def addInputParam(self, param_node): + """Add a ParametersNode object if it is to go to .""" + if param_node.tags and 'output file' in param_node.tags.split(','): + return # Skip output files + if param_node.kind != 'item': + return # Skip if not item. + args = {} + if param_node.tags and 'input file' in param_node.tags.split(','): + args['type'] = 'data' + args['format'] = ','.join([x.replace('*', '').replace('.', '') + for x in param_node.supported_formats.split(',')]) + args['name'] = '_'.join(param_node.path).replace('-', '_').replace('.', '_') + args['label'] = param_node.description + args['type'] = 'data' + self.appendTag('param', args=args) + else: + TYPE_MAP = { + 'string': 'text', + 'double': 'float', + 'int': 'integer' + } + args['type'] = TYPE_MAP[param_node.type_] + args['name'] = '_'.join(param_node.path).replace('-', '_').replace('.', '_') + args['label'] = param_node.description + if param_node.type_ == 'string' and param_node.restrictions and \ + sorted(param_node.restrictions.split(',')) == ['false', 'true']: + args['type'] = 'boolean' + if param_node.value == 'true': + args['checked'] = 'true' + args['truevalue'] = param_node.cli_element.option_identifier + args['falsevalue'] = '' + self.appendTag('param', args=args) + return + args['value'] = param_node.value + if param_node.type_ == 'string' and param_node.restrictions: + args['type'] = 'select' + self.openTag('param', args=args) + self.indent_level += 1 + for v in param_node.restrictions.split(','): + self.appendTag('option', v, {'value': v}) + self.indent_level -= 1 + self.closeTag('param') + else: + self.appendTag('param', args=args) + + def addOutputParam(self, param_node): + """Add a ParametersNode object if it is to go to .""" + if not param_node.tags or not 'output file' in param_node.tags.split(','): + return # Only add for output files. + args = {} + if '.' in param_node.supported_formats: + args['format'] = param_node.supported_formats.split(',')[0].split('.')[-1] + else: + args['format'] = param_node.supported_formats.split(',')[0].split('*')[-1] + args['name'] = '_'.join(param_node.path).replace('-', '_').replace('.', '_') + args['label'] = param_node.description + self.appendTag('data', args=args) + + def addCommandTag(self, tool): + """Write tag to self.result.""" + lst = [] + for ce in tool.cli_elements: + bool_param = False + if ce.param_node.type_ == 'string' and ce.param_node.restrictions and \ + sorted(ce.param_node.restrictions.split(',')) == ['false', 'true']: + bool_param = True + if not bool_param and ce.option_identifier: + lst.append(ce.option_identifier) + # The path mapping is not ideal but should work OK. + lst.append('$' + ce.mapping_path.replace('-', '_').replace('.', '_')) + txt = [tool.executable_name] + lst + self.appendTag('command', text=' '.join(txt)) + + +def main(): + """Main function.""" + # Setup argument parser. + parser = argparse.ArgumentParser(description='Convert CTD to Galaxy XML') + parser.add_argument('-i', '--in-file', metavar='FILE', + help='CTD file to read.', dest='in_file', + required=True) + parser.add_argument('-o', '--out-file', metavar='FILE', + help='File to write. Output type depends on extension.', + dest='out_file', required=True) + + args = parser.parse_args() + + # Parse input. + sys.stderr.write('Parsing %s...\n' % args.in_file) + ctd_parser = CTDParser() + tool = ctd_parser.parse(args.in_file) + + # Write output. + sys.stderr.write('Writing to %s...\n' % args.out_file) + if args.out_file.endswith('.ctd'): + writer = CTDWriter() + else: + writer = GalaxyWriter() + with open(args.out_file, 'wb') as f: + writer.run(tool, f) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff -r 000000000000 -r 61d9bdb6d519 datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,10 @@ + + + + + + + + diff -r 000000000000 -r 61d9bdb6d519 ngs_roi/__init__.py diff -r 000000000000 -r 61d9bdb6d519 ngs_roi/app.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ngs_roi/app.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,64 @@ +#!/usr/bin/env python +"""Support code for writing the NGS ROI report generation apps.""" + +__author__ = 'Manuel Holtgrewe ' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + + +import os +import os.path +import subprocess + + +class App(object): + """Base class with helper functionality for NGS ROI report generators.""" + + def __init__(self, args): + self.args = args + + def prepareOutDir(self): + """Create output directory if necessary.""" + if not os.path.exists(self.args.out_dir): + os.makedirs(self.args.out_dir) + + def buildHref(self, ref, begin_pos, end_pos): + vals = {'ref': ref, 'start_pos': begin_pos + 1, 'end_pos': end_pos} + if self.args.link_type == 'local_igv': + vals['host'] = self.args.igv_host + vals['port'] = self.args.igv_port + return 'http://%(host)s:%(port)d/goto?locus=%(ref)s:%(start_pos)d-%(end_pos)d' % vals + else: # self.args.link_type == 'ucsc' + vals['org'] = self.args.ucsc_org + vals['db'] = self.args.ucsc_db + vals['ref'] = self.args.ucsc_chr_prefix + vals['ref'] + return 'http://genome.ucsc.edu/cgi-bin/hgTracks?org=%(org)s&db=%(db)s&position=%(ref)s:%(start_pos)d-%(end_pos)d' % vals + + +class PlotThumbnailsRunner(object): + """Helper class for running the roi_plot_thumbnails program. + + :ivar args: Arguments as returned by argparse. + """ + + def __init__(self, args): + self.args = args + + def run(self): + cmd_args = ['-if', self.args.in_file, + '-o', os.path.join(self.args.out_dir, 'thumbnail_'), + '--max-rois', self.args.max_rois, + '--max-value', self.args.max_value, + '--num-cols', self.args.num_cols, + '--num-rows', self.args.num_rows, + '--plot-height', self.args.plot_height, + '--plot-width', self.args.plot_width, + '--border-width', self.args.border_width, + '--spacing', self.args.spacing] + cmd_args = ['roi_plot_thumbnails'] + map(str, cmd_args) + #import pdb; pdb.set_trace() + p = subprocess.Popen(cmd_args, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + res = p.wait() + if res: + print 'ERROR', p.stdin, p.stderr + return res diff -r 000000000000 -r 61d9bdb6d519 ngs_roi/argparse.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ngs_roi/argparse.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,99 @@ +#!/usr/bin/env python +"""Helper for ROI tools when using argparse module. + +This module contains helper functions for setup of argparse.ArgumentParser +that is common to multiple report generating apps. +""" + +__author__ = 'Manuel Holtgrewe ' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + + +import os.path + + +def addFileArguments(parser): + """Adds --in-file, --out-file, and --out-dir to argument parser. + + These parameters are used for the input ROI file, the output HTML file and + the directory to write supplementary files to. + """ + group = parser.add_argument_group('Input / Output', + 'Input and output related parameters.') + group.add_argument('--in-file', metavar='ROI', dest='in_file', + required=True, help='ROI file to read') + group.add_argument('--out-file', dest='out_file', required=True, + help='path to output HTML file') + group.add_argument('--out-dir', dest='out_dir', + help='directory to write supplementary files to; ' + 'defaults to path of --out-file.') + + +def applyFileDefaults(args): + """Apply file-related default values (copying paths if not set).""" + if not args.out_dir: + args.out_dir = os.path.dirname(args.out_file) or '.' + + +def addPlotGridArguments(parser, default_plot_height=30, default_plot_width=30): + """Adds arguments related to plot grids. + + This is used for the thumbnail plotting but also for the tables. + """ + group = parser.add_argument_group('Plot Grid Configuration', + 'Arguments for the plot image grid.') + + group.add_argument('--max-rois', dest='max_rois', metavar='NUM', + type=int, default=0, + help='Maximal number of ROIs, 0 for all.') + group.add_argument('--max-value', dest='max_value', metavar='NUM', + type=int, default=0, + help='Largest y value to plot, 0 for all.') + + group.add_argument('--num-rows', dest='num_rows', metavar='ROWS', + type=int, default=50, + help='Number of rows per grid.') + group.add_argument('--num-cols', dest='num_cols', metavar='COLS', + type=int, default=40, + help='Number of columns per grid.') + + group.add_argument('--plot-height', dest='plot_height', metavar='HEIGHT', + type=int, default=default_plot_height, help='Height of one plot in px.') + group.add_argument('--plot-width', dest='plot_width', metavar='WIDTH', + type=int, default=default_plot_width, help='Width of one plot in px.') + + group.add_argument('--border-width', dest='border_width', metavar='WIDTH', + type=int, default=0, help='Border width.') + group.add_argument('--spacing', dest='spacing', metavar='SPACING', + type=int, default=2, help='Spacing.') + + +def addLinkArguments(parser): + """Adds arguments related to link creation. + + These parameters control the created links to local IGV browser HTTP + remote control or the UCSC genome browser. + """ + group = parser.add_argument_group('HTML Links', 'Arguments for HTML link creation.') + + group.add_argument('--link-target', dest='link_target', metavar='TARGET', + default='_blank', choices=['_blank', '_top'], + help='Select the link target to create (_blank or _top).') + + group.add_argument('--link-type', dest='link_type', metavar='TARGET', + default='local_igv', choices=['local_igv', 'ucsc'], + help='Select the type of links to create. One of ' + '"local_igv" and "ucsc".') + + group.add_argument('--igv-host', dest='igv_host', metavar='HOST', + default='localhost', help='Host for IGV link.') + group.add_argument('--igv-port', dest='igv_port', metavar='PORT', + type=int, default='60151', help='Port for IGV link.') + + group.add_argument('--ucsc-org', dest='ucsc_org', metavar='ORG', + default='human', help='Organism for UCSC browser link.') + group.add_argument('--ucsc-db', dest='ucsc_db', metavar='DB', + default='hg18', help='Assembly version for UCSC browser link.') + group.add_argument('--ucsc-chr-prefix', dest='ucsc_chr_prefix', metavar='PREFIX', + default='', help='Prefix for chromosome names in UCSC browser.') diff -r 000000000000 -r 61d9bdb6d519 ngs_roi/io.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ngs_roi/io.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,140 @@ +#!/usr/bin/env python +"""Input / Output Routines for ROI files. + +You can load the records sequentially one by one by iterating over a +RoiFile object: + +f1 = ngs_roi.io.RoiFile('in.roi') +for r in f1: + print r.ref, r.start_pos, r.end_pos + +f2 = ngs_roi.io.RoiFile('in.roi') +print f1.next().ref +print f1.next().ref +print f1.next().ref + +Alternatively, you can load all or a subset of the records: + +ten_records = ngs_roi.io.load('in.roi', 10) # first ten +all_records = ngs_roi.io.load('in.roi') +""" + +__author__ = 'Manuel Holtgrewe ' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + + +class RoiRecord(object): + """Represent one record in a ROI file. + + :ivar ref:name of reference + :type ref:str + :ivar start_pos:0-based start position + :type start_pos:int + :ivar end_pos:0-based end position + :type end_pos:int + :ivar region_name:name of the region + :type region_name:str + :ivar region_length:length of the region + :type region_length:int + :ivar strand:strand of the region, one of ['+', '-'] + :type strand:str + :ivar max_count:highest coverage + :type max_count:int + :ivar data:values of the extended data fields + :type data:list of str + :ivar points:the coverage over the region length + :type points:list of ints + + :ivar data_keys:list with key names for the data fields + :type data_keys:list of str or None + """ + + def __init__(self, ref, start_pos, end_pos, region_name, region_length, + strand, max_count, data, points, data_keys=None): + """Initialize RoiRecord.""" + self.ref = ref + self.start_pos = start_pos + self.end_pos = end_pos + self.strand = strand + self.region_length = region_length + self.region_name = region_name + self.max_count = max_count + self.data = data + self.points = points + self.data_keys = data_keys + + def __str__(self): + return 'RoiRecord(%s, %s, %s, %s, %s, %s, %s, %s, len([...])==%s, %s)' % \ + (repr(self.ref), self.start_pos, self.end_pos, + self.region_name, self.region_length, repr(self.strand), + self.max_count, self.data, len(self.points), self.data_keys) + def __repr__(self): + return self.__str__() + + +class RoiFile(object): + """File of ROI records. + + Can be used as an iterator. + + :ivar data_keys:Keys of additional data + :type data_keys:list of str + """ + + def __init__(self, path): + """Open ROI file and load header.""" + self.path = path + self.f = open(path, 'rb') + self.line = None + self.data_keys = self._loadKeys(path) + + def _loadKeys(self, path): + """Load keys from header, skipping comments.""" + while True: + self.line = self.f.readline() + if self.line == '': # EOF + return None + if self.line.startswith('##'): + keys = self.line[1:].strip().split('\t') + self.line = self.f.readline() + return keys[7:-1] + if self.line.startswith('#'): + continue # comment + break + + def __iter__(self): + """Return iterator (self).""" + return self + + def next(self): + """Return next record.""" + if self.line == '': # EOF + raise StopIteration + l = self.line + self.line = self.f.readline() + return self._buildRecord(l) + + def _buildRecord(self, line): + """Build RoiRecord from ROI line.""" + vals = line.split() + region_length = int(vals[4]) + data = vals[7:-1] + points = [int(x) for x in vals[-1].split(',')] + return RoiRecord(vals[0], int(vals[1]) - 1, int(vals[2]), vals[3], + region_length, vals[5], int(vals[6]), data, points, + self.data_keys) + + + +def load(path, max_count=0): + """Load ROI file and return it as a list of RoiRecord objects. + + NA values are translated to 0. + """ + result = [] + for i, x in enumerate(RoiFile(path)): + if max_count > 0 and i >= max_count: + break + result.append(x) + return result diff -r 000000000000 -r 61d9bdb6d519 roi_details.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_details.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,144 @@ +#!/usr/bin/env python +"""Generation of detailed ROI reports with larger plots. + +This report generation works for hundred of ROIs. +""" + +import argparse +import math +import os.path +import sys + +import Cheetah.Template +import matplotlib.pyplot as plt + +import rois + +PAGE_TPL = """ + + ROI Table + +

Detailed ROI Report

+ #for i, roi in enumerate($records) +

${roi.ref}:${roi.start_pos + 1}-${roi.end_pos+1}

+ +
+
chr:start-end name length strand
+
${roi.ref}:${roi.start_pos}-${roi.end_pos} ${roi.region_name} ${roi.region_length} ${roi.strand}
+
metrics
+
#for j, key in enumerate($data_keys)#$key: ${roi.data[$j]}, #end for#
+
+ #end for + + +""" + +class DetailedRoiGenerator(object): + """Generate detailed ROI report. + + :ivar args:Arguments from the comment line. + """ + + def __init__(self, args): + self.args = args + + def run(self): + """Run report generation, return status code. + + :return: integer with the result. + """ + print >>sys.stderr, 'Loading ROI' + keys, records = rois.loadRoi(self.args.in_file, self.args.max_rois) + + self.writeHtml(keys, records) + self.writePlots(records) + return 0 + + def writePlots(self, records): + for i, roi in enumerate(records): + file_name = '%s%d.png' % (os.path.join(self.args.out_dir, self.args.out_prefix), i) + print >>sys.stderr, 'Writing plot %s' % file_name + plt.figure(figsize=(6, 4)) + plt.gcf().subplots_adjust(bottom=0.1, left=0.1) + plt.plot(roi.points) + plt.ylabel('coverage') + plt.xlabel('ROI pos') + plt.savefig(file_name) + + def writeHtml(self, keys, records): + file_name = self.args.out_file + print >>sys.stderr, 'Writing HTML file %s' % file_name + + vals = {'args': self.args, 'records': records, 'data_keys': keys} + t = Cheetah.Template.Template(PAGE_TPL, searchList=vals) + + with open(file_name, 'wb') as f: + f.write(str(t)) + + +def main(): + parser = argparse.ArgumentParser(description='Plot ROI file.') + parser.add_argument('-o', '--out-file', dest='out_file', metavar='PATH', + help='Prefix of output HTML file. The tool will create ' + 'PNG files parallel to the HTML file.', + default='roi_plots.html') + parser.add_argument('-d', '--out-dir', dest='out_dir', metavar='DIR', + help='Directory to write images to. By default ' + 'this is the same as the directory for --out-file.', + default=None) + parser.add_argument('-p', '--out-prefix', dest='out_prefix', metavar='DIR', + help='Prefix of output PNG files.', default='roi_plot_grid') + parser.add_argument('-i', '--in-file', dest='in_file', metavar='FILE', + required=True, help='Path to ROI file to read.') + + group = parser.add_argument_group('Plot Configuration', 'Arguments for the plot images.') + + group.add_argument('--max-rois', dest='max_rois', metavar='NUM', + type=int, default=0, + help='Maximal number of ROIs, 0 for all.') + + group.add_argument('--num-rows', dest='num_rows', metavar='ROWS', + type=int, default=50, + help='Number of rows per grid.') + group.add_argument('--num-cols', dest='num_cols', metavar='COLS', + type=int, default=40, + help='Number of columns per grid.') + + group.add_argument('--plot-height', dest='plot_height', metavar='HEIGHT', + type=int, default=30, help='Height of one plot in px.') + group.add_argument('--plot-width', dest='plot_width', metavar='WIDTH', + type=int, default=30, help='Width of one plot in px.') + + group = parser.add_argument_group('HTML Links', 'Arguments for HTML link creation.') + + group.add_argument('--link-target', dest='link_target', metavar='TARGET', + default='_blank', choices=['_blank', '_top'], + help='Select the link target to create (_blank or _top).') + + group.add_argument('--link-type', dest='link_type', metavar='TARGET', + default='local_igv', choices=['local_igv', 'ucsc'], + help='Select the type of links to create. One of ' + '"local_igv" and "ucsc".') + + group.add_argument('--igv-host', dest='igv_host', metavar='HOST', + default='localhost', help='Host for IGV link.') + group.add_argument('--igv-port', dest='igv_port', metavar='PORT', + type=int, default='60151', help='Port for IGV link.') + + group.add_argument('--ucsc-org', dest='ucsc_org', metavar='ORG', + default='human', help='Organism for UCSC browser link.') + group.add_argument('--ucsc-db', dest='ucsc_db', metavar='DB', + default='hg18', help='Assembly version for UCSC browser link.') + group.add_argument('--ucsc-chr-prefix', dest='ucsc_chr_prefix', metavar='PREFIX', + default='', help='Prefix for chromosome names in UCSC browser.') + + args = parser.parse_args() + if not args.out_dir: + args.out_dir = os.path.dirname(args.out_file) or '.' + print args + app = DetailedRoiGenerator(args) + return app.run() + + +if __name__ == '__main__': + sys.exit(main()) diff -r 000000000000 -r 61d9bdb6d519 roi_feature_projection.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_feature_projection.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,79 @@ + + + Intersect ROIs with GFF/BED + + + + #if $roi_intersect_genome# ln -s ${roi_intersect_genome} ${roi_intersect_genome}.${roi_intersect_genome.ext}; #end if + #if $roi_intersect_in_roi# ln -s ${roi_intersect_in_roi} ${roi_intersect_in_roi}.${roi_intersect_in_roi.ext}; #end if + #if $roi_intersect_out_roi# touch ${roi_intersect_out_roi}; ln -s ${roi_intersect_out_roi} ${roi_intersect_out_roi}.roi; #end if + #if $roi_intersect_in_features # ln -s ${roi_intersect_in_features} ${roi_intersect_in_features}.${roi_intersect_in_features.ext}; #end if + + + roi_intersect $roi_intersect_strand_specific + --in-roi ${roi_intersect_in_roi}.${roi_intersect_in_roi.ext} + --in-features ${roi_intersect_in_features}.${roi_intersect_in_features.ext} + --out-roi ${roi_intersect_out_roi}.roi + --mode $roi_intersect_mode + #if $roi_intersect_genome# --genome ${roi_intersect_genome}.${roi_intersect_genome.ext} #end if + #if $verbosity # $verbosity #end if + #if $roi_intersect_gff_type # --gff-type $roi_intersect_gff_type #end if + #if $roi_intersect_gff_group_by# --gff-group-by $roi_intersect_gff_group_by #end if + ; + + + #if $roi_intersect_genome# rm -f ${roi_intersect_genome}.${roi_intersect_genome.ext}; #end if + #if $roi_intersect_in_roi# rm -f ${roi_intersect_in_roi}.${roi_intersect_in_roi.ext}; #end if + #if $roi_intersect_in_roi# rm -f ${roi_intersect_out_roi}.roi; #end if + #if $roi_intersect_in_features# rm -f ${roi_intersect_in_features}.${roi_intersect_in_features.ext}; #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + No help yet. + diff -r 000000000000 -r 61d9bdb6d519 roi_filter.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_filter.sh Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,79 @@ +#!/bin/bash + +# Sorting of ROI files. +# +# USAGE: sort_filter.sh [-n] -c COL -o OP -v VALUE OUT.roi +# +# Read lines from IN.roi, write header lines. Otherwise, check that the +# given column is in relation OP to the value and only write if this is +# fulfilled. Use -n to compare as number. + +# Maximal number of header lines. +MAX_HEADER=1000 + +# The arguments will go here. +COL= +OP= +VALUE= +TXT=1 + +# Parse option values. +while getopts "c:o:v:n" opt; do + case $opt in + c) + COL=$OPTARG + ;; + o) + case $OPTARG in + eq) + OP="==" + ;; + geq) + OP=">=" + ;; + leq) + OP="<=" + ;; + gt) + OP=">" + ;; + lt) + OP="<" + ;; + neq) + OP="!=" + ;; + \?) + echo "Invalid operator $OPTARG" >&2 + exit 1 + ;; + esac + ;; + v) + VALUE=$OPTARG + ;; + n) + TXT=0 + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +# Check that -i or -o are given. +if [[ "$COL" == "" || "$OP" == "" || "$VALUE" == "" ]]; then + echo "Missing option -c, -o, or -v" >&2 + exit 1 +fi + +if [[ "x$TXT" == "x1" ]]; then + VALUE="\"$VALUE\"" +fi + +# Execute filtering. +#echo awk "/^#/ { print \$0 } !/^#/ { if (\$$COL $OP $VALUE) print \$0 }" >&2 +awk "/^#/ { print \$0 } !/^#/ { if (\$$COL $OP $VALUE) print \$0 }" + +exit $? diff -r 000000000000 -r 61d9bdb6d519 roi_filter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_filter.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,43 @@ + + + Filter ROI files + + + roi_filter.sh $numeric -c $column -o $operator -v $value <$input >$output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + No help yet. + diff -r 000000000000 -r 61d9bdb6d519 roi_intersect.ctd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_intersect.ctd Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,61 @@ + + + RoiIntersect + roi_intersect + 0.1 + Region Of Interest Intersection. + Compute the intersection of a ROI file and regions from a BED or GFF file. The result is a ROI file where each interval from the BED/GFF/GTF file that overlapped with one input ROI file is a region of interest, with the coverage counts projected to the new region of interest. + + http://www.seqan.de + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 61d9bdb6d519 roi_metrics.Rscript --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_metrics.Rscript Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,103 @@ +#!/usr/bin/env Rscript + +# Compute metrics (all except PCA) for ROI files. +# +# USAGE: roi_metrics.Rscript -i IN.roi -o OUT.roi + +# TODO(holtgrew): Bernd, can you give the English names of the metrics below in the comments? + +# Author: Bernd Jagla +# Author: Manuel Holtgrewe + +library('getopt') # parsing args +library('ngsroi') # ROI I/O +library('boot') # k3.linear() + +# ---------------------------------------------------------------------------- +# Command Line Parsing +# ---------------------------------------------------------------------------- + +# Parse command line options. +spec = matrix(c( + "help", "h", 0, "logical", + "infile", "i", 1, "character", + "outfile", "o", 1, "character" +), byrow=TRUE, ncol=4); +opt = getopt(spec); + +# If help was asked for print a friendly message and exit with a non-zero +# error code. +if (!is.null(opt$help) ) +{ + cat(getopt(spec, usage=TRUE)); + q(status=1); +} + +# Check arguments. +if (is.null(opt$infile)) +{ + write("-i/--infile missing.", stderr()); + q(status=1) +} +if (is.null(opt$out)) +{ + write("-o/--outfile missing.", stderr()); + q(status=1) +} + +# ---------------------------------------------------------------------------- +# Compute metrics. +# ---------------------------------------------------------------------------- + +# Read input files.x +roi = readROI(opt$infile) + +# Compute some basic statistics. +roi$min = as.numeric(lapply(roi$counts, min)) +roi$median = as.numeric(lapply(roi$counts, median)) +roi$mean = as.numeric(lapply(roi$counts, mean)) +roi$quantile75 = as.numeric(lapply(roi$counts, quantile, probs=0.75)) +roi$quantile95 = as.numeric(lapply(roi$counts, quantile, probs=0.95)) + +# Compute area under curve metric. +aoc <- function(v){ + m = max(v) + return (sum(v/m)/length(v)) +} +roi$aoc = as.numeric(lapply(roi$counts, aoc)) + +# Compute xreaXX metric. +xreaXX <- function(v){ + vo=order(v) + mx = max(v) + mi = min(v) + if((mx-mi)==0) return(0) + vn=(v-mi)/(mx-mi) + x=c(1:length(v))/length(v) + vno=vn[vo] + return(sum(x-vno)/length(v)) +} +roi$xreaXX = as.numeric(lapply(roi$counts, xreaXX)) + +# Compute r3linear metric. +roi$r3linear = as.numeric(lapply(roi$counts, k3.linear)) + +# Compute distMax3p metric. +distMax3p <- function(v) +{ + return(length(v)-which.max(rev(v))+1) +} +roi$distMax3p = as.numeric(lapply(roi$count, distMax3p)) + +# Compute distMax5p metric. +distMax5p <- function(v) +{ + return (which.max(v)) +} +roi$distMax5p = as.numeric(lapply(roi$count, distMax5p)) + +# Write output. +writeROI(roi, opt$outfile) + +# Quit with success code. +q(status=0) \ No newline at end of file diff -r 000000000000 -r 61d9bdb6d519 roi_metrics.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_metrics.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,37 @@ + + + Compute metrics for records in an ROI file. + + + roi_metrics.Rscript -i $input -o $output + + + + + + + + + + + + + + + + + + + + No help yet. + diff -r 000000000000 -r 61d9bdb6d519 roi_pca.Rscript --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_pca.Rscript Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,66 @@ +#!/usr/bin/env Rscript + +# Compute PCA metric for ROI files. +# +# USAGE: roi_pca.Rscript -i IN.roi -o OUT.roi + +# TODO(holtgrew): Bernd, can you give the English names of the metrics below in the comments? + +# Author: Bernd Jagla +# Author: Manuel Holtgrewe + +library('getopt') # parsing args +library('ngsroi') # ROI I/O +library('boot') # k3.linear() + +# ---------------------------------------------------------------------------- +# Command Line Parsing +# ---------------------------------------------------------------------------- + +# Parse command line options. +spec = matrix(c( + "help", "h", 0, "logical", + "infile", "i", 1, "character", + "outfile", "o", 1, "character" +), byrow=TRUE, ncol=4); +opt = getopt(spec); + +# If help was asked for print a friendly message and exit with a non-zero +# error code. +if (!is.null(opt$help) ) +{ + cat(getopt(spec, usage=TRUE)); + q(status=1); +} + +# Check arguments. +if (is.null(opt$infile)) +{ + write("-i/--infile missing.", stderr()); + q(status=1) +} +if (is.null(opt$out)) +{ + write("-o/--outfile missing.", stderr()); + q(status=1) +} + +# ---------------------------------------------------------------------------- +# Compute metrics. +# ---------------------------------------------------------------------------- + +# Read input files.x +roi = readROI(opt$infile) + +# Compute PCA. +data=roi[,8:length(roi)-1] +pca=princomp(data, na.action=na.exclude) +prj=predict(pca, data) +roi$pca1=prj[,1] +roi$pca2=prj[,2] + +# Write output. +writeROI(roi, opt$outfile) + +# Quit with success code. +q(status=0) \ No newline at end of file diff -r 000000000000 -r 61d9bdb6d519 roi_pca.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_pca.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,35 @@ + + + Compute PCA-based metric for records in an ROI file. + + + roi_pca.Rscript -i $input -o $output + + + + + + + + + + + + + + + + + + + + No help yet. + diff -r 000000000000 -r 61d9bdb6d519 roi_plot_thumbnails.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_plot_thumbnails.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,188 @@ +#!/usr/bin/env python +"""Thumbnail Plot Generator. + +This report generator uses the binary roi_plot_thumbnails (C++ program, must +be in PATH) to generate many PNG images with small ROI plots. It then creates +a HTML file that includes the PNG and adds an overlay image link map such that +when the user clicks on a plot, she is transfered to the according position in +a local IGV or the UCSC genome browser, for example. +""" + +__author__ = 'Manuel Holtgrewe ' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + +# TODO(holtgrew): Use Cheetah templates to generate HTML. +# TODO(holtgrew): Call roi_plot_thumbnails from Python script. +# TODO(holtgrew): from __future__ use print_function + + +import argparse +import math +import os.path +import sys + +import ngs_roi.app +import ngs_roi.argparse +import ngs_roi.io + + +class LinkRegion(object): + """Region on picture with genomic interval.""" + + def __init__(self, x1, y1, x2, y2, ref, begin_pos, end_pos): + self.x1 = x1 + self.x2 = x2 + self.y1 = y1 + self.y2 = y2 + self.ref = ref + self.begin_pos = begin_pos + self.end_pos = end_pos + + +class RoiPlotGrid(object): + """A grid of ROI plots. + + width -- width one one plot + height -- height of one plot + """ + + # Border in each direction. + BORDER = 0 + # Spacing between plots. + SPACE = 2 + + def __init__(self, width, height, columns, rows, bg_plot=(0, 0, 0, 10), + frame_col=(80, 80, 80, 255), fg_col=(0, 0, 0, 255)): + # Properties of the grid. + self.width = float(width) + self.height = float(height) + self.columns = columns + self.rows = rows + # State. + self.idx = 0 # currently drawn plot + self.canvas_width = 2 * self.BORDER + columns * width + (columns - 1) * self.SPACE + self.canvas_height = 2 * self.BORDER + rows * height + (rows - 1) * self.SPACE + # Colors. + self.bg_plot = bg_plot + self.frame_col = frame_col + self.fg_color = fg_col + # List of link region for image map. + self.link_regions = [] + + def plotStart(self, idx): + """Return pair with start coordinates of plot.""" + row = idx / self.columns + col = idx % self.columns + x = self.BORDER + col * (self.width + self.SPACE) + y = self.BORDER + row * (self.height + self.SPACE) + return x, y + + def plotRecord(self, record): + """Register plotting of a record.""" + start_x, start_y = self.plotStart(self.idx) + self.idx += 1 + # Register link region. + self.link_regions.append(LinkRegion( + start_x, start_y, start_x + self.width, start_y + self.height, + record.ref, record.start_pos, record.end_pos)) + + +class GridLinks(object): + """Link information for one grid.""" + + def __init__(self, file_name, link_regions): + self.file_name = file_name + self.link_regions = link_regions + + +class PlotThumbnailsApp(ngs_roi.app.App): + def __init__(self, args): + # Call parent's constructor. + ngs_roi.app.App.__init__(self, args) + self.prepareOutDir() + # Initialize members. + self.grid = None + self.plot_idx = 0 + self.grid_links = [] + + def run(self): + # Load ROI records. + print >>sys.stderr, 'Loading ROI' + + # Create plots. + runner = ngs_roi.app.PlotThumbnailsRunner(self.args) + runner.run() + + # Create HTML. + print >>sys.stderr, 'Creating HTML...' + num_plots = self.args.num_cols * self.args.num_rows # plots on grid + for i, roi in enumerate(ngs_roi.io.RoiFile(self.args.in_file)): + if self.args.max_rois > 0 and i >= self.args.max_rois: + break + # Write out old grid (if any) and create new one. + if i % num_plots == 0: + if self.grid: + print >>sys.stderr, ' Writing plot %d...' % self.plot_idx + self.writeGrid() + self.grid = RoiPlotGrid(self.args.plot_width, self.args.plot_height, + self.args.num_cols, self.args.num_rows) + # Put the next plot on the grid. + self.grid.plotRecord(roi) + print >>sys.stderr, ' Writing plot %d...' % self.plot_idx + self.writeGrid() # Write last grid. + self.createHtml(self.args.out_file) + return 0 + + def writeGrid(self): + """Register writing of grid.""" + if not self.grid: + return + # Append grid info. + file_name = 'thumbnail_%d.png' % self.plot_idx + file_name = os.path.join(self.args.out_dir, file_name) + self.plot_idx += 1 + self.grid_links.append(GridLinks(os.path.basename(file_name), self.grid.link_regions)) + + def createHtml(self, file_name): + print >>sys.stderr, 'Writing HTML to %s' % file_name + with open(file_name, 'wb') as f: + f.write('\n') + f.write('\n') + for gl in self.grid_links: + vals = (gl.file_name, gl.file_name, self.grid.canvas_width, self.grid.canvas_height) + f.write('\n' % vals) + f.write('\n' % gl.file_name) + for lr in gl.link_regions: + locus = (lr.ref, lr.begin_pos + 1, lr.end_pos) + vals = {'x1': lr.x1, 'x2': lr.x2, 'y1': lr.y1, 'y2': lr.y2, + 'title': '%s %d-%d' % locus, + 'href': self.buildHref(lr.ref, lr.begin_pos, lr.end_pos), + 'onclick': ''} + # Add onclick handler to prevent opening of new window. + vals['target_attr'] = '' + if self.args.link_target: + vals['target_attr'] = ' target="%s"' % self.args.link_target + if self.args.link_type == 'local_igv': + vals['target_attr'] = ' target="empty"' + f.write(' \n' % vals) + f.write('\n') + f.write('\n') + + +def main(): + """Program entry point.""" + parser = argparse.ArgumentParser(description='Plot ROI file.') + ngs_roi.argparse.addFileArguments(parser) + ngs_roi.argparse.addPlotGridArguments(parser) + ngs_roi.argparse.addLinkArguments(parser) + args = parser.parse_args() + ngs_roi.argparse.applyFileDefaults(args) + + app = PlotThumbnailsApp(args) + return app.run() + + +if __name__ == '__main__': + sys.exit(main()) diff -r 000000000000 -r 61d9bdb6d519 roi_plot_thumbnails.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_plot_thumbnails.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,70 @@ + + + Create ROI thumbnail plot grids. + + + roi_plot_thumbnails.py --in-file $input --out-file "$out_file" --out-dir "$out_file.files_path" + #if $max_rois # --max-rois $max_rois #end if + #if $num_rows # --num-rows $num_rows #end if + #if $num_cols # --num-cols $num_cols #end if + #if $plot_height # --plot-height $plot_height #end if + #if $plot_width # --plot-width $plot_width #end if + #if $link_target # --link-target $link_target #end if + #if $link_type # --link-type $link_type #end if + #if $igv_host # --igv-host $igv_host #end if + #if $igv_port # --igv-port $igv_port #end if + #if $ucsc_org # --ucsc-org $ucsc_org #end if + #if $ucsc_db # --ucsc-db $ucsc_db #end if + #if $ucsc_chr_prefix # --ucsc-chr-prefix "$ucsc_chr_prefix" #end if + + && + + roi_plot_grid -if $input -o "${out_file.files_path}/roi_plot_grid" + #if $max_rois # --max-rois $max_rois #end if + #if $num_rows # --num-rows $num_rows #end if + #if $num_cols # --num-cols $num_cols #end if + #if $plot_height # --plot-height $plot_height #end if + #if $plot_width # --plot-width $plot_width #end if + #if $max_value # --max-value $max_value #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + You don't need help! + + diff -r 000000000000 -r 61d9bdb6d519 roi_report.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_report.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,128 @@ +#!/usr/bin/env python +"""Create ROI overview report. + +This report consists of plots of all metrics (y: metric, x: rank of value). +Each plot is written out as a PNG file and we also create one output HTML file +that shows all HTML files. + +Plotting is done using the fine matplotlib. +""" + +from __future__ import print_function + +__author__ = 'Manuel Holtgrewe ' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + + +import sys +import os +import os.path +import argparse + +import Cheetah.Template +import matplotlib.pyplot as plt + +import ngs_roi.io +import ngs_roi.app +import ngs_roi.argparse + + +# The HTML template to use for generating the HTML page. +REPORT_TPL = """ + + + ROI Report + + +

ROI Report

+

Table of Contents

+ +

Plots

+ #for figure in $figures +

$figure.title

+ + #end for + + +""" + + +class ReportBuilder(ngs_roi.app.App): + """This class is used for building the report.""" + + def __init__(self, args): + self.args = args + self.in_file = self.args.in_file + self.out_file = self.args.out_file + self.out_dir = self.args.out_dir + self.prepareOutDir() + + def plotAndWrite(self, file_name, numbers, ylabel): + """Create plot of numbers and write as PNG. + + :param file_name:path to write plot to as image + :param numbers:list of numbers to plot + :param ylabel:label for y axis + """ + plt.figure() + plt.plot(numbers) + plt.ylabel(ylabel) + plt.savefig(file_name) + + def run(self): + # Load ROI. + print('Loading ROI...', file=sys.stderr) + records = ngs_roi.io.load(self.in_file) + keys = records[0].data_keys + # Create ROI plots. + print('Creating plots...', file=sys.stderr) + METRICS = [('start position', 'start_pos', lambda x: x.start_pos), + ('end position', 'end_pos', lambda x: x.end_pos), + ('region length', 'region_length', lambda x: x.region_length), + ('max count', 'max_count', lambda x: x.max_count)] + def getData(i): + def func(x): + try: + res = float(x.data[i]) + except ValueError: + res = 0 + return res + return func + for i, key in enumerate(keys): + slug = ''.join(x for x in key if x.isalnum()) + METRICS.append((key, slug, getData(i))) + figure_infos = [] + for title, slug, func in METRICS: + values = [func(x) for x in records] + file_name = 'report_%s.png' % slug + file_path = os.path.join(self.out_dir, file_name) + self.plotAndWrite(file_path, sorted(values), title) + figure_infos.append({'file_name': file_name, 'title': title, 'slug': slug}) + # Create report HTML. + name_space = {'figures': figure_infos} + t = Cheetah.Template.Template(REPORT_TPL, searchList=name_space) + with open(os.path.join(self.out_dir, 'index.html'), 'wb') as f: + f.write(str(t)) + with open(os.path.join(self.out_file), 'wb') as f: + f.write(str(t)) + +def main(): + """Program entry point.""" + + parser = argparse.ArgumentParser(description='Create ROI report.') + + ngs_roi.argparse.addFileArguments(parser) + args = parser.parse_args() + ngs_roi.argparse.applyFileDefaults(args) + + report_builder = ReportBuilder(args) + return report_builder.run() + + +if __name__ == '__main__': + sys.exit(main()) diff -r 000000000000 -r 61d9bdb6d519 roi_report.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_report.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,36 @@ + + + ROI Overview Report + + + roi_report.py --in-file $input --out-file "$out_file" --out-dir "$out_file.files_path" + + + + + + + + + + + + + + + + + + + + No help yet. + diff -r 000000000000 -r 61d9bdb6d519 roi_sort.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_sort.sh Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,86 @@ +#!/bin/bash + +# Sorting of ROI files. +# +# USAGE: sort_roi.sh [OPTIONS] -i IN.roi -o OUT.roi +# +# Options: +# -r reverse orientation +# -p sort by position (ref, start, end) -- default +# -c COL sort by column COL +# -n COL sort by column COL, compare as number + +# Maximal number of header lines. +MAX_HEADER=1000 + +# The parameters that we will pass to sort. +SORT_POS_ARGS="-k 1,1 -k 2,2n -k 3,3n" +SORT_POS_ARGS_REV="-k 1,1r -k 2,2nr -k 3,3nr" +SORT_COL_ARGS="-k" + +# The arguments will go here. +SORT_BY=pos +SORT_COL=0 +REVERSE= + +# Parse option values. +while getopts "pc:i:o:n:r" opt; do + case $opt in + i) + IN_FILE=$OPTARG + ;; + o) + OUT_FILE=$OPTARG + ;; + p) + SORT_BY=pos + ;; + c) + SORT_BY=c + SORT_COL=$OPTARG + ;; + n) + SORT_BY=n + SORT_COL=$OPTARG + ;; + r) + REVERSE=r + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +# Check that -i or -o are given. +if [[ "$IN_FILE" == "" || "$OUT_FILE" == "" ]]; then + echo "Missing option -i or -o" >&2 + exit 1 +fi + +# Setup sort args. +case $SORT_BY in + c) + SORT_ARGS="-k ${SORT_COL},${SORT_COL}${REVERSE}" + ;; + n) + SORT_ARGS="-k ${SORT_COL},${SORT_COL}n${REVERSE}" + ;; + pos) + if [[ "$REVERSE" == "r" ]]; then + SORT_ARGS=${SORT_POS_ARGS_REV} + else + SORT_ARGS=${SORT_POS_ARGS} + fi + ;; +esac + +# Execute sorting. +#echo "SORT_ARGS=${SORT_ARGS}" 1>&2 +( + head -n ${MAX_HEADER} ${IN_FILE} | grep '^#'; + sort ${SORT_ARGS} <(grep -v '^#' ${IN_FILE}); +) > ${OUT_FILE} + +exit $? diff -r 000000000000 -r 61d9bdb6d519 roi_sort.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_sort.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,48 @@ + + + Sort ROI file + + + roi_sort.sh -i $input -o $output $reverse + #if str($by) == 'position' # -p #end if + #if str($by) == 'text_column' # -c $column #end if + #if str($by) == 'numeric_column' # -n $column #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + No help yet. + diff -r 000000000000 -r 61d9bdb6d519 roi_table.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_table.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,176 @@ +#!/usr/bin/env python +"""ROI Table Generator + +Generates a HTML page with a table of ROI record details. Besides showing the +numeric ROI information, it also gives little roi plots in a column. + +For the little ROI plots, it calls the program roi_plot_thumbnails that has to +be in the PATH. +""" + +__author__ = 'Manuel Holtgrewe ' +__copyright__ = 'Copyring 2013, Freie Universitaet Berlin' +__license__ = 'BSD 3-clause' + + +# TODO(holtgrew): Actually call roi_plot_thumbnails +# TODO(holtgrew): from __future__ use print_function + + +import argparse +import math +import os.path +import sys + +import Cheetah.Template + +import ngs_roi.app +import ngs_roi.argparse +import ngs_roi.io + + +# Main template. +PAGE_TPL = """ + + ROI Table + +

ROI Table

+ $table + + +""" + +# Template for a table. +TABLE_TPL = """ + + + + + + + + + + + #for i, key in enumerate($data_keys) + + #end for + + #for id, roi in enumerate($records) + + + + + + + + + + #for i, key in enumerate($data_keys) + + #end for + + #end for +
plotchrstartendnamelengthstrandmax_count$key
$roi.ref$fmtPos($roi.start_pos + 1)$fmtPos($roi.end_pos)$roi.region_name$fmtPos($roi.region_length)$roi.strand$roi.max_count$roi.data[$i]
+""" + + +class RoiTable(object): + """A table of ROI records with small plots.""" + + def __init__(self, args, keys, records, app): + self.args = args + self.keys = keys + self.records = records + self.app = app + + def tplFuncs(self): + def intWithCommas(x): + if type(x) not in [type(0), type(0L)]: + raise TypeError("Parameter must be an integer.") + if x < 0: + return '-' + intWithCommas(-x) + result = '' + while x >= 1000: + x, r = divmod(x, 1000) + result = ",%03d%s" % (r, result) + return "%d%s" % (x, result) + + def imgId(idx): + """Image id from roi record id.""" + return idx / (self.args.num_rows * self.args.num_cols) + + def imgX(idx): + """x position in image from record id.""" + x = idx % self.args.num_cols + res = x * self.args.plot_width + if x > 0: + res += (x - 1) * 2 + return res + + def imgY(idx): + """y position in image from record id.""" + y = idx / self.args.num_cols + res = y * self.args.plot_height + res += y * 2 + return res + + return {'fmtPos': intWithCommas, 'imgId': imgId, 'imgX': imgX, 'imgY': imgY} + + def render(self): + """Returns string with rendered table.""" + vals = {'data_keys': self.keys, 'records': self.records, 'args': self.args, + 'href': lambda x:self.app.buildHref(x.ref, x.start_pos, x.end_pos)} + vals.update(self.tplFuncs()) + t = Cheetah.Template.Template(TABLE_TPL, searchList=vals) + return str(t) + + +class TableApp(ngs_roi.app.App): + def __init__(self, args): + # Call parent's constructor and create output directory. + ngs_roi.app.App.__init__(self, args) + self.prepareOutDir() + + def run(self): + # Load ROI records. + print >>sys.stderr, 'Loading ROI' + records = ngs_roi.io.load(self.args.in_file, self.args.max_rois) + keys = [] + if records: + keys = records[0].data_keys + + # Create plots. + print >>sys.stderr, 'Creating plots...' + runner = ngs_roi.app.PlotThumbnailsRunner(self.args) + runner.run() + + # Create table. + print >>sys.stderr, 'Creating table...' + self.createHtml(self.args.out_file, keys, records) + return 0 + + def createHtml(self, file_name, keys, records): + print >>sys.stderr, 'Writing %s' % self.args.out_file + vals = {'table': RoiTable(self.args, keys, records, self).render()} + t = Cheetah.Template.Template(PAGE_TPL, searchList=vals) + with open(self.args.out_file, 'wb') as f: + f.write(str(t)) + + +def main(): + parser = argparse.ArgumentParser(description='Plot ROI file.') + + ngs_roi.argparse.addFileArguments(parser) + ngs_roi.argparse.addPlotGridArguments(parser, default_plot_height=60, + default_plot_width=90) + ngs_roi.argparse.addLinkArguments(parser) + args = parser.parse_args() + ngs_roi.argparse.applyFileDefaults(args) + + app = TableApp(args) + return app.run() + + +if __name__ == '__main__': + sys.exit(main()) diff -r 000000000000 -r 61d9bdb6d519 roi_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_table.xml Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,74 @@ + + + ROI Overview Report + + + roi_table.py --in-file $input --out-file "$out_file" --out-dir "$out_file.files_path" + #if $max_rois # --max-rois $max_rois #end if + #if $plot_height # --plot-height $plot_height #end if + #if $plot_width # --plot-width $plot_width #end if + #if $link_target # --link-target $link_target #end if + #if $link_type # --link-type $link_type #end if + #if $igv_host # --igv-host $igv_host #end if + #if $igv_port # --igv-port $igv_port #end if + #if $ucsc_org # --ucsc-org $ucsc_org #end if + #if $ucsc_db # --ucsc-db $ucsc_db #end if + #if $ucsc_chr_prefix # --ucsc-chr-prefix "$ucsc_chr_prefix" #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + No help yet. + diff -r 000000000000 -r 61d9bdb6d519 rois.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rois.py Thu Apr 18 08:03:38 2013 -0400 @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +class RoiRecord(object): + """Represent one record in a ROI file.""" + + def __init__(self, ref, start_pos, end_pos, region_name, region_length, + strand, max_count, data, points): + """Initialize RoiRecord.""" + self.ref = ref + self.start_pos = start_pos + self.end_pos = end_pos + self.strand = strand + self.region_length = region_length + self.region_name = region_name + self.max_count = max_count + self.data = data + self.points = points + + def __str__(self): + return 'RoiRecord(%s, %s, %s, %s, %s, %s, %s, %s, len([...])==%s)' % \ + (repr(self.ref), self.start_pos, self.end_pos, + self.region_name, self.region_length, repr(self.strand), + self.max_count, self.data, len(self.points)) + def __repr__(self): + return self.__str__() + + +def loadRoi(path, max_count=0): + """Load ROI file and return it as a list of RoiRecord objects. + + NA values are translated to 0. + """ + data_keys = [] + result = [] + i = 0 + with open(path, 'rb') as f: + for line in f: + if line.startswith('##'): + data_keys = line[2:].split('\t')[7:-1] + if line[0] == '#': + continue + if max_count > 0 and i >= max_count: + break + i += 1 + vals = line.split() + region_length = int(vals[4]) + data = vals[7:-1] + points = [int(x) for x in vals[-1].split(',')] + r = RoiRecord(vals[0], int(vals[1]) - 1, int(vals[2]), vals[3], + region_length, vals[5], int(vals[6]), data, points) + result.append(r) + #print ' => Loaded %d records.' % len(result) + return data_keys, result