Mercurial > repos > holtgrewe > ngs_roi
changeset 1:0ac4f6f3d984 draft
Uploaded
author | holtgrewe |
---|---|
date | Mon, 06 May 2013 12:34:43 -0400 |
parents | 61d9bdb6d519 |
children | 08cb79ffac4c |
files | bam2roi.xml gff_sort.sh ngs_roi/app.py roi_details.py roi_details.xml roi_metrics.Rscript roi_plot_thumbnails.py roi_sort.sh roi_table.py |
diffstat | 9 files changed, 203 insertions(+), 85 deletions(-) [+] |
line wrap: on
line diff
--- a/bam2roi.xml Thu Apr 18 08:03:38 2013 -0400 +++ b/bam2roi.xml Mon May 06 12:34:43 2013 -0400 @@ -28,9 +28,9 @@ help="When selected, a ROI continues over skipped bases." /> <param name="bam2roi_strand_specific" type="boolean" falsevalue="" truevalue="--strand-specific" label="Compute strand-specific ROIs" - label="When selected, the strands of the reads alignments are considered, e.g. there can be two or more ROIs on different strands that would overlap on the same strand." /> + help="When selected, the strands of the reads alignments are considered, e.g. there can be two or more ROIs on different strands that would overlap on the same strand." /> - <param name="bam2roi_verbosity" type="select" label="Verbosity" force_select="true" /> + <param name="bam2roi_verbosity" type="select" label="Verbosity" force_select="true"> <option value="" selected="true">normal</option> <option value="--verbose">verbose</option> <option value="--very-verbose">very verbose</option>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gff_sort.sh Mon May 06 12:34:43 2013 -0400 @@ -0,0 +1,64 @@ +#!/bin/bash + +# Sorting of GFF files. +# +# USAGE: sort_gff.sh [OPTIONS] -i IN.roi -o OUT.roi +# +# Options: +# -r reverse orientation +# -p sort by position (ref, start, end) -- default + +# The parameters that we will pass to sort. +SORT_POS_ARGS="-k 1,1 -k 2,2n -k 3,3n" +SORT_POS_ARGS_REV="-k 1,1r -k 2,2nr -k 3,3nr" + +# The arguments will go here. +SORT_BY=beginPos +REVERSE= + +# Parse option values. +while getopts "pc:i:o:n:r" opt; do + case $opt in + i) + IN_FILE=$OPTARG + ;; + o) + OUT_FILE=$OPTARG + ;; + r) + REVERSE=r + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +# Check that -i or -o are given. +if [[ "$IN_FILE" == "" || "$OUT_FILE" == "" ]]; then + echo "Missing option -i or -o" >&2 + exit 1 +fi + +# Setup sort args. +case $SORT_BY in + beginPos) + if [[ "$REVERSE" == "r" ]]; then + SORT_ARGS=${SORT_POS_ARGS_REV} + else + SORT_ARGS=${SORT_POS_ARGS} + fi + ;; +esac + +# Execute sorting. +#echo "OUT_FILE=${OUT_FILE}" 2>&2 +#echo "SORT_ARGS=${SORT_ARGS}" 1>&2 +( + export LC_ALL=C + #echo "sort ${SORT_ARGS} <(grep -v '^#' ${IN_FILE});" 1>&2 + sort ${SORT_ARGS} <(grep -v '^#' ${IN_FILE}); +) > ${OUT_FILE} + +exit $?
--- a/ngs_roi/app.py Thu Apr 18 08:03:38 2013 -0400 +++ b/ngs_roi/app.py Mon May 06 12:34:43 2013 -0400 @@ -57,6 +57,8 @@ '--spacing', self.args.spacing] cmd_args = ['roi_plot_thumbnails'] + map(str, cmd_args) #import pdb; pdb.set_trace() + import sys + print >>sys.stderr, 'Running %s' % ' '.join(cmd_args) p = subprocess.Popen(cmd_args, stderr=subprocess.PIPE, stdout=subprocess.PIPE) res = p.wait() if res:
--- a/roi_details.py Thu Apr 18 08:03:38 2013 -0400 +++ b/roi_details.py Mon May 06 12:34:43 2013 -0400 @@ -12,28 +12,51 @@ import Cheetah.Template import matplotlib.pyplot as plt -import rois +import ngs_roi.app +import ngs_roi.argparse +import ngs_roi.io PAGE_TPL = """ <html> - <head><title>ROI Table</title></head> + <head> + <title>ROI Table</title> + <style type="text/css"> + div.plot + { + float: left; + padding: 4px; + margin: 2px; + width: 420px; + } + + .plot h2 { margin-top: 3px; margin-bottom: 3px; text-align: center; } + .plot img { display: block; margin: 0 auto; } + </style> + </head> <body> <h1>Detailed ROI Report</h1> + #for i, roi in enumerate($records) - <h2>${roi.ref}:${roi.start_pos + 1}-${roi.end_pos+1}</h2> - <img src="${args.out_prefix}${i}.png" /> - <dl> - <dt>chr:start-end name length strand</dt> - <dd>${roi.ref}:${roi.start_pos}-${roi.end_pos} ${roi.region_name} ${roi.region_length} ${roi.strand}</dd> - <dt>metrics</dt> - <dd>#for j, key in enumerate($data_keys)#$key: ${roi.data[$j]}, #end for#</dd> - </dl> + <div class="plot"> + <h2>${roi.ref}:${roi.start_pos + 1}-${roi.end_pos+1}</h2> + <a href="${href($roi)}" target="dead"><img src="plot_${i}.png" /></a> + <p> + <b>chr:start-end</b> <a href="${href($roi)}" target="dead">${roi.ref}:${roi.start_pos}-${roi.end_pos} ${roi.strand}</a>; + <b>region name</b> ${roi.region_name}; + <b>region length</b> ${roi.region_length}; + </p> + #if $roi.data + <p>#for j, key in enumerate($data_keys)#<b>$key:</b> ${roi.data[$j]}; #end for#</p> + #end if + </div> #end for + <iframe name="dead" height="0" width="0"></iframe> + <div><code>$args</code></div> </body> </html> """ -class DetailedRoiGenerator(object): +class DetailedRoiGenerator(ngs_roi.app.App): """Generate detailed ROI report. :ivar args:Arguments from the comment line. @@ -48,28 +71,40 @@ :return: integer with the result. """ print >>sys.stderr, 'Loading ROI' - keys, records = rois.loadRoi(self.args.in_file, self.args.max_rois) + records = ngs_roi.io.load(self.args.in_file, self.args.max_rois) + keys = records[0].data_keys self.writeHtml(keys, records) self.writePlots(records) return 0 def writePlots(self, records): + COLOR = 'blue' + LINE_WIDTH = .5 + LINE_STYLE = '-' + TICK_FONT_SIZE = 8 + LABEL_FONT_SIZE = 10 for i, roi in enumerate(records): - file_name = '%s%d.png' % (os.path.join(self.args.out_dir, self.args.out_prefix), i) + file_name = 'plot_%d.png' % i + file_name = os.path.join(self.args.out_dir, file_name) print >>sys.stderr, 'Writing plot %s' % file_name - plt.figure(figsize=(6, 4)) - plt.gcf().subplots_adjust(bottom=0.1, left=0.1) - plt.plot(roi.points) - plt.ylabel('coverage') - plt.xlabel('ROI pos') + plt.figure(figsize=(4, 2.5)) + plt.gcf().subplots_adjust(bottom=0.16, left=0.15) + plt.plot(roi.points, color=COLOR, linewidth=LINE_WIDTH, linestyle=LINE_STYLE) + plt.ylim(ymin=0) + if self.args.max_value: + plt.ylim(ymax=self.args.max_value) + plt.tick_params(labelsize=TICK_FONT_SIZE) + plt.ylabel('coverage', fontsize=LABEL_FONT_SIZE, weight='semibold') + plt.xlabel('ROI beginPos', fontsize=LABEL_FONT_SIZE, weight='semibold') plt.savefig(file_name) def writeHtml(self, keys, records): file_name = self.args.out_file print >>sys.stderr, 'Writing HTML file %s' % file_name - vals = {'args': self.args, 'records': records, 'data_keys': keys} + vals = {'args': self.args, 'records': records, 'data_keys': keys, + 'href': lambda x: self.buildHref(x.ref, x.start_pos, x.end_pos)} t = Cheetah.Template.Template(PAGE_TPL, searchList=vals) with open(file_name, 'wb') as f: @@ -78,64 +113,12 @@ def main(): parser = argparse.ArgumentParser(description='Plot ROI file.') - parser.add_argument('-o', '--out-file', dest='out_file', metavar='PATH', - help='Prefix of output HTML file. The tool will create ' - 'PNG files parallel to the HTML file.', - default='roi_plots.html') - parser.add_argument('-d', '--out-dir', dest='out_dir', metavar='DIR', - help='Directory to write images to. By default ' - 'this is the same as the directory for --out-file.', - default=None) - parser.add_argument('-p', '--out-prefix', dest='out_prefix', metavar='DIR', - help='Prefix of output PNG files.', default='roi_plot_grid') - parser.add_argument('-i', '--in-file', dest='in_file', metavar='FILE', - required=True, help='Path to ROI file to read.') - - group = parser.add_argument_group('Plot Configuration', 'Arguments for the plot images.') - - group.add_argument('--max-rois', dest='max_rois', metavar='NUM', - type=int, default=0, - help='Maximal number of ROIs, 0 for all.') + ngs_roi.argparse.addFileArguments(parser) + ngs_roi.argparse.addPlotGridArguments(parser) + ngs_roi.argparse.addLinkArguments(parser) + args = parser.parse_args() + ngs_roi.argparse.applyFileDefaults(args) - group.add_argument('--num-rows', dest='num_rows', metavar='ROWS', - type=int, default=50, - help='Number of rows per grid.') - group.add_argument('--num-cols', dest='num_cols', metavar='COLS', - type=int, default=40, - help='Number of columns per grid.') - - group.add_argument('--plot-height', dest='plot_height', metavar='HEIGHT', - type=int, default=30, help='Height of one plot in px.') - group.add_argument('--plot-width', dest='plot_width', metavar='WIDTH', - type=int, default=30, help='Width of one plot in px.') - - group = parser.add_argument_group('HTML Links', 'Arguments for HTML link creation.') - - group.add_argument('--link-target', dest='link_target', metavar='TARGET', - default='_blank', choices=['_blank', '_top'], - help='Select the link target to create (_blank or _top).') - - group.add_argument('--link-type', dest='link_type', metavar='TARGET', - default='local_igv', choices=['local_igv', 'ucsc'], - help='Select the type of links to create. One of ' - '"local_igv" and "ucsc".') - - group.add_argument('--igv-host', dest='igv_host', metavar='HOST', - default='localhost', help='Host for IGV link.') - group.add_argument('--igv-port', dest='igv_port', metavar='PORT', - type=int, default='60151', help='Port for IGV link.') - - group.add_argument('--ucsc-org', dest='ucsc_org', metavar='ORG', - default='human', help='Organism for UCSC browser link.') - group.add_argument('--ucsc-db', dest='ucsc_db', metavar='DB', - default='hg18', help='Assembly version for UCSC browser link.') - group.add_argument('--ucsc-chr-prefix', dest='ucsc_chr_prefix', metavar='PREFIX', - default='', help='Prefix for chromosome names in UCSC browser.') - - args = parser.parse_args() - if not args.out_dir: - args.out_dir = os.path.dirname(args.out_file) or '.' - print args app = DetailedRoiGenerator(args) return app.run()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_details.xml Mon May 06 12:34:43 2013 -0400 @@ -0,0 +1,65 @@ +<?xml version="1.0"?> +<tool id="roi_table" name="ROI Details"> + <description>ROI Details</description> + + <command interpreter="python"> + roi_details.py --in-file $input --out-file "$out_file" --out-dir "$out_file.files_path" + #if $max_rois # --max-rois $max_rois #end if + #if $link_target # --link-target $link_target #end if + #if $link_type # --link-type $link_type #end if + #if $igv_host # --igv-host $igv_host #end if + #if $igv_port # --igv-port $igv_port #end if + #if $ucsc_org # --ucsc-org $ucsc_org #end if + #if $ucsc_db # --ucsc-db $ucsc_db #end if + #if $ucsc_chr_prefix # --ucsc-chr-prefix "$ucsc_chr_prefix" #end if + </command> + + <!-- + Input Files and Parameters + --> + <inputs> + <param name="input" format="roi" type="data" + label="ROI file to generate details for."/> + + <param name="max_rois" type="integer" value="100" + label="Maximal total number of records to process. 0 for all." /> + + <param name="link_type" type="select" label="Link target."> + <option value="local_igv" label="Local IGV Instance" /> + <option value="ucsc" label="UCSC Genome Browser" /> + </param> + + <param name="link_target" type="select" label="Open links in."> + <option value="_blank" selected="true">new window</option> + <option value="_top">this window</option> + <option value="">this frame</option> + </param> + + <param name="igv_host" type="text" label="Host for the IGV link." value="localhost" /> + <param name="igv_port" type="integer" label="Port for the IGV link." value="60151" /> + + <param name="ucsc_org" type="text" label="UCSC Genome Browser org value." value="human" /> + <param name="ucsc_db" type="text" label="UCSC Genome Browser db value." value="hg18" /> + <param name="ucsc_chr_prefix" type="text" label="Prefix to add to contig names." value="" /> + </inputs> + + <!-- + Output Files + --> + <outputs> + <data name="out_file" format="html" label="${input.name} Table" /> + </outputs> + + <!-- + Recognize errors by return code and not output to stderr. + --> + <stdio> + <exit_code range="1:" level="fatal" /> + <exit_code range=":-1" level="fatal" /> + </stdio> + + <!-- + Tool Help + --> + <help>No help yet.</help> +</tool>
--- a/roi_metrics.Rscript Thu Apr 18 08:03:38 2013 -0400 +++ b/roi_metrics.Rscript Mon May 06 12:34:43 2013 -0400 @@ -49,7 +49,7 @@ # Compute metrics. # ---------------------------------------------------------------------------- -# Read input files.x +# Read input files. roi = readROI(opt$infile) # Compute some basic statistics.
--- a/roi_plot_thumbnails.py Thu Apr 18 08:03:38 2013 -0400 +++ b/roi_plot_thumbnails.py Mon May 06 12:34:43 2013 -0400 @@ -148,7 +148,7 @@ print >>sys.stderr, 'Writing HTML to %s' % file_name with open(file_name, 'wb') as f: f.write('<html><body>\n') - f.write('<iframe name="empty" height="0" width="0" src="about:blank"></iframe>\n') + f.write('<h1>ROI Thumbnail Plots</h1>') for gl in self.grid_links: vals = (gl.file_name, gl.file_name, self.grid.canvas_width, self.grid.canvas_height) f.write('<img src="%s" usemap="#%s" width="%d" height="%d" />\n' % vals) @@ -168,7 +168,8 @@ f.write(' <area shape="rect" coords="%(x1)d,%(y1)d,%(x2)d,%(y2)d" ' 'alt="%(title)s" title="%(title)s" href="%(href)s"%(target_attr)s />\n' % vals) f.write('</map>\n') - f.write('</body></html>\n') + f.write('<iframe name="empty" height="0" width="0" src="about:blank"></iframe>\n') + f.write('<div><code>' + str(self.args) + '</code></div></body></html>\n') def main():
--- a/roi_sort.sh Thu Apr 18 08:03:38 2013 -0400 +++ b/roi_sort.sh Mon May 06 12:34:43 2013 -0400 @@ -19,7 +19,7 @@ SORT_COL_ARGS="-k" # The arguments will go here. -SORT_BY=pos +SORT_BY=beginPos SORT_COL=0 REVERSE= @@ -33,7 +33,7 @@ OUT_FILE=$OPTARG ;; p) - SORT_BY=pos + SORT_BY=beginPos ;; c) SORT_BY=c @@ -65,9 +65,9 @@ SORT_ARGS="-k ${SORT_COL},${SORT_COL}${REVERSE}" ;; n) - SORT_ARGS="-k ${SORT_COL},${SORT_COL}n${REVERSE}" + SORT_ARGS="-k ${SORT_COL},${SORT_COL}g${REVERSE}" ;; - pos) + beginPos) if [[ "$REVERSE" == "r" ]]; then SORT_ARGS=${SORT_POS_ARGS_REV} else @@ -79,7 +79,9 @@ # Execute sorting. #echo "SORT_ARGS=${SORT_ARGS}" 1>&2 ( + export LC_ALL=C head -n ${MAX_HEADER} ${IN_FILE} | grep '^#'; + #echo "sort ${SORT_ARGS} <(grep -v '^#' ${IN_FILE});" 1>&2 sort ${SORT_ARGS} <(grep -v '^#' ${IN_FILE}); ) > ${OUT_FILE}