# HG changeset patch # User holtgrewe # Date 1376315937 14400 # Node ID 19597b57ada87c529f38796e453c2f0bf2eb5c3b # Parent 71241c4dc4d00c3086f6b68ca7e89320654886cb Uploaded diff -r 71241c4dc4d0 -r 19597b57ada8 bam2roi.ctd --- a/bam2roi.ctd Fri Jun 28 14:01:42 2013 -0400 +++ b/bam2roi.ctd Mon Aug 12 09:58:57 2013 -0400 @@ -1,26 +1,31 @@ - - Bam2roi + bam2roi - 0.1 Create ROI from BAM file. Calculated consecutive regions of coverage from alignment file IN.bam and write regions of interst to file OUT.roi. Counting is performed over the entire region (including intron and N-regions) based on the CIGAR string of the alignment record. - http://www.seqan.de - + + + - - + + + + + + + + @@ -31,15 +36,18 @@ - + - - - - - - - + + + + + + + + + + diff -r 71241c4dc4d0 -r 19597b57ada8 bam2roi.xml --- a/bam2roi.xml Fri Jun 28 14:01:42 2013 -0400 +++ b/bam2roi.xml Mon Aug 12 09:58:57 2013 -0400 @@ -1,77 +1,33 @@ - - - - ln -s ${bam2roi_input_file} ${bam2roi_input_file}.${bam2roi_input_file.ext}; - touch ${bam2roi_output_file}; - ln -s ${bam2roi_output_file} ${bam2roi_output_file}.${bam2roi_output_file.ext}; - - - bam2roi - $bam2roi_verbosity + + bam2roi + $bam2roi_verbose + $bam2roi_very_verbose + --input-file "$bam2roi_input_file" + --output-file "$bam2roi_output_file" $bam2roi_strand_specific $bam2roi_ignore_pairing $bam2roi_link_over_skipped - --input-file $bam2roi_input_file.${bam2roi_input_file.ext} - --output-file $bam2roi_output_file.${bam2roi_output_file.ext}; - - - rm -f ${bam2roi_input_file}.${bam2roi_input_file.ext}; - rm -f ${bam2roi_output_file}.${bam2roi_output_file.ext}; - - + #if $bam2roi_input_file + --input-file-file-ext ${bam2roi_input_file.ext} + #end if + #if $bam2roi_output_file + --output-file-file-ext ${bam2roi_output_file.ext} + #end if Create ROI from BAM file. - - - - - - - - - - - - - + + + + + + - - - + - - - - - No help yet. - - - - bam2roi - Cheetah - diff -r 71241c4dc4d0 -r 19597b57ada8 bed_sort.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bed_sort.sh Mon Aug 12 09:58:57 2013 -0400 @@ -0,0 +1,64 @@ +#!/bin/bash + +# Sorting of BED files. +# +# USAGE: sort_bed.sh [OPTIONS] -i IN.roi -o OUT.roi +# +# Options: +# -r reverse orientation +# -p sort by position (ref, start, end) -- default + +# The parameters that we will pass to sort. +SORT_POS_ARGS="-k 1,1 -k 2,2n -k 3,3n" +SORT_POS_ARGS_REV="-k 1,1r -k 2,2nr -k 3,3nr" + +# The arguments will go here. +SORT_BY=beginPos +REVERSE= + +# Parse option values. +while getopts "pc:i:o:n:r" opt; do + case $opt in + i) + IN_FILE=$OPTARG + ;; + o) + OUT_FILE=$OPTARG + ;; + r) + REVERSE=r + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +# Check that -i or -o are given. +if [[ "$IN_FILE" == "" || "$OUT_FILE" == "" ]]; then + echo "Missing option -i or -o" >&2 + exit 1 +fi + +# Setup sort args. +case $SORT_BY in + beginPos) + if [[ "$REVERSE" == "r" ]]; then + SORT_ARGS=${SORT_POS_ARGS_REV} + else + SORT_ARGS=${SORT_POS_ARGS} + fi + ;; +esac + +# Execute sorting. +#echo "OUT_FILE=${OUT_FILE}" 2>&2 +#echo "SORT_ARGS=${SORT_ARGS}" 1>&2 +( + export LC_ALL=C + #echo "sort ${SORT_ARGS} <(grep -v '^#' ${IN_FILE});" 1>&2 + sort ${SORT_ARGS} <(grep -v '^#' ${IN_FILE}); +) > ${OUT_FILE} + +exit $? diff -r 71241c4dc4d0 -r 19597b57ada8 ctd2galaxy.py --- a/ctd2galaxy.py Fri Jun 28 14:01:42 2013 -0400 +++ b/ctd2galaxy.py Mon Aug 12 09:58:57 2013 -0400 @@ -5,6 +5,10 @@ only used here at the moment. """ +# TODO(holtgrew): Option lists do not work at the moment. + +__author__ = 'Manuel Holtgrewe ' + try: import argparse except ImportError: @@ -14,6 +18,12 @@ import xml.sax import xml.sax.saxutils + +# The suffix to identify file extension options (for '--arg-${NO}${SUFFIX}' +# and '--${PARAM_NAME}${SUFFIX}'). +FILE_EXT_SUFFIX = '-file-ext' + + class CTDFormatException(Exception): """Raised when there is a format error in CTD.""" @@ -59,10 +69,12 @@ :ivar children: children of the node :type children: dict with name to node mapping :ivar cli_element: CLIElement that this parameter is mapped to. + :ivar required: Whether or not this parameter is required. + :type required: bool """ def __init__(self, kind='', name='', description='', value='', type_='', tags='', - restrictions='', supported_formats=''): + restrictions='', supported_formats='', required=False): """Initialize the object.""" self.kind = kind self.name = name @@ -76,6 +88,7 @@ self.parent = None # not set, usually a list self.children = {} self.cli_element = None + self.required = required def computePath(self, is_root=True, path=[]): """Compute path entry from parent links. @@ -110,8 +123,8 @@ def __str__(self): """Return string representation.""" t = (self.name, self.description, self.value, self.type_, self.tags, - self.supported_formats, self.children, self.path) - return 'ParametersNode(%s, %s, %s, %s, %s, %s, %s, path=%s)' % tuple(map(repr, t)) + self.supported_formats, self.children, self.path, self.required) + return 'ParametersNode(%s, %s, %s, %s, %s, %s, %s, path=%s, %s)' % tuple(map(repr, t)) def __repr__(self): """Return programmatic representation, same as __str__().""" @@ -194,13 +207,16 @@ # Create the top level Tool object. self.tool = Tool() self.result = self.tool + if not attrs.get('name'): + raise CTDFormatException('No attribute "name" in tag.') + self.tool.name = attrs.get('name') elif self.stack == ['tool', 'cli', 'clielement']: # Create a new CLIElement object for a tag. if not attrs.get('isList'): raise CTDFormatException('No attribute isList in .') if attrs.get('optionIdentifier') is None: raise CTDFormatException('no attribute optionIdentifier in .') - is_list = (attrs.get('isList') == 'false') + is_list = (attrs.get('isList') == 'true') option_identifier = attrs.get('optionIdentifier') self.tool.cli_elements.append(CLIElement(option_identifier=option_identifier, is_list=is_list)) elif self.stack == ['tool', 'cli', 'clielement', 'mapping']: @@ -221,19 +237,21 @@ node.parent = self.parameter_node self.parameter_node.children[name] = node self.parameter_node = node - elif self.stack[:2] == ['tool', 'PARAMETERS'] and self.stack[-1] == 'ITEM': - # Create a new item ParametersNode for the entry. + elif self.stack[:2] == ['tool', 'PARAMETERS'] and self.stack[-1] in ['ITEM', 'ITEMLIST']: + # Create a new item ParametersNode for the / entry. if not attrs.get('name'): - raise CTDFormatException('no attribute name in ') + raise CTDFormatException('no attribute name in /') name = attrs.get('name') value = attrs.get('value') type_ = attrs.get('type') tags = attrs.get('tags') description = attrs.get('description') restrictions = attrs.get('restrictions') - supported_formats = attrs.get('supported_formats') + required = attrs.get('required') == 'true' + supported_formats = attrs.get('supported_formats', '') + kind = {'ITEM': 'item', 'ITEMLIST': 'itemlist'}[self.stack[-1]] child = ParametersNode( - kind='item', name=name, description=description, value=value, + kind=kind, name=name, description=description, value=value, type_=type_, tags=tags, supported_formats=supported_formats, restrictions=restrictions) self.parameter_node.children[name] = child @@ -248,9 +266,7 @@ def characters(self, content): """Handle characters in XML file.""" - if self.stack == ['tool', 'name']: - self.tool.name += content - elif self.stack == ['tool', 'executableName']: + if self.stack == ['tool', 'executableName']: self.tool.executable_name += content elif self.stack == ['tool', 'version']: self.tool.version += content @@ -299,7 +315,7 @@ def appendTag(self, tag, text='', args={}): """Append a tag to self.result with text content only or no content at all.""" e = xml.sax.saxutils.quoteattr - args_str = ' '.join('%s=%s' % (key, e(str(value))) for key, value in args.items()) + args_str = ' '.join('%s=%s' % (key, e(str(value))) for key, value in args.items() if value is not None) if args_str: args_str = ' '+ args_str vals = {'indent': self.indent(), @@ -330,14 +346,14 @@ def handleParameters(self, node): """Recursion for appending tags for ParametersNode.""" for pn in node.children.values(): - if pn.kind == 'item': + if pn.kind in ['item', 'itemlist']: args = {'name': pn.name, 'value': pn.value, 'type': pn.type_, 'description': pn.description, 'restrictions': pn.restrictions, 'tags': pn.tags} - self.appendTag('ITEM', args=args) + self.appendTag(pn.kind.upper(), args=args) else: # node.kind == 'node' args = {'name': pn.name, 'description': pn.description} @@ -375,7 +391,7 @@ self.closeTag('clielement') self.indent_level -= 1 self.closeTag('cli') - # , , group + # , , , group self.openTag('PARAMETERS', args={'version': 1.4, 'xsi:noNamespaceSchemaLocation': 'http://open-ms.sourceforge.net/schemas/Param_1_4.xsd', 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance'}) @@ -390,6 +406,34 @@ f.write(x) +class GalaxyCommandSnippet(object): + """Stores a snippet for the Galaxy tag. + + Such a snippet consists of a list of text that will be concatenated using + space as the separator. + + Optionally, a condition can be given which will be pasted verbatimly into + an #if condition that will also be properly closed. As a bonus, the + snippet will be properly indented. + """ + + def __init__(self, words, condition=None, indent=4, level=1): + self.words = words + self.condition = condition + self.indent = indent + self.level = level + + def build(self): + res = [] + if self.condition: + res.append('#if %s' % self.condition) + res.append(' '.join(self.words)) + if self.condition: + res[-1] = ' ' * self.indent + res[-1] + res.append('#end if') + return '\n'.join([' ' * self.indent * self.level + l for l in res]) + + class GalaxyWriter(XMLWriter): """Write a Tool to the Galaxy format.""" @@ -424,12 +468,16 @@ def addInputParam(self, param_node): """Add a ParametersNode object if it is to go to .""" - if param_node.tags and 'output file' in param_node.tags.split(','): + if param_node.type_ == 'output-file': return # Skip output files - if param_node.kind != 'item': + if param_node.kind not in ['item', 'itemlist']: return # Skip if not item. + if param_node.name.endswith('-file-ext'): + return # Skip if extension to override. args = {} - if param_node.tags and 'input file' in param_node.tags.split(','): + if param_node.tags and 'required' not in param_node.tags.split(','): + args['optional'] = 'true' # false would be default + if param_node.type_ == 'input-file': args['type'] = 'data' args['format'] = ','.join([x.replace('*', '').replace('.', '') for x in param_node.supported_formats.split(',')]) @@ -469,10 +517,12 @@ def addOutputParam(self, param_node): """Add a ParametersNode object if it is to go to .""" - if not param_node.tags or not 'output file' in param_node.tags.split(','): + if param_node.type_ != 'output-file': return # Only add for output files. + if param_node.name.endswith('-file-ext'): + return # Skip if extension to override. args = {} - if '.' in param_node.supported_formats: + if '.' in param_node.supported_formats: args['format'] = param_node.supported_formats.split(',')[0].split('.')[-1] else: args['format'] = param_node.supported_formats.split(',')[0].split('*')[-1] @@ -482,18 +532,44 @@ def addCommandTag(self, tool): """Write tag to self.result.""" - lst = [] + file_ext_elements = [] + # Process non-file-extension arguments. + snippets = [] for ce in tool.cli_elements: + if ce.param_node.name.endswith('-file-ext'): + file_ext_elements.append(ce) + continue # Skip -file-ext options. + # The name of the variable that is used. + var_name = '$' + ce.mapping_path.replace('-', '_').replace('.', '_') + # Check whether it is optional. + optional = bool(ce.param_node.tags and 'required' not in ce.param_node.tags.split(',')) + # Check whether it is a boolean. bool_param = False if ce.param_node.type_ == 'string' and ce.param_node.restrictions and \ sorted(ce.param_node.restrictions.split(',')) == ['false', 'true']: bool_param = True - if not bool_param and ce.option_identifier: - lst.append(ce.option_identifier) - # The path mapping is not ideal but should work OK. - lst.append('$' + ce.mapping_path.replace('-', '_').replace('.', '_')) - txt = [tool.executable_name] + lst - self.appendTag('command', text=' '.join(txt)) + # Get variable name. + val = '"' + var_name + '"' + # Build the snippet for the command. + if bool_param: + # The true value for boolean parameters is the argument itself. + snippets.append(GalaxyCommandSnippet([var_name])) + else: + condition = {True: var_name, False: None}.get(optional) + snippets.append(GalaxyCommandSnippet([ce.option_identifier, val], + condition=condition)) + # Process file extension arguments. + ext_overrides = [] + for ce in file_ext_elements: + if ce.option_identifier == '--write-ctd-file-ext': + continue # Skip special args. + # The name of the variable that is used. + var_name = ce.mapping_path[:-len('-file-ext')].replace('-', '_').replace('.', '_') + snippets.append(GalaxyCommandSnippet([ce.option_identifier, '${%s.ext}' % var_name], + condition='$%s' % var_name)) + # Finalize building the command line. + txt = GalaxyCommandSnippet([tool.executable_name]).build() + '\n' + '\n'.join([s.build() for s in snippets]) + self.appendTag('command', text=txt) def main(): diff -r 71241c4dc4d0 -r 19597b57ada8 gff_sort.sh --- a/gff_sort.sh Fri Jun 28 14:01:42 2013 -0400 +++ b/gff_sort.sh Mon Aug 12 09:58:57 2013 -0400 @@ -9,8 +9,8 @@ # -p sort by position (ref, start, end) -- default # The parameters that we will pass to sort. -SORT_POS_ARGS="-k 1,1 -k 2,2n -k 3,3n" -SORT_POS_ARGS_REV="-k 1,1r -k 2,2nr -k 3,3nr" +SORT_POS_ARGS="-k 1,1 -k 4,4n -k 5,5n" +SORT_POS_ARGS_REV="-k 1,1r -k 4,4nr -k 5,5nr" # The arguments will go here. SORT_BY=beginPos diff -r 71241c4dc4d0 -r 19597b57ada8 ngs_roi/app.py --- a/ngs_roi/app.py Fri Jun 28 14:01:42 2013 -0400 +++ b/ngs_roi/app.py Mon Aug 12 09:58:57 2013 -0400 @@ -44,16 +44,9 @@ def __init__(self, args): self.args = args - def _mkLinks(self): - """Create symlink.""" - subprocess.call(['ln', '-s', self.args.in_file, self.args.in_file + '.roi']) - - def _rmLinks(self): - """Remove symlink.""" - subprocess.call(['rm', '-f', self.args.in_file + '.roi']) - def run(self): - cmd_args = ['-if', self.args.in_file + '.roi', + cmd_args = ['-if', self.args.in_file, + '--in-file-ext', 'roi', '-o', os.path.join(self.args.out_dir, 'thumbnail_'), '--max-rois', self.args.max_rois, '--max-value', self.args.max_value, @@ -67,10 +60,8 @@ #import pdb; pdb.set_trace() import sys print >>sys.stderr, 'Running %s' % ' '.join(cmd_args) - self._mkLinks() p = subprocess.Popen(cmd_args, stderr=subprocess.PIPE, stdout=subprocess.PIPE) res = p.wait() - self._rmLinks() if res: print 'ERROR', p.stdin, p.stderr return res diff -r 71241c4dc4d0 -r 19597b57ada8 roi_feature_projection.ctd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_feature_projection.ctd Mon Aug 12 09:58:57 2013 -0400 @@ -0,0 +1,77 @@ + + + roi_feature_projection + Region Of Interest Projection. + Compute the projection of a ROI file to regions from a BED or GFF file. The result is a ROI file where each interval from the BED/GFF/GTF file that overlapped with one input ROI file is a region of interest, with the coverage counts projected to the new region of interest. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 71241c4dc4d0 -r 19597b57ada8 roi_feature_projection.xml --- a/roi_feature_projection.xml Fri Jun 28 14:01:42 2013 -0400 +++ b/roi_feature_projection.xml Mon Aug 12 09:58:57 2013 -0400 @@ -1,87 +1,52 @@ - - Intersect ROIs with GFF/BED - - - - #if $roi_intersect_genome# ln -s ${roi_intersect_genome} ${roi_intersect_genome}.${roi_intersect_genome.ext}; #end if - #if $roi_intersect_in_roi# ln -s ${roi_intersect_in_roi} ${roi_intersect_in_roi}.${roi_intersect_in_roi.ext}; #end if - #if $roi_intersect_out_roi# touch ${roi_intersect_out_roi}; ln -s ${roi_intersect_out_roi} ${roi_intersect_out_roi}.roi; #end if - #if $roi_intersect_in_features # ln -s ${roi_intersect_in_features} ${roi_intersect_in_features}.${roi_intersect_in_features.ext}; #end if - - - roi_feature_projection $roi_intersect_strand_specific - --in-roi ${roi_intersect_in_roi}.${roi_intersect_in_roi.ext} - --in-features ${roi_intersect_in_features}.${roi_intersect_in_features.ext} - --out-roi ${roi_intersect_out_roi}.roi - --mode $roi_intersect_mode - #if $roi_intersect_genome# --genome ${roi_intersect_genome}.${roi_intersect_genome.ext} #end if - #if $verbosity # $verbosity #end if - #if $roi_intersect_gff_type # --gff-type $roi_intersect_gff_type #end if - #if $roi_intersect_gff_group_by# --gff-group-by $roi_intersect_gff_group_by #end if - ; - - - #if $roi_intersect_genome# rm -f ${roi_intersect_genome}.${roi_intersect_genome.ext}; #end if - #if $roi_intersect_in_roi# rm -f ${roi_intersect_in_roi}.${roi_intersect_in_roi.ext}; #end if - #if $roi_intersect_in_roi# rm -f ${roi_intersect_out_roi}.roi; #end if - #if $roi_intersect_in_features# rm -f ${roi_intersect_in_features}.${roi_intersect_in_features.ext}; #end if - - - + + roi_feature_projection + $roi_feature_projection_quiet + $roi_feature_projection_verbose + $roi_feature_projection_very_verbose + --in-roi "$roi_feature_projection_in_roi" + --in-features "$roi_feature_projection_in_features" + --out-roi "$roi_feature_projection_out_roi" + --genome "$roi_feature_projection_genome" + --mode "$roi_feature_projection_mode" + $roi_feature_projection_strand_specific + --gff-type "$roi_feature_projection_gff_type" + --gff-group-by "$roi_feature_projection_gff_group_by" + #if $roi_feature_projection_in_roi + --in-roi-file-ext ${roi_feature_projection_in_roi.ext} + #end if + #if $roi_feature_projection_in_features + --in-features-file-ext ${roi_feature_projection_in_features.ext} + #end if + #if $roi_feature_projection_out_roi + --out-roi-file-ext ${roi_feature_projection_out_roi.ext} + #end if + #if $roi_feature_projection_genome + --genome-file-ext ${roi_feature_projection_genome.ext} + #end if + Region Of Interest Projection. - - - - - - - - - - - - - - - - - + + + + + + + + + + + - - - - + - - - - - No help yet. - - - - Cheetah - roi_feature_projection - diff -r 71241c4dc4d0 -r 19597b57ada8 roi_filter.sh diff -r 71241c4dc4d0 -r 19597b57ada8 roi_plot_9.sh --- a/roi_plot_9.sh Fri Jun 28 14:01:42 2013 -0400 +++ b/roi_plot_9.sh Mon Aug 12 09:58:57 2013 -0400 @@ -13,6 +13,9 @@ SORT_COL=0 REVERSE= +# Current directory. +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + helpme () { echo "host: ${HOST}" echo "Plot ROIs 9 per page to PDF with links to IGV." @@ -51,10 +54,10 @@ fi gawk -v fileName=${IN_FILE}.tmp.ps \ - -f plot.awk \ + -f ${DIR}/plot.awk \ ${USED_INFILE} |gnuplot 2> /dev/null gawk -v roiFile=${USED_INFILE} \ - -f ps2pswLinks.gawk \ + -f ${DIR}/ps2pswLinks.gawk \ ${IN_FILE}.tmp.ps > ${IN_FILE}.tmp.ln.ps ps2pdf ${IN_FILE}.tmp.ln.ps ${OUT_FILE} # rm ${IN_FILE}.tmp.ps @@ -62,3 +65,5 @@ if [ "x${IN_FILE##*.}" == "xgz" ]; then echo rm $USED_INFILE fi + + diff -r 71241c4dc4d0 -r 19597b57ada8 roi_plot_thumbnails.ctd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roi_plot_thumbnails.ctd Mon Aug 12 09:58:57 2013 -0400 @@ -0,0 +1,73 @@ + + + roi_plot_thumbnails + Create plot grids for ROI file. + Create PNG images with plot grids to OUT${i}.png from ROI records in IN.roi. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 71241c4dc4d0 -r 19597b57ada8 roi_plot_thumbnails.py diff -r 71241c4dc4d0 -r 19597b57ada8 roi_plot_thumbnails.xml --- a/roi_plot_thumbnails.xml Fri Jun 28 14:01:42 2013 -0400 +++ b/roi_plot_thumbnails.xml Mon Aug 12 09:58:57 2013 -0400 @@ -17,24 +17,6 @@ #if $ucsc_org # --ucsc-org $ucsc_org #end if #if $ucsc_db # --ucsc-db $ucsc_db #end if #if $ucsc_chr_prefix # --ucsc-chr-prefix "$ucsc_chr_prefix" #end if - - && - - ln -s ${input} ${input}.${input.ext} - - ; - - roi_plot_thumbnails -if $input -o "${out_file.files_path}/thumbnail_" - #if $max_rois # --max-rois $max_rois #end if - #if $num_rows # --num-rows $num_rows #end if - #if $num_cols # --num-cols $num_cols #end if - #if $plot_height # --plot-height $plot_height #end if - #if $plot_width # --plot-width $plot_width #end if - #if $max_value # --max-value $max_value #end if - - ; - - rm -f ${input}.${input.ext}