# HG changeset patch # User devteam # Date 1498171171 14400 # Node ID 94248d5b9b8b7e4156845dab29d060b9d63078fe # Parent d94e778c3ad1ef6c644baf57c091d924c5797df5 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689 diff -r d94e778c3ad1 -r 94248d5b9b8b flanking_features.py --- a/flanking_features.py Fri Dec 18 19:37:56 2015 -0500 +++ b/flanking_features.py Thu Jun 22 18:39:31 2017 -0400 @@ -1,5 +1,5 @@ #!/usr/bin/env python -#By: Guruprasad Ananda +# By: Guruprasad Ananda """ Fetch closest up/downstream interval from features corresponding to every interval in primary @@ -9,23 +9,26 @@ -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval """ +from __future__ import print_function import fileinput import sys + from bx.cookbook import doc_optparse from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper from bx.intervals.operations import quicksect from bx.tabular.io import ParseError from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped + from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper assert sys.version_info[:2] >= ( 2, 4 ) def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down): - #direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases - #threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand - #threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand + # direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases + # threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand + # threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand if direction == 1: if node.maxend <= threshold_up: if node.end == node.maxend: @@ -103,11 +106,11 @@ result_up = [] result_down = [] if (strand == '+' and up) or (strand == '-' and down): - #upstream +ve strand and downstream -ve strand cases + # upstream +ve strand and downstream -ve strand cases get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None) if (strand == '+' and down) or (strand == '-' and up): - #downstream +ve strand and upstream -ve strand case + # downstream +ve strand and upstream -ve strand case get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node )) if result_up: @@ -123,7 +126,7 @@ if result_down: if not(either): - #The last element of result_down will be the closest element to the given interval + # The last element of result_down will be the closest element to the given interval yield [ interval, result_down[-1].other ] if either and (result_up or result_down): @@ -132,12 +135,12 @@ if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)): iter_val = [ interval, result_up[res_ind].other ] else: - #The last element of result_down will be the closest element to the given interval + # The last element of result_down will be the closest element to the given interval iter_val = [ interval, result_down[-1].other ] elif result_up: iter_val = [ interval, result_up[res_ind].other ] elif result_down: - #The last element of result_down will be the closest element to the given interval + # The last element of result_down will be the closest element to the given interval iter_val = [ interval, result_down[-1].other ] yield iter_val @@ -203,14 +206,15 @@ out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) ) else: out_file.write( "%s\n" % result ) - except ParseError, exc: + except ParseError as exc: fail( "Invalid file format: %s" % str( exc ) ) - print "Direction: %s" % (direction) + print("Direction: %s" % (direction)) if g1.skipped > 0: - print skipped( g1, filedesc=" of 1st dataset" ) + print(skipped( g1, filedesc=" of 1st dataset" )) if g2.skipped > 0: - print skipped( g2, filedesc=" of 2nd dataset" ) + print(skipped( g2, filedesc=" of 2nd dataset" )) + if __name__ == "__main__": main() diff -r d94e778c3ad1 -r 94248d5b9b8b flanking_features.xml --- a/flanking_features.xml Fri Dec 18 19:37:56 2015 -0500 +++ b/flanking_features.xml Thu Jun 22 18:39:31 2017 -0400 @@ -1,86 +1,87 @@ - for every interval - - bx-python - galaxy-ops - - - flanking_features.py $input1 $input2 $out_file1 $direction - - #if isinstance( $input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__): - -1 1,4,5,7 --gff1 - #else: - -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} - #end if - - #if isinstance( $input2.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__): - -2 1,4,5,7 --gff2 - #else: - -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + for every interval + + macros.xml + + + - - - - - - - - - - - - - - - +#if $input2.is_of_type('gff') + -2 1,4,5,7 --gff2 +#else: + -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} +#end if + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - \ No newline at end of file + ]]> + diff -r d94e778c3ad1 -r 94248d5b9b8b macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Jun 22 18:39:31 2017 -0400 @@ -0,0 +1,20 @@ + + + + + bx-python + galaxy-ops + + + +----- + +**Screencasts!** + +See Galaxy Interval Operation Screencasts_ (right click to open this link in another window). + +.. _Screencasts: https://galaxyproject.org/learn/interval-operations/ + +----- + + diff -r d94e778c3ad1 -r 94248d5b9b8b tool_dependencies.xml --- a/tool_dependencies.xml Fri Dec 18 19:37:56 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ - - - - - - - - - diff -r d94e778c3ad1 -r 94248d5b9b8b utils/__init__.pyc Binary file utils/__init__.pyc has changed diff -r d94e778c3ad1 -r 94248d5b9b8b utils/gff_util.py --- a/utils/gff_util.py Fri Dec 18 19:37:56 2015 -0500 +++ b/utils/gff_util.py Thu Jun 22 18:39:31 2017 -0400 @@ -1,11 +1,12 @@ """ Provides utilities for working with GFF files. """ +import copy -import copy from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper -from bx.tabular.io import Header, Comment, ParseError -from utils.odict import odict +from bx.tabular.io import Comment, Header, ParseError + +from .odict import odict class GFFInterval( GenomicInterval ): @@ -144,7 +145,7 @@ self.default_strand, fix_strand=self.fix_strand ) return interval - def next( self ): + def __next__( self ): """ Returns next GFFFeature. """ # @@ -177,10 +178,10 @@ while not self.seed_interval: try: self.seed_interval = GenomicIntervalReader.next( self ) - except ParseError, e: + except ParseError as e: handle_parse_error( e ) # TODO: When no longer supporting python 2.4 use finally: - #finally: + # finally: raw_size += len( self.current_line ) # If header or comment, clear seed interval and return it with its size. @@ -205,19 +206,19 @@ try: interval = GenomicIntervalReader.next( self ) raw_size += len( self.current_line ) - except StopIteration, e: + except StopIteration as e: # No more intervals to read, but last feature needs to be # returned. interval = None raw_size += len( self.current_line ) break - except ParseError, e: + except ParseError as e: handle_parse_error( e ) raw_size += len( self.current_line ) continue # TODO: When no longer supporting python 2.4 use finally: - #finally: - #raw_size += len( self.current_line ) + # finally: + # raw_size += len( self.current_line ) # Ignore comments. if isinstance( interval, Comment ): @@ -263,6 +264,7 @@ convert_gff_coords_to_bed( feature ) return feature + next = __next__ # This line should be removed once the bx-python port to Python3 is finished def convert_bed_coords_to_gff( interval ): @@ -374,7 +376,9 @@ # -- Get function that generates line/feature key. -- - get_transcript_id = lambda fields: parse_gff_attributes( fields[8] )[ 'transcript_id' ] + def get_transcript_id(fields): + return parse_gff_attributes( fields[8] )[ 'transcript_id' ] + if strict: # Strict GTF parsing uses transcript_id only to group lines into feature. key_fn = get_transcript_id @@ -382,7 +386,8 @@ # Use lenient parsing where chromosome + transcript_id is the key. This allows # transcripts with same ID on different chromosomes; this occurs in some popular # datasources, such as RefGenes in UCSC. - key_fn = lambda fields: fields[0] + '_' + get_transcript_id( fields ) + def key_fn(fields): + return fields[0] + '_' + get_transcript_id( fields ) # Aggregate intervals by transcript_id and collect comments. feature_intervals = odict() diff -r d94e778c3ad1 -r 94248d5b9b8b utils/gff_util.pyc Binary file utils/gff_util.pyc has changed diff -r d94e778c3ad1 -r 94248d5b9b8b utils/odict.pyc Binary file utils/odict.pyc has changed