# HG changeset patch
# User devteam
# Date 1498171171 14400
# Node ID 94248d5b9b8b7e4156845dab29d060b9d63078fe
# Parent d94e778c3ad1ef6c644baf57c091d924c5797df5
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
diff -r d94e778c3ad1 -r 94248d5b9b8b flanking_features.py
--- a/flanking_features.py Fri Dec 18 19:37:56 2015 -0500
+++ b/flanking_features.py Thu Jun 22 18:39:31 2017 -0400
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-#By: Guruprasad Ananda
+# By: Guruprasad Ananda
"""
Fetch closest up/downstream interval from features corresponding to every interval in primary
@@ -9,23 +9,26 @@
-G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval
-H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval
"""
+from __future__ import print_function
import fileinput
import sys
+
from bx.cookbook import doc_optparse
from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper
from bx.intervals.operations import quicksect
from bx.tabular.io import ParseError
from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped
+
from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper
assert sys.version_info[:2] >= ( 2, 4 )
def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down):
- #direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases
- #threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand
- #threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand
+ # direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases
+ # threshold_up is equal to the interval start for +ve strand, and interval end for -ve strand
+ # threshold_down is equal to the interval end for +ve strand, and interval start for -ve strand
if direction == 1:
if node.maxend <= threshold_up:
if node.end == node.maxend:
@@ -103,11 +106,11 @@
result_up = []
result_down = []
if (strand == '+' and up) or (strand == '-' and down):
- #upstream +ve strand and downstream -ve strand cases
+ # upstream +ve strand and downstream -ve strand cases
get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None)
if (strand == '+' and down) or (strand == '-' and up):
- #downstream +ve strand and upstream -ve strand case
+ # downstream +ve strand and upstream -ve strand case
get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node ))
if result_up:
@@ -123,7 +126,7 @@
if result_down:
if not(either):
- #The last element of result_down will be the closest element to the given interval
+ # The last element of result_down will be the closest element to the given interval
yield [ interval, result_down[-1].other ]
if either and (result_up or result_down):
@@ -132,12 +135,12 @@
if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)):
iter_val = [ interval, result_up[res_ind].other ]
else:
- #The last element of result_down will be the closest element to the given interval
+ # The last element of result_down will be the closest element to the given interval
iter_val = [ interval, result_down[-1].other ]
elif result_up:
iter_val = [ interval, result_up[res_ind].other ]
elif result_down:
- #The last element of result_down will be the closest element to the given interval
+ # The last element of result_down will be the closest element to the given interval
iter_val = [ interval, result_down[-1].other ]
yield iter_val
@@ -203,14 +206,15 @@
out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) )
else:
out_file.write( "%s\n" % result )
- except ParseError, exc:
+ except ParseError as exc:
fail( "Invalid file format: %s" % str( exc ) )
- print "Direction: %s" % (direction)
+ print("Direction: %s" % (direction))
if g1.skipped > 0:
- print skipped( g1, filedesc=" of 1st dataset" )
+ print(skipped( g1, filedesc=" of 1st dataset" ))
if g2.skipped > 0:
- print skipped( g2, filedesc=" of 2nd dataset" )
+ print(skipped( g2, filedesc=" of 2nd dataset" ))
+
if __name__ == "__main__":
main()
diff -r d94e778c3ad1 -r 94248d5b9b8b flanking_features.xml
--- a/flanking_features.xml Fri Dec 18 19:37:56 2015 -0500
+++ b/flanking_features.xml Thu Jun 22 18:39:31 2017 -0400
@@ -1,86 +1,87 @@
- for every interval
-
- bx-python
- galaxy-ops
-
-
- flanking_features.py $input1 $input2 $out_file1 $direction
-
- #if isinstance( $input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
- -1 1,4,5,7 --gff1
- #else:
- -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
- #end if
-
- #if isinstance( $input2.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
- -2 1,4,5,7 --gff2
- #else:
- -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol}
- #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ for every interval
+
+ macros.xml
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+#if $input2.is_of_type('gff')
+ -2 1,4,5,7 --gff2
+#else:
+ -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol}
+#end if
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
\ No newline at end of file
+ ]]>
+
diff -r d94e778c3ad1 -r 94248d5b9b8b macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jun 22 18:39:31 2017 -0400
@@ -0,0 +1,20 @@
+
+
+
+
+ bx-python
+ galaxy-ops
+
+
+
+-----
+
+**Screencasts!**
+
+See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
+
+.. _Screencasts: https://galaxyproject.org/learn/interval-operations/
+
+-----
+
+
diff -r d94e778c3ad1 -r 94248d5b9b8b tool_dependencies.xml
--- a/tool_dependencies.xml Fri Dec 18 19:37:56 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-
-
-
-
-
-
-
-
-
diff -r d94e778c3ad1 -r 94248d5b9b8b utils/__init__.pyc
Binary file utils/__init__.pyc has changed
diff -r d94e778c3ad1 -r 94248d5b9b8b utils/gff_util.py
--- a/utils/gff_util.py Fri Dec 18 19:37:56 2015 -0500
+++ b/utils/gff_util.py Thu Jun 22 18:39:31 2017 -0400
@@ -1,11 +1,12 @@
"""
Provides utilities for working with GFF files.
"""
+import copy
-import copy
from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper
-from bx.tabular.io import Header, Comment, ParseError
-from utils.odict import odict
+from bx.tabular.io import Comment, Header, ParseError
+
+from .odict import odict
class GFFInterval( GenomicInterval ):
@@ -144,7 +145,7 @@
self.default_strand, fix_strand=self.fix_strand )
return interval
- def next( self ):
+ def __next__( self ):
""" Returns next GFFFeature. """
#
@@ -177,10 +178,10 @@
while not self.seed_interval:
try:
self.seed_interval = GenomicIntervalReader.next( self )
- except ParseError, e:
+ except ParseError as e:
handle_parse_error( e )
# TODO: When no longer supporting python 2.4 use finally:
- #finally:
+ # finally:
raw_size += len( self.current_line )
# If header or comment, clear seed interval and return it with its size.
@@ -205,19 +206,19 @@
try:
interval = GenomicIntervalReader.next( self )
raw_size += len( self.current_line )
- except StopIteration, e:
+ except StopIteration as e:
# No more intervals to read, but last feature needs to be
# returned.
interval = None
raw_size += len( self.current_line )
break
- except ParseError, e:
+ except ParseError as e:
handle_parse_error( e )
raw_size += len( self.current_line )
continue
# TODO: When no longer supporting python 2.4 use finally:
- #finally:
- #raw_size += len( self.current_line )
+ # finally:
+ # raw_size += len( self.current_line )
# Ignore comments.
if isinstance( interval, Comment ):
@@ -263,6 +264,7 @@
convert_gff_coords_to_bed( feature )
return feature
+ next = __next__ # This line should be removed once the bx-python port to Python3 is finished
def convert_bed_coords_to_gff( interval ):
@@ -374,7 +376,9 @@
# -- Get function that generates line/feature key. --
- get_transcript_id = lambda fields: parse_gff_attributes( fields[8] )[ 'transcript_id' ]
+ def get_transcript_id(fields):
+ return parse_gff_attributes( fields[8] )[ 'transcript_id' ]
+
if strict:
# Strict GTF parsing uses transcript_id only to group lines into feature.
key_fn = get_transcript_id
@@ -382,7 +386,8 @@
# Use lenient parsing where chromosome + transcript_id is the key. This allows
# transcripts with same ID on different chromosomes; this occurs in some popular
# datasources, such as RefGenes in UCSC.
- key_fn = lambda fields: fields[0] + '_' + get_transcript_id( fields )
+ def key_fn(fields):
+ return fields[0] + '_' + get_transcript_id( fields )
# Aggregate intervals by transcript_id and collect comments.
feature_intervals = odict()
diff -r d94e778c3ad1 -r 94248d5b9b8b utils/gff_util.pyc
Binary file utils/gff_util.pyc has changed
diff -r d94e778c3ad1 -r 94248d5b9b8b utils/odict.pyc
Binary file utils/odict.pyc has changed