Mercurial > repos > devteam > flanking_features
diff flanking_features.py @ 1:850c05b9af00 draft
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author | devteam |
---|---|
date | Tue, 13 Oct 2015 12:50:14 -0400 |
parents | e928e029f6eb |
children | 94248d5b9b8b |
line wrap: on
line diff
--- a/flanking_features.py Tue Apr 01 09:13:13 2014 -0400 +++ b/flanking_features.py Tue Oct 13 12:50:14 2015 -0400 @@ -10,21 +10,23 @@ -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval """ -import sys, traceback, fileinput -from warnings import warn +import fileinput +import sys from bx.cookbook import doc_optparse -from galaxy.tools.util.galaxyops import * -from bx.intervals.io import * +from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper from bx.intervals.operations import quicksect -from utils.gff_util import * +from bx.tabular.io import ParseError +from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped +from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper assert sys.version_info[:2] >= ( 2, 4 ) -def get_closest_feature (node, direction, threshold_up, threshold_down, report_func_up, report_func_down): + +def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down): #direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases #threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand #threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand - if direction == 1: + if direction == 1: if node.maxend <= threshold_up: if node.end == node.maxend: report_func_up(node) @@ -60,10 +62,11 @@ if node.right: get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down) + def proximal_region_finder(readers, region, comments=True): """ - Returns an iterator that yields elements of the form [ <original_interval>, <closest_feature> ]. - Intervals are GenomicInterval objects. + Returns an iterator that yields elements of the form [ <original_interval>, <closest_feature> ]. + Intervals are GenomicInterval objects. """ primary = readers[0] features = readers[1] @@ -76,13 +79,13 @@ up, down = True, True if region == 'Either': either = True - + # Read features into memory: rightTree = quicksect.IntervalTree() for item in features: if type( item ) is GenomicInterval: rightTree.insert( item, features.linenum, item ) - + for interval in primary: if type( interval ) is Header: yield interval @@ -96,33 +99,33 @@ if chrom not in rightTree.chroms: continue else: - root = rightTree.chroms[chrom] #root node for the chrom tree + root = rightTree.chroms[chrom] # root node for the chrom tree result_up = [] result_down = [] - if (strand == '+' and up) or (strand == '-' and down): + if (strand == '+' and up) or (strand == '-' and down): #upstream +ve strand and downstream -ve strand cases - get_closest_feature (root, 1, start, None, lambda node: result_up.append( node ), None) - + get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None) + if (strand == '+' and down) or (strand == '-' and up): #downstream +ve strand and upstream -ve strand case - get_closest_feature (root, 0, None, end-1, None, lambda node: result_down.append( node )) - + get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node )) + if result_up: - if len(result_up) > 1: #The results_up list has a list of intervals upstream to the given interval. + if len(result_up) > 1: # The results_up list has a list of intervals upstream to the given interval. ends = [] for n in result_up: ends.append(n.end) - res_ind = ends.index(max(ends)) #fetch the index of the closest interval i.e. the interval with the max end from the results_up list + res_ind = ends.index(max(ends)) # fetch the index of the closest interval i.e. the interval with the max end from the results_up list else: res_ind = 0 if not(either): yield [ interval, result_up[res_ind].other ] - - if result_down: + + if result_down: if not(either): #The last element of result_down will be the closest element to the given interval - yield [ interval, result_down[-1].other ] - + yield [ interval, result_down[-1].other ] + if either and (result_up or result_down): iter_val = [] if result_up and result_down: @@ -137,7 +140,8 @@ #The last element of result_down will be the closest element to the given interval iter_val = [ interval, result_down[-1].other ] yield iter_val - + + def main(): options, args = doc_optparse.parse( __doc__ ) try: @@ -148,7 +152,7 @@ in_fname, in2_fname, out_fname, direction = args except: doc_optparse.exception() - + # Set readers to handle either GFF or default format. if in1_gff_format: in1_reader_wrapper = GFFIntervalToBEDReaderWrapper @@ -160,22 +164,22 @@ in2_reader_wrapper = NiceReaderWrapper g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ), - chrom_col=chr_col_1, - start_col=start_col_1, - end_col=end_col_1, - strand_col=strand_col_1, - fix_strand=True ) + chrom_col=chr_col_1, + start_col=start_col_1, + end_col=end_col_1, + strand_col=strand_col_1, + fix_strand=True ) g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ), - chrom_col=chr_col_2, - start_col=start_col_2, - end_col=end_col_2, - strand_col=strand_col_2, - fix_strand=True ) + chrom_col=chr_col_2, + start_col=start_col_2, + end_col=end_col_2, + strand_col=strand_col_2, + fix_strand=True ) # Find flanking features. out_file = open( out_fname, "w" ) try: - for result in proximal_region_finder([g1,g2], direction): + for result in proximal_region_finder([g1, g2], direction): if type( result ) is list: line, closest_feature = result # Need to join outputs differently depending on file types. @@ -185,12 +189,12 @@ # Invervals are in BED coordinates; need to convert to GFF. line = convert_bed_coords_to_gff( line ) closest_feature = convert_bed_coords_to_gff( closest_feature ) - + # Replace double quotes with single quotes in closest feature's attributes. - out_file.write( "%s closest_feature \"%s\" \n" % - ( "\t".join( line.fields ), \ + out_file.write( "%s closest_feature \"%s\" \n" % + ( "\t".join( line.fields ), "\t".join( closest_feature.fields ).replace( "\"", "\\\"" ) - ) ) + ) ) else: # Output is BED + closest feature fields. output_line_fields = [] @@ -202,7 +206,7 @@ except ParseError, exc: fail( "Invalid file format: %s" % str( exc ) ) - print "Direction: %s" %(direction) + print "Direction: %s" % (direction) if g1.skipped > 0: print skipped( g1, filedesc=" of 1st dataset" ) if g2.skipped > 0: